10b57cec5SDimitry Andric //===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements the transformation that promotes indirect calls to 100b57cec5SDimitry Andric // conditional direct calls when the indirect-call value profile metadata is 110b57cec5SDimitry Andric // available. 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h" 16*0fca6ea1SDimitry Andric #include "llvm/ADT/DenseMap.h" 170b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 180b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 190b57cec5SDimitry Andric #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" 200b57cec5SDimitry Andric #include "llvm/Analysis/IndirectCallVisitor.h" 210b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 220b57cec5SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h" 23*0fca6ea1SDimitry Andric #include "llvm/Analysis/TypeMetadataUtils.h" 240b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h" 25*0fca6ea1SDimitry Andric #include "llvm/IR/Dominators.h" 260b57cec5SDimitry Andric #include "llvm/IR/Function.h" 270b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h" 280b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 290b57cec5SDimitry Andric #include "llvm/IR/LLVMContext.h" 300b57cec5SDimitry Andric #include "llvm/IR/MDBuilder.h" 310b57cec5SDimitry Andric #include "llvm/IR/PassManager.h" 325f757f3fSDimitry Andric #include "llvm/IR/ProfDataUtils.h" 330b57cec5SDimitry Andric #include "llvm/IR/Value.h" 340b57cec5SDimitry Andric #include "llvm/ProfileData/InstrProf.h" 350b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 360b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 370b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 380b57cec5SDimitry Andric #include "llvm/Support/Error.h" 390b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 400b57cec5SDimitry Andric #include "llvm/Transforms/Instrumentation.h" 410b57cec5SDimitry Andric #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 420b57cec5SDimitry Andric #include "llvm/Transforms/Utils/CallPromotionUtils.h" 430b57cec5SDimitry Andric #include <cassert> 440b57cec5SDimitry Andric #include <cstdint> 450b57cec5SDimitry Andric #include <memory> 46*0fca6ea1SDimitry Andric #include <set> 470b57cec5SDimitry Andric #include <string> 48*0fca6ea1SDimitry Andric #include <unordered_map> 490b57cec5SDimitry Andric #include <utility> 500b57cec5SDimitry Andric #include <vector> 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric using namespace llvm; 530b57cec5SDimitry Andric 540b57cec5SDimitry Andric #define DEBUG_TYPE "pgo-icall-prom" 550b57cec5SDimitry Andric 560b57cec5SDimitry Andric STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions."); 570b57cec5SDimitry Andric STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites."); 580b57cec5SDimitry Andric 59*0fca6ea1SDimitry Andric extern cl::opt<unsigned> MaxNumVTableAnnotations; 60*0fca6ea1SDimitry Andric 61*0fca6ea1SDimitry Andric namespace llvm { 62*0fca6ea1SDimitry Andric extern cl::opt<bool> EnableVTableProfileUse; 63*0fca6ea1SDimitry Andric } 64*0fca6ea1SDimitry Andric 650b57cec5SDimitry Andric // Command line option to disable indirect-call promotion with the default as 660b57cec5SDimitry Andric // false. This is for debug purpose. 670b57cec5SDimitry Andric static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden, 680b57cec5SDimitry Andric cl::desc("Disable indirect call promotion")); 690b57cec5SDimitry Andric 700b57cec5SDimitry Andric // Set the cutoff value for the promotion. If the value is other than 0, we 710b57cec5SDimitry Andric // stop the transformation once the total number of promotions equals the cutoff 720b57cec5SDimitry Andric // value. 730b57cec5SDimitry Andric // For debug use only. 740b57cec5SDimitry Andric static cl::opt<unsigned> 7581ad6265SDimitry Andric ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, 760b57cec5SDimitry Andric cl::desc("Max number of promotions for this compilation")); 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped. 790b57cec5SDimitry Andric // For debug use only. 800b57cec5SDimitry Andric static cl::opt<unsigned> 8181ad6265SDimitry Andric ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, 820b57cec5SDimitry Andric cl::desc("Skip Callsite up to this number for this compilation")); 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric // Set if the pass is called in LTO optimization. The difference for LTO mode 850b57cec5SDimitry Andric // is the pass won't prefix the source module name to the internal linkage 860b57cec5SDimitry Andric // symbols. 870b57cec5SDimitry Andric static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden, 880b57cec5SDimitry Andric cl::desc("Run indirect-call promotion in LTO " 890b57cec5SDimitry Andric "mode")); 900b57cec5SDimitry Andric 910b57cec5SDimitry Andric // Set if the pass is called in SamplePGO mode. The difference for SamplePGO 920b57cec5SDimitry Andric // mode is it will add prof metadatato the created direct call. 930b57cec5SDimitry Andric static cl::opt<bool> 940b57cec5SDimitry Andric ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden, 950b57cec5SDimitry Andric cl::desc("Run indirect-call promotion in SamplePGO mode")); 960b57cec5SDimitry Andric 970b57cec5SDimitry Andric // If the option is set to true, only call instructions will be considered for 980b57cec5SDimitry Andric // transformation -- invoke instructions will be ignored. 990b57cec5SDimitry Andric static cl::opt<bool> 1000b57cec5SDimitry Andric ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden, 1010b57cec5SDimitry Andric cl::desc("Run indirect-call promotion for call instructions " 1020b57cec5SDimitry Andric "only")); 1030b57cec5SDimitry Andric 1040b57cec5SDimitry Andric // If the option is set to true, only invoke instructions will be considered for 1050b57cec5SDimitry Andric // transformation -- call instructions will be ignored. 1060b57cec5SDimitry Andric static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false), 1070b57cec5SDimitry Andric cl::Hidden, 1080b57cec5SDimitry Andric cl::desc("Run indirect-call promotion for " 1090b57cec5SDimitry Andric "invoke instruction only")); 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric // Dump the function level IR if the transformation happened in this 1120b57cec5SDimitry Andric // function. For debug use only. 1130b57cec5SDimitry Andric static cl::opt<bool> 1140b57cec5SDimitry Andric ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, 1150b57cec5SDimitry Andric cl::desc("Dump IR after transformation happens")); 1160b57cec5SDimitry Andric 117*0fca6ea1SDimitry Andric // Indirect call promotion pass will fall back to function-based comparison if 118*0fca6ea1SDimitry Andric // vtable-count / function-count is smaller than this threshold. 119*0fca6ea1SDimitry Andric static cl::opt<float> ICPVTablePercentageThreshold( 120*0fca6ea1SDimitry Andric "icp-vtable-percentage-threshold", cl::init(0.99), cl::Hidden, 121*0fca6ea1SDimitry Andric cl::desc("The percentage threshold of vtable-count / function-count for " 122*0fca6ea1SDimitry Andric "cost-benefit analysis.")); 123*0fca6ea1SDimitry Andric 124*0fca6ea1SDimitry Andric // Although comparing vtables can save a vtable load, we may need to compare 125*0fca6ea1SDimitry Andric // vtable pointer with multiple vtable address points due to class inheritance. 126*0fca6ea1SDimitry Andric // Comparing with multiple vtables inserts additional instructions on hot code 127*0fca6ea1SDimitry Andric // path, and doing so for an earlier candidate delays the comparisons for later 128*0fca6ea1SDimitry Andric // candidates. For the last candidate, only the fallback path is affected. 129*0fca6ea1SDimitry Andric // We allow multiple vtable comparison for the last function candidate and use 130*0fca6ea1SDimitry Andric // the option below to cap the number of vtables. 131*0fca6ea1SDimitry Andric static cl::opt<int> ICPMaxNumVTableLastCandidate( 132*0fca6ea1SDimitry Andric "icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden, 133*0fca6ea1SDimitry Andric cl::desc("The maximum number of vtable for the last candidate.")); 134*0fca6ea1SDimitry Andric 1350b57cec5SDimitry Andric namespace { 1360b57cec5SDimitry Andric 137*0fca6ea1SDimitry Andric // The key is a vtable global variable, and the value is a map. 138*0fca6ea1SDimitry Andric // In the inner map, the key represents address point offsets and the value is a 139*0fca6ea1SDimitry Andric // constant for this address point. 140*0fca6ea1SDimitry Andric using VTableAddressPointOffsetValMap = 141*0fca6ea1SDimitry Andric SmallDenseMap<const GlobalVariable *, std::unordered_map<int, Constant *>>; 142*0fca6ea1SDimitry Andric 143*0fca6ea1SDimitry Andric // A struct to collect type information for a virtual call site. 144*0fca6ea1SDimitry Andric struct VirtualCallSiteInfo { 145*0fca6ea1SDimitry Andric // The offset from the address point to virtual function in the vtable. 146*0fca6ea1SDimitry Andric uint64_t FunctionOffset; 147*0fca6ea1SDimitry Andric // The instruction that computes the address point of vtable. 148*0fca6ea1SDimitry Andric Instruction *VPtr; 149*0fca6ea1SDimitry Andric // The compatible type used in LLVM type intrinsics. 150*0fca6ea1SDimitry Andric StringRef CompatibleTypeStr; 151*0fca6ea1SDimitry Andric }; 152*0fca6ea1SDimitry Andric 153*0fca6ea1SDimitry Andric // The key is a virtual call, and value is its type information. 154*0fca6ea1SDimitry Andric using VirtualCallSiteTypeInfoMap = 155*0fca6ea1SDimitry Andric SmallDenseMap<const CallBase *, VirtualCallSiteInfo>; 156*0fca6ea1SDimitry Andric 157*0fca6ea1SDimitry Andric // The key is vtable GUID, and value is its value profile count. 158*0fca6ea1SDimitry Andric using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t, 16>; 159*0fca6ea1SDimitry Andric 160*0fca6ea1SDimitry Andric // Return the address point offset of the given compatible type. 161*0fca6ea1SDimitry Andric // 162*0fca6ea1SDimitry Andric // Type metadata of a vtable specifies the types that can contain a pointer to 163*0fca6ea1SDimitry Andric // this vtable, for example, `Base*` can be a pointer to an derived type 164*0fca6ea1SDimitry Andric // but not vice versa. See also https://llvm.org/docs/TypeMetadata.html 165*0fca6ea1SDimitry Andric static std::optional<uint64_t> 166*0fca6ea1SDimitry Andric getAddressPointOffset(const GlobalVariable &VTableVar, 167*0fca6ea1SDimitry Andric StringRef CompatibleType) { 168*0fca6ea1SDimitry Andric SmallVector<MDNode *> Types; 169*0fca6ea1SDimitry Andric VTableVar.getMetadata(LLVMContext::MD_type, Types); 170*0fca6ea1SDimitry Andric 171*0fca6ea1SDimitry Andric for (MDNode *Type : Types) 172*0fca6ea1SDimitry Andric if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get()); 173*0fca6ea1SDimitry Andric TypeId && TypeId->getString() == CompatibleType) 174*0fca6ea1SDimitry Andric return cast<ConstantInt>( 175*0fca6ea1SDimitry Andric cast<ConstantAsMetadata>(Type->getOperand(0))->getValue()) 176*0fca6ea1SDimitry Andric ->getZExtValue(); 177*0fca6ea1SDimitry Andric 178*0fca6ea1SDimitry Andric return std::nullopt; 179*0fca6ea1SDimitry Andric } 180*0fca6ea1SDimitry Andric 181*0fca6ea1SDimitry Andric // Return a constant representing the vtable's address point specified by the 182*0fca6ea1SDimitry Andric // offset. 183*0fca6ea1SDimitry Andric static Constant *getVTableAddressPointOffset(GlobalVariable *VTable, 184*0fca6ea1SDimitry Andric uint32_t AddressPointOffset) { 185*0fca6ea1SDimitry Andric Module &M = *VTable->getParent(); 186*0fca6ea1SDimitry Andric LLVMContext &Context = M.getContext(); 187*0fca6ea1SDimitry Andric assert(AddressPointOffset < 188*0fca6ea1SDimitry Andric M.getDataLayout().getTypeAllocSize(VTable->getValueType()) && 189*0fca6ea1SDimitry Andric "Out-of-bound access"); 190*0fca6ea1SDimitry Andric 191*0fca6ea1SDimitry Andric return ConstantExpr::getInBoundsGetElementPtr( 192*0fca6ea1SDimitry Andric Type::getInt8Ty(Context), VTable, 193*0fca6ea1SDimitry Andric llvm::ConstantInt::get(Type::getInt32Ty(Context), AddressPointOffset)); 194*0fca6ea1SDimitry Andric } 195*0fca6ea1SDimitry Andric 196*0fca6ea1SDimitry Andric // Return the basic block in which Use `U` is used via its `UserInst`. 197*0fca6ea1SDimitry Andric static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) { 198*0fca6ea1SDimitry Andric if (PHINode *PN = dyn_cast<PHINode>(UserInst)) 199*0fca6ea1SDimitry Andric return PN->getIncomingBlock(U); 200*0fca6ea1SDimitry Andric 201*0fca6ea1SDimitry Andric return UserInst->getParent(); 202*0fca6ea1SDimitry Andric } 203*0fca6ea1SDimitry Andric 204*0fca6ea1SDimitry Andric // `DestBB` is a suitable basic block to sink `Inst` into when `Inst` have users 205*0fca6ea1SDimitry Andric // and all users are in `DestBB`. The caller guarantees that `Inst->getParent()` 206*0fca6ea1SDimitry Andric // is the sole predecessor of `DestBB` and `DestBB` is dominated by 207*0fca6ea1SDimitry Andric // `Inst->getParent()`. 208*0fca6ea1SDimitry Andric static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) { 209*0fca6ea1SDimitry Andric // 'BB' is used only by assert. 210*0fca6ea1SDimitry Andric [[maybe_unused]] BasicBlock *BB = Inst->getParent(); 211*0fca6ea1SDimitry Andric 212*0fca6ea1SDimitry Andric assert(BB != DestBB && BB->getTerminator()->getNumSuccessors() == 2 && 213*0fca6ea1SDimitry Andric DestBB->getUniquePredecessor() == BB && 214*0fca6ea1SDimitry Andric "Guaranteed by ICP transformation"); 215*0fca6ea1SDimitry Andric 216*0fca6ea1SDimitry Andric BasicBlock *UserBB = nullptr; 217*0fca6ea1SDimitry Andric for (Use &Use : Inst->uses()) { 218*0fca6ea1SDimitry Andric User *User = Use.getUser(); 219*0fca6ea1SDimitry Andric // Do checked cast since IR verifier guarantees that the user of an 220*0fca6ea1SDimitry Andric // instruction must be an instruction. See `Verifier::visitInstruction`. 221*0fca6ea1SDimitry Andric Instruction *UserInst = cast<Instruction>(User); 222*0fca6ea1SDimitry Andric // We can sink debug or pseudo instructions together with Inst. 223*0fca6ea1SDimitry Andric if (UserInst->isDebugOrPseudoInst()) 224*0fca6ea1SDimitry Andric continue; 225*0fca6ea1SDimitry Andric UserBB = getUserBasicBlock(Use, UserInst); 226*0fca6ea1SDimitry Andric // Do not sink if Inst is used in a basic block that is not DestBB. 227*0fca6ea1SDimitry Andric // TODO: Sink to the common dominator of all user blocks. 228*0fca6ea1SDimitry Andric if (UserBB != DestBB) 229*0fca6ea1SDimitry Andric return false; 230*0fca6ea1SDimitry Andric } 231*0fca6ea1SDimitry Andric return UserBB != nullptr; 232*0fca6ea1SDimitry Andric } 233*0fca6ea1SDimitry Andric 234*0fca6ea1SDimitry Andric // For the virtual call dispatch sequence, try to sink vtable load instructions 235*0fca6ea1SDimitry Andric // to the cold indirect call fallback. 236*0fca6ea1SDimitry Andric // FIXME: Move the sink eligibility check below to a utility function in 237*0fca6ea1SDimitry Andric // Transforms/Utils/ directory. 238*0fca6ea1SDimitry Andric static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { 239*0fca6ea1SDimitry Andric if (!isDestBBSuitableForSink(I, DestBlock)) 240*0fca6ea1SDimitry Andric return false; 241*0fca6ea1SDimitry Andric 242*0fca6ea1SDimitry Andric // Do not move control-flow-involving, volatile loads, vaarg, alloca 243*0fca6ea1SDimitry Andric // instructions, etc. 244*0fca6ea1SDimitry Andric if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() || 245*0fca6ea1SDimitry Andric isa<AllocaInst>(I)) 246*0fca6ea1SDimitry Andric return false; 247*0fca6ea1SDimitry Andric 248*0fca6ea1SDimitry Andric // Do not sink convergent call instructions. 249*0fca6ea1SDimitry Andric if (const auto *C = dyn_cast<CallBase>(I)) 250*0fca6ea1SDimitry Andric if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent()) 251*0fca6ea1SDimitry Andric return false; 252*0fca6ea1SDimitry Andric 253*0fca6ea1SDimitry Andric // Do not move an instruction that may write to memory. 254*0fca6ea1SDimitry Andric if (I->mayWriteToMemory()) 255*0fca6ea1SDimitry Andric return false; 256*0fca6ea1SDimitry Andric 257*0fca6ea1SDimitry Andric // We can only sink load instructions if there is nothing between the load and 258*0fca6ea1SDimitry Andric // the end of block that could change the value. 259*0fca6ea1SDimitry Andric if (I->mayReadFromMemory()) { 260*0fca6ea1SDimitry Andric // We already know that SrcBlock is the unique predecessor of DestBlock. 261*0fca6ea1SDimitry Andric for (BasicBlock::iterator Scan = std::next(I->getIterator()), 262*0fca6ea1SDimitry Andric E = I->getParent()->end(); 263*0fca6ea1SDimitry Andric Scan != E; ++Scan) { 264*0fca6ea1SDimitry Andric // Note analysis analysis can tell whether two pointers can point to the 265*0fca6ea1SDimitry Andric // same object in memory or not thereby find further opportunities to 266*0fca6ea1SDimitry Andric // sink. 267*0fca6ea1SDimitry Andric if (Scan->mayWriteToMemory()) 268*0fca6ea1SDimitry Andric return false; 269*0fca6ea1SDimitry Andric } 270*0fca6ea1SDimitry Andric } 271*0fca6ea1SDimitry Andric 272*0fca6ea1SDimitry Andric BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt(); 273*0fca6ea1SDimitry Andric I->moveBefore(*DestBlock, InsertPos); 274*0fca6ea1SDimitry Andric 275*0fca6ea1SDimitry Andric // TODO: Sink debug intrinsic users of I to 'DestBlock'. 276*0fca6ea1SDimitry Andric // 'InstCombinerImpl::tryToSinkInstructionDbgValues' and 277*0fca6ea1SDimitry Andric // 'InstCombinerImpl::tryToSinkInstructionDbgVariableRecords' already have 278*0fca6ea1SDimitry Andric // the core logic to do this. 279*0fca6ea1SDimitry Andric return true; 280*0fca6ea1SDimitry Andric } 281*0fca6ea1SDimitry Andric 282*0fca6ea1SDimitry Andric // Try to sink instructions after VPtr to the indirect call fallback. 283*0fca6ea1SDimitry Andric // Return the number of sunk IR instructions. 284*0fca6ea1SDimitry Andric static int tryToSinkInstructions(BasicBlock *OriginalBB, 285*0fca6ea1SDimitry Andric BasicBlock *IndirectCallBB) { 286*0fca6ea1SDimitry Andric int SinkCount = 0; 287*0fca6ea1SDimitry Andric // Do not sink across a critical edge for simplicity. 288*0fca6ea1SDimitry Andric if (IndirectCallBB->getUniquePredecessor() != OriginalBB) 289*0fca6ea1SDimitry Andric return SinkCount; 290*0fca6ea1SDimitry Andric // Sink all eligible instructions in OriginalBB in reverse order. 291*0fca6ea1SDimitry Andric for (Instruction &I : 292*0fca6ea1SDimitry Andric llvm::make_early_inc_range(llvm::drop_begin(llvm::reverse(*OriginalBB)))) 293*0fca6ea1SDimitry Andric if (tryToSinkInstruction(&I, IndirectCallBB)) 294*0fca6ea1SDimitry Andric SinkCount++; 295*0fca6ea1SDimitry Andric 296*0fca6ea1SDimitry Andric return SinkCount; 297*0fca6ea1SDimitry Andric } 298*0fca6ea1SDimitry Andric 29906c3fb27SDimitry Andric // Promote indirect calls to conditional direct calls, keeping track of 30006c3fb27SDimitry Andric // thresholds. 30106c3fb27SDimitry Andric class IndirectCallPromoter { 3020b57cec5SDimitry Andric private: 3030b57cec5SDimitry Andric Function &F; 304*0fca6ea1SDimitry Andric Module &M; 305*0fca6ea1SDimitry Andric 306*0fca6ea1SDimitry Andric ProfileSummaryInfo *PSI = nullptr; 3070b57cec5SDimitry Andric 3080b57cec5SDimitry Andric // Symtab that maps indirect call profile values to function names and 3090b57cec5SDimitry Andric // defines. 31006c3fb27SDimitry Andric InstrProfSymtab *const Symtab; 3110b57cec5SDimitry Andric 31206c3fb27SDimitry Andric const bool SamplePGO; 3130b57cec5SDimitry Andric 314*0fca6ea1SDimitry Andric // A map from a virtual call to its type information. 315*0fca6ea1SDimitry Andric const VirtualCallSiteTypeInfoMap &VirtualCSInfo; 316*0fca6ea1SDimitry Andric 317*0fca6ea1SDimitry Andric VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal; 318*0fca6ea1SDimitry Andric 3190b57cec5SDimitry Andric OptimizationRemarkEmitter &ORE; 3200b57cec5SDimitry Andric 3210b57cec5SDimitry Andric // A struct that records the direct target and it's call count. 3220b57cec5SDimitry Andric struct PromotionCandidate { 32306c3fb27SDimitry Andric Function *const TargetFunction; 32406c3fb27SDimitry Andric const uint64_t Count; 3250b57cec5SDimitry Andric 326*0fca6ea1SDimitry Andric // The following fields only exists for promotion candidates with vtable 327*0fca6ea1SDimitry Andric // information. 328*0fca6ea1SDimitry Andric // 329*0fca6ea1SDimitry Andric // Due to class inheritance, one virtual call candidate can come from 330*0fca6ea1SDimitry Andric // multiple vtables. `VTableGUIDAndCounts` tracks the vtable GUIDs and 331*0fca6ea1SDimitry Andric // counts for 'TargetFunction'. `AddressPoints` stores the vtable address 332*0fca6ea1SDimitry Andric // points for comparison. 333*0fca6ea1SDimitry Andric VTableGUIDCountsMap VTableGUIDAndCounts; 334*0fca6ea1SDimitry Andric SmallVector<Constant *> AddressPoints; 335*0fca6ea1SDimitry Andric 3360b57cec5SDimitry Andric PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {} 3370b57cec5SDimitry Andric }; 3380b57cec5SDimitry Andric 3390b57cec5SDimitry Andric // Check if the indirect-call call site should be promoted. Return the number 3400b57cec5SDimitry Andric // of promotions. Inst is the candidate indirect call, ValueDataRef 3410b57cec5SDimitry Andric // contains the array of value profile data for profiled targets, 3420b57cec5SDimitry Andric // TotalCount is the total profiled count of call executions, and 3430b57cec5SDimitry Andric // NumCandidates is the number of candidate entries in ValueDataRef. 3440b57cec5SDimitry Andric std::vector<PromotionCandidate> getPromotionCandidatesForCallSite( 345*0fca6ea1SDimitry Andric const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef, 3460b57cec5SDimitry Andric uint64_t TotalCount, uint32_t NumCandidates); 3470b57cec5SDimitry Andric 348*0fca6ea1SDimitry Andric // Promote a list of targets for one indirect-call callsite by comparing 349*0fca6ea1SDimitry Andric // indirect callee with functions. Return true if there are IR 350*0fca6ea1SDimitry Andric // transformations and false otherwise. 351*0fca6ea1SDimitry Andric bool tryToPromoteWithFuncCmp(CallBase &CB, Instruction *VPtr, 352*0fca6ea1SDimitry Andric ArrayRef<PromotionCandidate> Candidates, 353*0fca6ea1SDimitry Andric uint64_t TotalCount, 354*0fca6ea1SDimitry Andric ArrayRef<InstrProfValueData> ICallProfDataRef, 355*0fca6ea1SDimitry Andric uint32_t NumCandidates, 356*0fca6ea1SDimitry Andric VTableGUIDCountsMap &VTableGUIDCounts); 357*0fca6ea1SDimitry Andric 358*0fca6ea1SDimitry Andric // Promote a list of targets for one indirect call by comparing vtables with 359*0fca6ea1SDimitry Andric // functions. Return true if there are IR transformations and false 360*0fca6ea1SDimitry Andric // otherwise. 361*0fca6ea1SDimitry Andric bool tryToPromoteWithVTableCmp( 362*0fca6ea1SDimitry Andric CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates, 363*0fca6ea1SDimitry Andric uint64_t TotalFuncCount, uint32_t NumCandidates, 364*0fca6ea1SDimitry Andric MutableArrayRef<InstrProfValueData> ICallProfDataRef, 365*0fca6ea1SDimitry Andric VTableGUIDCountsMap &VTableGUIDCounts); 366*0fca6ea1SDimitry Andric 367*0fca6ea1SDimitry Andric // Return true if it's profitable to compare vtables for the callsite. 368*0fca6ea1SDimitry Andric bool isProfitableToCompareVTables(const CallBase &CB, 369*0fca6ea1SDimitry Andric ArrayRef<PromotionCandidate> Candidates, 370*0fca6ea1SDimitry Andric uint64_t TotalCount); 371*0fca6ea1SDimitry Andric 372*0fca6ea1SDimitry Andric // Given an indirect callsite and the list of function candidates, compute 373*0fca6ea1SDimitry Andric // the following vtable information in output parameters and return vtable 374*0fca6ea1SDimitry Andric // pointer if type profiles exist. 375*0fca6ea1SDimitry Andric // - Populate `VTableGUIDCounts` with <vtable-guid, count> using !prof 376*0fca6ea1SDimitry Andric // metadata attached on the vtable pointer. 377*0fca6ea1SDimitry Andric // - For each function candidate, finds out the vtables from which it gets 378*0fca6ea1SDimitry Andric // called and stores the <vtable-guid, count> in promotion candidate. 379*0fca6ea1SDimitry Andric Instruction *computeVTableInfos(const CallBase *CB, 380*0fca6ea1SDimitry Andric VTableGUIDCountsMap &VTableGUIDCounts, 381*0fca6ea1SDimitry Andric std::vector<PromotionCandidate> &Candidates); 382*0fca6ea1SDimitry Andric 383*0fca6ea1SDimitry Andric Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV, 384*0fca6ea1SDimitry Andric uint64_t AddressPointOffset); 385*0fca6ea1SDimitry Andric 386*0fca6ea1SDimitry Andric void updateFuncValueProfiles(CallBase &CB, ArrayRef<InstrProfValueData> VDs, 387*0fca6ea1SDimitry Andric uint64_t Sum, uint32_t MaxMDCount); 388*0fca6ea1SDimitry Andric 389*0fca6ea1SDimitry Andric void updateVPtrValueProfiles(Instruction *VPtr, 390*0fca6ea1SDimitry Andric VTableGUIDCountsMap &VTableGUIDCounts); 3910b57cec5SDimitry Andric 3920b57cec5SDimitry Andric public: 393*0fca6ea1SDimitry Andric IndirectCallPromoter( 394*0fca6ea1SDimitry Andric Function &Func, Module &M, ProfileSummaryInfo *PSI, 395*0fca6ea1SDimitry Andric InstrProfSymtab *Symtab, bool SamplePGO, 396*0fca6ea1SDimitry Andric const VirtualCallSiteTypeInfoMap &VirtualCSInfo, 397*0fca6ea1SDimitry Andric VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal, 39806c3fb27SDimitry Andric OptimizationRemarkEmitter &ORE) 399*0fca6ea1SDimitry Andric : F(Func), M(M), PSI(PSI), Symtab(Symtab), SamplePGO(SamplePGO), 400*0fca6ea1SDimitry Andric VirtualCSInfo(VirtualCSInfo), 401*0fca6ea1SDimitry Andric VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {} 40206c3fb27SDimitry Andric IndirectCallPromoter(const IndirectCallPromoter &) = delete; 40306c3fb27SDimitry Andric IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete; 4040b57cec5SDimitry Andric 4050b57cec5SDimitry Andric bool processFunction(ProfileSummaryInfo *PSI); 4060b57cec5SDimitry Andric }; 4070b57cec5SDimitry Andric 4080b57cec5SDimitry Andric } // end anonymous namespace 4090b57cec5SDimitry Andric 4100b57cec5SDimitry Andric // Indirect-call promotion heuristic. The direct targets are sorted based on 4110b57cec5SDimitry Andric // the count. Stop at the first target that is not promoted. 41206c3fb27SDimitry Andric std::vector<IndirectCallPromoter::PromotionCandidate> 41306c3fb27SDimitry Andric IndirectCallPromoter::getPromotionCandidatesForCallSite( 414*0fca6ea1SDimitry Andric const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef, 4150b57cec5SDimitry Andric uint64_t TotalCount, uint32_t NumCandidates) { 4160b57cec5SDimitry Andric std::vector<PromotionCandidate> Ret; 4170b57cec5SDimitry Andric 4185ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB 4190b57cec5SDimitry Andric << " Num_targets: " << ValueDataRef.size() 4200b57cec5SDimitry Andric << " Num_candidates: " << NumCandidates << "\n"); 4210b57cec5SDimitry Andric NumOfPGOICallsites++; 4220b57cec5SDimitry Andric if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) { 4230b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Skip: User options.\n"); 4240b57cec5SDimitry Andric return Ret; 4250b57cec5SDimitry Andric } 4260b57cec5SDimitry Andric 4270b57cec5SDimitry Andric for (uint32_t I = 0; I < NumCandidates; I++) { 4280b57cec5SDimitry Andric uint64_t Count = ValueDataRef[I].Count; 4290b57cec5SDimitry Andric assert(Count <= TotalCount); 430fe6060f1SDimitry Andric (void)TotalCount; 4310b57cec5SDimitry Andric uint64_t Target = ValueDataRef[I].Value; 4320b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count 4330b57cec5SDimitry Andric << " Target_func: " << Target << "\n"); 4340b57cec5SDimitry Andric 4355ffd83dbSDimitry Andric if (ICPInvokeOnly && isa<CallInst>(CB)) { 4360b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Not promote: User options.\n"); 4370b57cec5SDimitry Andric ORE.emit([&]() { 4385ffd83dbSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB) 4390b57cec5SDimitry Andric << " Not promote: User options"; 4400b57cec5SDimitry Andric }); 4410b57cec5SDimitry Andric break; 4420b57cec5SDimitry Andric } 4435ffd83dbSDimitry Andric if (ICPCallOnly && isa<InvokeInst>(CB)) { 4440b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Not promote: User option.\n"); 4450b57cec5SDimitry Andric ORE.emit([&]() { 4465ffd83dbSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB) 4470b57cec5SDimitry Andric << " Not promote: User options"; 4480b57cec5SDimitry Andric }); 4490b57cec5SDimitry Andric break; 4500b57cec5SDimitry Andric } 4510b57cec5SDimitry Andric if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { 4520b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n"); 4530b57cec5SDimitry Andric ORE.emit([&]() { 4545ffd83dbSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB) 4550b57cec5SDimitry Andric << " Not promote: Cutoff reached"; 4560b57cec5SDimitry Andric }); 4570b57cec5SDimitry Andric break; 4580b57cec5SDimitry Andric } 4590b57cec5SDimitry Andric 460e8d8bef9SDimitry Andric // Don't promote if the symbol is not defined in the module. This avoids 461e8d8bef9SDimitry Andric // creating a reference to a symbol that doesn't exist in the module 462e8d8bef9SDimitry Andric // This can happen when we compile with a sample profile collected from 463e8d8bef9SDimitry Andric // one binary but used for another, which may have profiled targets that 464e8d8bef9SDimitry Andric // aren't used in the new binary. We might have a declaration initially in 465e8d8bef9SDimitry Andric // the case where the symbol is globally dead in the binary and removed by 466e8d8bef9SDimitry Andric // ThinLTO. 4670b57cec5SDimitry Andric Function *TargetFunction = Symtab->getFunction(Target); 468e8d8bef9SDimitry Andric if (TargetFunction == nullptr || TargetFunction->isDeclaration()) { 4690b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n"); 4700b57cec5SDimitry Andric ORE.emit([&]() { 4715ffd83dbSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB) 4720b57cec5SDimitry Andric << "Cannot promote indirect call: target with md5sum " 4730b57cec5SDimitry Andric << ore::NV("target md5sum", Target) << " not found"; 4740b57cec5SDimitry Andric }); 4750b57cec5SDimitry Andric break; 4760b57cec5SDimitry Andric } 4770b57cec5SDimitry Andric 4780b57cec5SDimitry Andric const char *Reason = nullptr; 4795ffd83dbSDimitry Andric if (!isLegalToPromote(CB, TargetFunction, &Reason)) { 4800b57cec5SDimitry Andric using namespace ore; 4810b57cec5SDimitry Andric 4820b57cec5SDimitry Andric ORE.emit([&]() { 4835ffd83dbSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB) 4840b57cec5SDimitry Andric << "Cannot promote indirect call to " 4850b57cec5SDimitry Andric << NV("TargetFunction", TargetFunction) << " with count of " 4860b57cec5SDimitry Andric << NV("Count", Count) << ": " << Reason; 4870b57cec5SDimitry Andric }); 4880b57cec5SDimitry Andric break; 4890b57cec5SDimitry Andric } 4900b57cec5SDimitry Andric 4910b57cec5SDimitry Andric Ret.push_back(PromotionCandidate(TargetFunction, Count)); 4920b57cec5SDimitry Andric TotalCount -= Count; 4930b57cec5SDimitry Andric } 4940b57cec5SDimitry Andric return Ret; 4950b57cec5SDimitry Andric } 4960b57cec5SDimitry Andric 497*0fca6ea1SDimitry Andric Constant *IndirectCallPromoter::getOrCreateVTableAddressPointVar( 498*0fca6ea1SDimitry Andric GlobalVariable *GV, uint64_t AddressPointOffset) { 499*0fca6ea1SDimitry Andric auto [Iter, Inserted] = 500*0fca6ea1SDimitry Andric VTableAddressPointOffsetVal[GV].try_emplace(AddressPointOffset, nullptr); 501*0fca6ea1SDimitry Andric if (Inserted) 502*0fca6ea1SDimitry Andric Iter->second = getVTableAddressPointOffset(GV, AddressPointOffset); 503*0fca6ea1SDimitry Andric return Iter->second; 504*0fca6ea1SDimitry Andric } 505*0fca6ea1SDimitry Andric 506*0fca6ea1SDimitry Andric Instruction *IndirectCallPromoter::computeVTableInfos( 507*0fca6ea1SDimitry Andric const CallBase *CB, VTableGUIDCountsMap &GUIDCountsMap, 508*0fca6ea1SDimitry Andric std::vector<PromotionCandidate> &Candidates) { 509*0fca6ea1SDimitry Andric if (!EnableVTableProfileUse) 510*0fca6ea1SDimitry Andric return nullptr; 511*0fca6ea1SDimitry Andric 512*0fca6ea1SDimitry Andric // Take the following code sequence as an example, here is how the code works 513*0fca6ea1SDimitry Andric // @vtable1 = {[n x ptr] [... ptr @func1]} 514*0fca6ea1SDimitry Andric // @vtable2 = {[m x ptr] [... ptr @func2]} 515*0fca6ea1SDimitry Andric // 516*0fca6ea1SDimitry Andric // %vptr = load ptr, ptr %d, !prof !0 517*0fca6ea1SDimitry Andric // %0 = tail call i1 @llvm.type.test(ptr %vptr, metadata !"vtable1") 518*0fca6ea1SDimitry Andric // tail call void @llvm.assume(i1 %0) 519*0fca6ea1SDimitry Andric // %vfn = getelementptr inbounds ptr, ptr %vptr, i64 1 520*0fca6ea1SDimitry Andric // %1 = load ptr, ptr %vfn 521*0fca6ea1SDimitry Andric // call void %1(ptr %d), !prof !1 522*0fca6ea1SDimitry Andric // 523*0fca6ea1SDimitry Andric // !0 = !{!"VP", i32 2, i64 100, i64 123, i64 50, i64 456, i64 50} 524*0fca6ea1SDimitry Andric // !1 = !{!"VP", i32 0, i64 100, i64 789, i64 50, i64 579, i64 50} 525*0fca6ea1SDimitry Andric // 526*0fca6ea1SDimitry Andric // Step 1. Find out the %vptr instruction for indirect call and use its !prof 527*0fca6ea1SDimitry Andric // to populate `GUIDCountsMap`. 528*0fca6ea1SDimitry Andric // Step 2. For each vtable-guid, look up its definition from symtab. LTO can 529*0fca6ea1SDimitry Andric // make vtable definitions visible across modules. 530*0fca6ea1SDimitry Andric // Step 3. Compute the byte offset of the virtual call, by adding vtable 531*0fca6ea1SDimitry Andric // address point offset and function's offset relative to vtable address 532*0fca6ea1SDimitry Andric // point. For each function candidate, this step tells us the vtable from 533*0fca6ea1SDimitry Andric // which it comes from, and the vtable address point to compare %vptr with. 534*0fca6ea1SDimitry Andric 535*0fca6ea1SDimitry Andric // Only virtual calls have virtual call site info. 536*0fca6ea1SDimitry Andric auto Iter = VirtualCSInfo.find(CB); 537*0fca6ea1SDimitry Andric if (Iter == VirtualCSInfo.end()) 538*0fca6ea1SDimitry Andric return nullptr; 539*0fca6ea1SDimitry Andric 540*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "\nComputing vtable infos for callsite #" 541*0fca6ea1SDimitry Andric << NumOfPGOICallsites << "\n"); 542*0fca6ea1SDimitry Andric 543*0fca6ea1SDimitry Andric const auto &VirtualCallInfo = Iter->second; 544*0fca6ea1SDimitry Andric Instruction *VPtr = VirtualCallInfo.VPtr; 545*0fca6ea1SDimitry Andric 546*0fca6ea1SDimitry Andric SmallDenseMap<Function *, int, 4> CalleeIndexMap; 547*0fca6ea1SDimitry Andric for (size_t I = 0; I < Candidates.size(); I++) 548*0fca6ea1SDimitry Andric CalleeIndexMap[Candidates[I].TargetFunction] = I; 549*0fca6ea1SDimitry Andric 550*0fca6ea1SDimitry Andric uint64_t TotalVTableCount = 0; 551*0fca6ea1SDimitry Andric auto VTableValueDataArray = 552*0fca6ea1SDimitry Andric getValueProfDataFromInst(*VirtualCallInfo.VPtr, IPVK_VTableTarget, 553*0fca6ea1SDimitry Andric MaxNumVTableAnnotations, TotalVTableCount); 554*0fca6ea1SDimitry Andric if (VTableValueDataArray.empty()) 555*0fca6ea1SDimitry Andric return VPtr; 556*0fca6ea1SDimitry Andric 557*0fca6ea1SDimitry Andric // Compute the functions and counts from by each vtable. 558*0fca6ea1SDimitry Andric for (const auto &V : VTableValueDataArray) { 559*0fca6ea1SDimitry Andric uint64_t VTableVal = V.Value; 560*0fca6ea1SDimitry Andric GUIDCountsMap[VTableVal] = V.Count; 561*0fca6ea1SDimitry Andric GlobalVariable *VTableVar = Symtab->getGlobalVariable(VTableVal); 562*0fca6ea1SDimitry Andric if (!VTableVar) { 563*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " Cannot find vtable definition for " << VTableVal 564*0fca6ea1SDimitry Andric << "; maybe the vtable isn't imported\n"); 565*0fca6ea1SDimitry Andric continue; 566*0fca6ea1SDimitry Andric } 567*0fca6ea1SDimitry Andric 568*0fca6ea1SDimitry Andric std::optional<uint64_t> MaybeAddressPointOffset = 569*0fca6ea1SDimitry Andric getAddressPointOffset(*VTableVar, VirtualCallInfo.CompatibleTypeStr); 570*0fca6ea1SDimitry Andric if (!MaybeAddressPointOffset) 571*0fca6ea1SDimitry Andric continue; 572*0fca6ea1SDimitry Andric 573*0fca6ea1SDimitry Andric const uint64_t AddressPointOffset = *MaybeAddressPointOffset; 574*0fca6ea1SDimitry Andric 575*0fca6ea1SDimitry Andric Function *Callee = nullptr; 576*0fca6ea1SDimitry Andric std::tie(Callee, std::ignore) = getFunctionAtVTableOffset( 577*0fca6ea1SDimitry Andric VTableVar, AddressPointOffset + VirtualCallInfo.FunctionOffset, M); 578*0fca6ea1SDimitry Andric if (!Callee) 579*0fca6ea1SDimitry Andric continue; 580*0fca6ea1SDimitry Andric auto CalleeIndexIter = CalleeIndexMap.find(Callee); 581*0fca6ea1SDimitry Andric if (CalleeIndexIter == CalleeIndexMap.end()) 582*0fca6ea1SDimitry Andric continue; 583*0fca6ea1SDimitry Andric 584*0fca6ea1SDimitry Andric auto &Candidate = Candidates[CalleeIndexIter->second]; 585*0fca6ea1SDimitry Andric // There shouldn't be duplicate GUIDs in one !prof metadata (except 586*0fca6ea1SDimitry Andric // duplicated zeros), so assign counters directly won't cause overwrite or 587*0fca6ea1SDimitry Andric // counter loss. 588*0fca6ea1SDimitry Andric Candidate.VTableGUIDAndCounts[VTableVal] = V.Count; 589*0fca6ea1SDimitry Andric Candidate.AddressPoints.push_back( 590*0fca6ea1SDimitry Andric getOrCreateVTableAddressPointVar(VTableVar, AddressPointOffset)); 591*0fca6ea1SDimitry Andric } 592*0fca6ea1SDimitry Andric 593*0fca6ea1SDimitry Andric return VPtr; 594*0fca6ea1SDimitry Andric } 595*0fca6ea1SDimitry Andric 596*0fca6ea1SDimitry Andric // Creates 'branch_weights' prof metadata using TrueWeight and FalseWeight. 597*0fca6ea1SDimitry Andric // Scales uint64_t counters down to uint32_t if necessary to prevent overflow. 598*0fca6ea1SDimitry Andric static MDNode *createBranchWeights(LLVMContext &Context, uint64_t TrueWeight, 599*0fca6ea1SDimitry Andric uint64_t FalseWeight) { 600*0fca6ea1SDimitry Andric MDBuilder MDB(Context); 601*0fca6ea1SDimitry Andric uint64_t Scale = calculateCountScale(std::max(TrueWeight, FalseWeight)); 602*0fca6ea1SDimitry Andric return MDB.createBranchWeights(scaleBranchCount(TrueWeight, Scale), 603*0fca6ea1SDimitry Andric scaleBranchCount(FalseWeight, Scale)); 604*0fca6ea1SDimitry Andric } 605*0fca6ea1SDimitry Andric 6065ffd83dbSDimitry Andric CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee, 6070b57cec5SDimitry Andric uint64_t Count, uint64_t TotalCount, 6080b57cec5SDimitry Andric bool AttachProfToDirectCall, 6090b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE) { 610*0fca6ea1SDimitry Andric CallBase &NewInst = promoteCallWithIfThenElse( 611*0fca6ea1SDimitry Andric CB, DirectCallee, 612*0fca6ea1SDimitry Andric createBranchWeights(CB.getContext(), Count, TotalCount - Count)); 6130b57cec5SDimitry Andric 614*0fca6ea1SDimitry Andric if (AttachProfToDirectCall) 615*0fca6ea1SDimitry Andric setBranchWeights(NewInst, {static_cast<uint32_t>(Count)}, 616*0fca6ea1SDimitry Andric /*IsExpected=*/false); 6170b57cec5SDimitry Andric 6180b57cec5SDimitry Andric using namespace ore; 6190b57cec5SDimitry Andric 6200b57cec5SDimitry Andric if (ORE) 6210b57cec5SDimitry Andric ORE->emit([&]() { 6225ffd83dbSDimitry Andric return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB) 6230b57cec5SDimitry Andric << "Promote indirect call to " << NV("DirectCallee", DirectCallee) 6240b57cec5SDimitry Andric << " with count " << NV("Count", Count) << " out of " 6250b57cec5SDimitry Andric << NV("TotalCount", TotalCount); 6260b57cec5SDimitry Andric }); 6270b57cec5SDimitry Andric return NewInst; 6280b57cec5SDimitry Andric } 6290b57cec5SDimitry Andric 6300b57cec5SDimitry Andric // Promote indirect-call to conditional direct-call for one callsite. 631*0fca6ea1SDimitry Andric bool IndirectCallPromoter::tryToPromoteWithFuncCmp( 632*0fca6ea1SDimitry Andric CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates, 633*0fca6ea1SDimitry Andric uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef, 634*0fca6ea1SDimitry Andric uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) { 6350b57cec5SDimitry Andric uint32_t NumPromoted = 0; 6360b57cec5SDimitry Andric 637bdd1243dSDimitry Andric for (const auto &C : Candidates) { 638*0fca6ea1SDimitry Andric uint64_t FuncCount = C.Count; 639*0fca6ea1SDimitry Andric pgo::promoteIndirectCall(CB, C.TargetFunction, FuncCount, TotalCount, 640*0fca6ea1SDimitry Andric SamplePGO, &ORE); 641*0fca6ea1SDimitry Andric assert(TotalCount >= FuncCount); 642*0fca6ea1SDimitry Andric TotalCount -= FuncCount; 6430b57cec5SDimitry Andric NumOfPGOICallPromotion++; 6440b57cec5SDimitry Andric NumPromoted++; 645*0fca6ea1SDimitry Andric 646*0fca6ea1SDimitry Andric if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty()) 647*0fca6ea1SDimitry Andric continue; 648*0fca6ea1SDimitry Andric 649*0fca6ea1SDimitry Andric // After a virtual call candidate gets promoted, update the vtable's counts 650*0fca6ea1SDimitry Andric // proportionally. Each vtable-guid in `C.VTableGUIDAndCounts` represents 651*0fca6ea1SDimitry Andric // a vtable from which the virtual call is loaded. Compute the sum and use 652*0fca6ea1SDimitry Andric // 128-bit APInt to improve accuracy. 653*0fca6ea1SDimitry Andric uint64_t SumVTableCount = 0; 654*0fca6ea1SDimitry Andric for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) 655*0fca6ea1SDimitry Andric SumVTableCount += VTableCount; 656*0fca6ea1SDimitry Andric 657*0fca6ea1SDimitry Andric for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) { 658*0fca6ea1SDimitry Andric APInt APFuncCount((unsigned)128, FuncCount, false /*signed*/); 659*0fca6ea1SDimitry Andric APFuncCount *= VTableCount; 660*0fca6ea1SDimitry Andric VTableGUIDCounts[GUID] -= APFuncCount.udiv(SumVTableCount).getZExtValue(); 6610b57cec5SDimitry Andric } 662*0fca6ea1SDimitry Andric } 663*0fca6ea1SDimitry Andric if (NumPromoted == 0) 664*0fca6ea1SDimitry Andric return false; 665*0fca6ea1SDimitry Andric 666*0fca6ea1SDimitry Andric assert(NumPromoted <= ICallProfDataRef.size() && 667*0fca6ea1SDimitry Andric "Number of promoted functions should not be greater than the number " 668*0fca6ea1SDimitry Andric "of values in profile metadata"); 669*0fca6ea1SDimitry Andric 670*0fca6ea1SDimitry Andric // Update value profiles on the indirect call. 671*0fca6ea1SDimitry Andric updateFuncValueProfiles(CB, ICallProfDataRef.slice(NumPromoted), TotalCount, 672*0fca6ea1SDimitry Andric NumCandidates); 673*0fca6ea1SDimitry Andric updateVPtrValueProfiles(VPtr, VTableGUIDCounts); 674*0fca6ea1SDimitry Andric return true; 675*0fca6ea1SDimitry Andric } 676*0fca6ea1SDimitry Andric 677*0fca6ea1SDimitry Andric void IndirectCallPromoter::updateFuncValueProfiles( 678*0fca6ea1SDimitry Andric CallBase &CB, ArrayRef<InstrProfValueData> CallVDs, uint64_t TotalCount, 679*0fca6ea1SDimitry Andric uint32_t MaxMDCount) { 680*0fca6ea1SDimitry Andric // First clear the existing !prof. 681*0fca6ea1SDimitry Andric CB.setMetadata(LLVMContext::MD_prof, nullptr); 682*0fca6ea1SDimitry Andric // Annotate the remaining value profiles if counter is not zero. 683*0fca6ea1SDimitry Andric if (TotalCount != 0) 684*0fca6ea1SDimitry Andric annotateValueSite(M, CB, CallVDs, TotalCount, IPVK_IndirectCallTarget, 685*0fca6ea1SDimitry Andric MaxMDCount); 686*0fca6ea1SDimitry Andric } 687*0fca6ea1SDimitry Andric 688*0fca6ea1SDimitry Andric void IndirectCallPromoter::updateVPtrValueProfiles( 689*0fca6ea1SDimitry Andric Instruction *VPtr, VTableGUIDCountsMap &VTableGUIDCounts) { 690*0fca6ea1SDimitry Andric if (!EnableVTableProfileUse || VPtr == nullptr || 691*0fca6ea1SDimitry Andric !VPtr->getMetadata(LLVMContext::MD_prof)) 692*0fca6ea1SDimitry Andric return; 693*0fca6ea1SDimitry Andric VPtr->setMetadata(LLVMContext::MD_prof, nullptr); 694*0fca6ea1SDimitry Andric std::vector<InstrProfValueData> VTableValueProfiles; 695*0fca6ea1SDimitry Andric uint64_t TotalVTableCount = 0; 696*0fca6ea1SDimitry Andric for (auto [GUID, Count] : VTableGUIDCounts) { 697*0fca6ea1SDimitry Andric if (Count == 0) 698*0fca6ea1SDimitry Andric continue; 699*0fca6ea1SDimitry Andric 700*0fca6ea1SDimitry Andric VTableValueProfiles.push_back({GUID, Count}); 701*0fca6ea1SDimitry Andric TotalVTableCount += Count; 702*0fca6ea1SDimitry Andric } 703*0fca6ea1SDimitry Andric llvm::sort(VTableValueProfiles, 704*0fca6ea1SDimitry Andric [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) { 705*0fca6ea1SDimitry Andric return LHS.Count > RHS.Count; 706*0fca6ea1SDimitry Andric }); 707*0fca6ea1SDimitry Andric 708*0fca6ea1SDimitry Andric annotateValueSite(M, *VPtr, VTableValueProfiles, TotalVTableCount, 709*0fca6ea1SDimitry Andric IPVK_VTableTarget, VTableValueProfiles.size()); 710*0fca6ea1SDimitry Andric } 711*0fca6ea1SDimitry Andric 712*0fca6ea1SDimitry Andric bool IndirectCallPromoter::tryToPromoteWithVTableCmp( 713*0fca6ea1SDimitry Andric CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates, 714*0fca6ea1SDimitry Andric uint64_t TotalFuncCount, uint32_t NumCandidates, 715*0fca6ea1SDimitry Andric MutableArrayRef<InstrProfValueData> ICallProfDataRef, 716*0fca6ea1SDimitry Andric VTableGUIDCountsMap &VTableGUIDCounts) { 717*0fca6ea1SDimitry Andric SmallVector<uint64_t, 4> PromotedFuncCount; 718*0fca6ea1SDimitry Andric 719*0fca6ea1SDimitry Andric for (const auto &Candidate : Candidates) { 720*0fca6ea1SDimitry Andric for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts) 721*0fca6ea1SDimitry Andric VTableGUIDCounts[GUID] -= Count; 722*0fca6ea1SDimitry Andric 723*0fca6ea1SDimitry Andric // 'OriginalBB' is the basic block of indirect call. After each candidate 724*0fca6ea1SDimitry Andric // is promoted, a new basic block is created for the indirect fallback basic 725*0fca6ea1SDimitry Andric // block and indirect call `CB` is moved into this new BB. 726*0fca6ea1SDimitry Andric BasicBlock *OriginalBB = CB.getParent(); 727*0fca6ea1SDimitry Andric promoteCallWithVTableCmp( 728*0fca6ea1SDimitry Andric CB, VPtr, Candidate.TargetFunction, Candidate.AddressPoints, 729*0fca6ea1SDimitry Andric createBranchWeights(CB.getContext(), Candidate.Count, 730*0fca6ea1SDimitry Andric TotalFuncCount - Candidate.Count)); 731*0fca6ea1SDimitry Andric 732*0fca6ea1SDimitry Andric int SinkCount = tryToSinkInstructions(OriginalBB, CB.getParent()); 733*0fca6ea1SDimitry Andric 734*0fca6ea1SDimitry Andric ORE.emit([&]() { 735*0fca6ea1SDimitry Andric OptimizationRemark Remark(DEBUG_TYPE, "Promoted", &CB); 736*0fca6ea1SDimitry Andric 737*0fca6ea1SDimitry Andric const auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts; 738*0fca6ea1SDimitry Andric Remark << "Promote indirect call to " 739*0fca6ea1SDimitry Andric << ore::NV("DirectCallee", Candidate.TargetFunction) 740*0fca6ea1SDimitry Andric << " with count " << ore::NV("Count", Candidate.Count) 741*0fca6ea1SDimitry Andric << " out of " << ore::NV("TotalCount", TotalFuncCount) << ", sink " 742*0fca6ea1SDimitry Andric << ore::NV("SinkCount", SinkCount) 743*0fca6ea1SDimitry Andric << " instruction(s) and compare " 744*0fca6ea1SDimitry Andric << ore::NV("VTable", VTableGUIDAndCounts.size()) 745*0fca6ea1SDimitry Andric << " vtable(s): {"; 746*0fca6ea1SDimitry Andric 747*0fca6ea1SDimitry Andric // Sort GUIDs so remark message is deterministic. 748*0fca6ea1SDimitry Andric std::set<uint64_t> GUIDSet; 749*0fca6ea1SDimitry Andric for (auto [GUID, Count] : VTableGUIDAndCounts) 750*0fca6ea1SDimitry Andric GUIDSet.insert(GUID); 751*0fca6ea1SDimitry Andric for (auto Iter = GUIDSet.begin(); Iter != GUIDSet.end(); Iter++) { 752*0fca6ea1SDimitry Andric if (Iter != GUIDSet.begin()) 753*0fca6ea1SDimitry Andric Remark << ", "; 754*0fca6ea1SDimitry Andric Remark << ore::NV("VTable", Symtab->getGlobalVariable(*Iter)); 755*0fca6ea1SDimitry Andric } 756*0fca6ea1SDimitry Andric 757*0fca6ea1SDimitry Andric Remark << "}"; 758*0fca6ea1SDimitry Andric 759*0fca6ea1SDimitry Andric return Remark; 760*0fca6ea1SDimitry Andric }); 761*0fca6ea1SDimitry Andric 762*0fca6ea1SDimitry Andric PromotedFuncCount.push_back(Candidate.Count); 763*0fca6ea1SDimitry Andric 764*0fca6ea1SDimitry Andric assert(TotalFuncCount >= Candidate.Count && 765*0fca6ea1SDimitry Andric "Within one prof metadata, total count is the sum of counts from " 766*0fca6ea1SDimitry Andric "individual <target, count> pairs"); 767*0fca6ea1SDimitry Andric // Use std::min since 'TotalFuncCount' is the saturated sum of individual 768*0fca6ea1SDimitry Andric // counts, see 769*0fca6ea1SDimitry Andric // https://github.com/llvm/llvm-project/blob/abedb3b8356d5d56f1c575c4f7682fba2cb19787/llvm/lib/ProfileData/InstrProf.cpp#L1281-L1288 770*0fca6ea1SDimitry Andric TotalFuncCount -= std::min(TotalFuncCount, Candidate.Count); 771*0fca6ea1SDimitry Andric NumOfPGOICallPromotion++; 772*0fca6ea1SDimitry Andric } 773*0fca6ea1SDimitry Andric 774*0fca6ea1SDimitry Andric if (PromotedFuncCount.empty()) 775*0fca6ea1SDimitry Andric return false; 776*0fca6ea1SDimitry Andric 777*0fca6ea1SDimitry Andric // Update value profiles for 'CB' and 'VPtr', assuming that each 'CB' has a 778*0fca6ea1SDimitry Andric // a distinct 'VPtr'. 779*0fca6ea1SDimitry Andric // FIXME: When Clang `-fstrict-vtable-pointers` is enabled, a vtable might be 780*0fca6ea1SDimitry Andric // used to load multiple virtual functions. The vtable profiles needs to be 781*0fca6ea1SDimitry Andric // updated properly in that case (e.g, for each indirect call annotate both 782*0fca6ea1SDimitry Andric // type profiles and function profiles in one !prof). 783*0fca6ea1SDimitry Andric for (size_t I = 0; I < PromotedFuncCount.size(); I++) 784*0fca6ea1SDimitry Andric ICallProfDataRef[I].Count -= 785*0fca6ea1SDimitry Andric std::max(PromotedFuncCount[I], ICallProfDataRef[I].Count); 786*0fca6ea1SDimitry Andric // Sort value profiles by count in descending order. 787*0fca6ea1SDimitry Andric llvm::stable_sort(ICallProfDataRef, [](const InstrProfValueData &LHS, 788*0fca6ea1SDimitry Andric const InstrProfValueData &RHS) { 789*0fca6ea1SDimitry Andric return LHS.Count > RHS.Count; 790*0fca6ea1SDimitry Andric }); 791*0fca6ea1SDimitry Andric // Drop the <target-value, count> pair if count is zero. 792*0fca6ea1SDimitry Andric ArrayRef<InstrProfValueData> VDs( 793*0fca6ea1SDimitry Andric ICallProfDataRef.begin(), 794*0fca6ea1SDimitry Andric llvm::upper_bound(ICallProfDataRef, 0U, 795*0fca6ea1SDimitry Andric [](uint64_t Count, const InstrProfValueData &ProfData) { 796*0fca6ea1SDimitry Andric return ProfData.Count <= Count; 797*0fca6ea1SDimitry Andric })); 798*0fca6ea1SDimitry Andric updateFuncValueProfiles(CB, VDs, TotalFuncCount, NumCandidates); 799*0fca6ea1SDimitry Andric updateVPtrValueProfiles(VPtr, VTableGUIDCounts); 800*0fca6ea1SDimitry Andric return true; 8010b57cec5SDimitry Andric } 8020b57cec5SDimitry Andric 8030b57cec5SDimitry Andric // Traverse all the indirect-call callsite and get the value profile 8040b57cec5SDimitry Andric // annotation to perform indirect-call promotion. 80506c3fb27SDimitry Andric bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) { 8060b57cec5SDimitry Andric bool Changed = false; 8070b57cec5SDimitry Andric ICallPromotionAnalysis ICallAnalysis; 8085ffd83dbSDimitry Andric for (auto *CB : findIndirectCalls(F)) { 809*0fca6ea1SDimitry Andric uint32_t NumCandidates; 8100b57cec5SDimitry Andric uint64_t TotalCount; 8110b57cec5SDimitry Andric auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction( 812*0fca6ea1SDimitry Andric CB, TotalCount, NumCandidates); 8130b57cec5SDimitry Andric if (!NumCandidates || 8140b57cec5SDimitry Andric (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount))) 8150b57cec5SDimitry Andric continue; 816*0fca6ea1SDimitry Andric 8170b57cec5SDimitry Andric auto PromotionCandidates = getPromotionCandidatesForCallSite( 8185ffd83dbSDimitry Andric *CB, ICallProfDataRef, TotalCount, NumCandidates); 8190b57cec5SDimitry Andric 820*0fca6ea1SDimitry Andric VTableGUIDCountsMap VTableGUIDCounts; 821*0fca6ea1SDimitry Andric Instruction *VPtr = 822*0fca6ea1SDimitry Andric computeVTableInfos(CB, VTableGUIDCounts, PromotionCandidates); 823*0fca6ea1SDimitry Andric 824*0fca6ea1SDimitry Andric if (isProfitableToCompareVTables(*CB, PromotionCandidates, TotalCount)) 825*0fca6ea1SDimitry Andric Changed |= tryToPromoteWithVTableCmp(*CB, VPtr, PromotionCandidates, 826*0fca6ea1SDimitry Andric TotalCount, NumCandidates, 827*0fca6ea1SDimitry Andric ICallProfDataRef, VTableGUIDCounts); 828*0fca6ea1SDimitry Andric else 829*0fca6ea1SDimitry Andric Changed |= tryToPromoteWithFuncCmp(*CB, VPtr, PromotionCandidates, 830*0fca6ea1SDimitry Andric TotalCount, ICallProfDataRef, 831*0fca6ea1SDimitry Andric NumCandidates, VTableGUIDCounts); 8320b57cec5SDimitry Andric } 8330b57cec5SDimitry Andric return Changed; 8340b57cec5SDimitry Andric } 8350b57cec5SDimitry Andric 836*0fca6ea1SDimitry Andric // TODO: Return false if the function addressing and vtable load instructions 837*0fca6ea1SDimitry Andric // cannot sink to indirect fallback. 838*0fca6ea1SDimitry Andric bool IndirectCallPromoter::isProfitableToCompareVTables( 839*0fca6ea1SDimitry Andric const CallBase &CB, ArrayRef<PromotionCandidate> Candidates, 840*0fca6ea1SDimitry Andric uint64_t TotalCount) { 841*0fca6ea1SDimitry Andric if (!EnableVTableProfileUse || Candidates.empty()) 842*0fca6ea1SDimitry Andric return false; 843*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "\nEvaluating vtable profitability for callsite #" 844*0fca6ea1SDimitry Andric << NumOfPGOICallsites << CB << "\n"); 845*0fca6ea1SDimitry Andric uint64_t RemainingVTableCount = TotalCount; 846*0fca6ea1SDimitry Andric const size_t CandidateSize = Candidates.size(); 847*0fca6ea1SDimitry Andric for (size_t I = 0; I < CandidateSize; I++) { 848*0fca6ea1SDimitry Andric auto &Candidate = Candidates[I]; 849*0fca6ea1SDimitry Andric auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts; 850*0fca6ea1SDimitry Andric 851*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " Candidate " << I << " FunctionCount: " 852*0fca6ea1SDimitry Andric << Candidate.Count << ", VTableCounts:"); 853*0fca6ea1SDimitry Andric // Add [[maybe_unused]] since <GUID, Count> are only used by LLVM_DEBUG. 854*0fca6ea1SDimitry Andric for ([[maybe_unused]] auto &[GUID, Count] : VTableGUIDAndCounts) 855*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " {" << Symtab->getGlobalVariable(GUID)->getName() 856*0fca6ea1SDimitry Andric << ", " << Count << "}"); 857*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "\n"); 858*0fca6ea1SDimitry Andric 859*0fca6ea1SDimitry Andric uint64_t CandidateVTableCount = 0; 860*0fca6ea1SDimitry Andric for (auto &[GUID, Count] : VTableGUIDAndCounts) 861*0fca6ea1SDimitry Andric CandidateVTableCount += Count; 862*0fca6ea1SDimitry Andric 863*0fca6ea1SDimitry Andric if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) { 864*0fca6ea1SDimitry Andric LLVM_DEBUG( 865*0fca6ea1SDimitry Andric dbgs() << " function count " << Candidate.Count 866*0fca6ea1SDimitry Andric << " and its vtable sum count " << CandidateVTableCount 867*0fca6ea1SDimitry Andric << " have discrepancies. Bail out vtable comparison.\n"); 868*0fca6ea1SDimitry Andric return false; 869*0fca6ea1SDimitry Andric } 870*0fca6ea1SDimitry Andric 871*0fca6ea1SDimitry Andric RemainingVTableCount -= Candidate.Count; 872*0fca6ea1SDimitry Andric 873*0fca6ea1SDimitry Andric // 'MaxNumVTable' limits the number of vtables to make vtable comparison 874*0fca6ea1SDimitry Andric // profitable. Comparing multiple vtables for one function candidate will 875*0fca6ea1SDimitry Andric // insert additional instructions on the hot path, and allowing more than 876*0fca6ea1SDimitry Andric // one vtable for non last candidates may or may not elongate the dependency 877*0fca6ea1SDimitry Andric // chain for the subsequent candidates. Set its value to 1 for non-last 878*0fca6ea1SDimitry Andric // candidate and allow option to override it for the last candidate. 879*0fca6ea1SDimitry Andric int MaxNumVTable = 1; 880*0fca6ea1SDimitry Andric if (I == CandidateSize - 1) 881*0fca6ea1SDimitry Andric MaxNumVTable = ICPMaxNumVTableLastCandidate; 882*0fca6ea1SDimitry Andric 883*0fca6ea1SDimitry Andric if ((int)Candidate.AddressPoints.size() > MaxNumVTable) { 884*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " allow at most " << MaxNumVTable << " and got " 885*0fca6ea1SDimitry Andric << Candidate.AddressPoints.size() 886*0fca6ea1SDimitry Andric << " vtables. Bail out for vtable comparison.\n"); 887*0fca6ea1SDimitry Andric return false; 888*0fca6ea1SDimitry Andric } 889*0fca6ea1SDimitry Andric } 890*0fca6ea1SDimitry Andric 891*0fca6ea1SDimitry Andric // If the indirect fallback is not cold, don't compare vtables. 892*0fca6ea1SDimitry Andric if (PSI && PSI->hasProfileSummary() && 893*0fca6ea1SDimitry Andric !PSI->isColdCount(RemainingVTableCount)) { 894*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " Indirect fallback basic block is not cold. Bail " 895*0fca6ea1SDimitry Andric "out for vtable comparison.\n"); 896*0fca6ea1SDimitry Andric return false; 897*0fca6ea1SDimitry Andric } 898*0fca6ea1SDimitry Andric 899*0fca6ea1SDimitry Andric return true; 900*0fca6ea1SDimitry Andric } 901*0fca6ea1SDimitry Andric 902*0fca6ea1SDimitry Andric // For virtual calls in the module, collect per-callsite information which will 903*0fca6ea1SDimitry Andric // be used to associate an ICP candidate with a vtable and a specific function 904*0fca6ea1SDimitry Andric // in the vtable. With type intrinsics (llvm.type.test), we can find virtual 905*0fca6ea1SDimitry Andric // calls in a compile-time efficient manner (by iterating its users) and more 906*0fca6ea1SDimitry Andric // importantly use the compatible type later to figure out the function byte 907*0fca6ea1SDimitry Andric // offset relative to the start of vtables. 908*0fca6ea1SDimitry Andric static void 909*0fca6ea1SDimitry Andric computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM, 910*0fca6ea1SDimitry Andric VirtualCallSiteTypeInfoMap &VirtualCSInfo) { 911*0fca6ea1SDimitry Andric // Right now only llvm.type.test is used to find out virtual call sites. 912*0fca6ea1SDimitry Andric // With ThinLTO and whole-program-devirtualization, llvm.type.test and 913*0fca6ea1SDimitry Andric // llvm.public.type.test are emitted, and llvm.public.type.test is either 914*0fca6ea1SDimitry Andric // refined to llvm.type.test or dropped before indirect-call-promotion pass. 915*0fca6ea1SDimitry Andric // 916*0fca6ea1SDimitry Andric // FIXME: For fullLTO with VFE, `llvm.type.checked.load intrinsic` is emitted. 917*0fca6ea1SDimitry Andric // Find out virtual calls by looking at users of llvm.type.checked.load in 918*0fca6ea1SDimitry Andric // that case. 919*0fca6ea1SDimitry Andric Function *TypeTestFunc = 920*0fca6ea1SDimitry Andric M.getFunction(Intrinsic::getName(Intrinsic::type_test)); 921*0fca6ea1SDimitry Andric if (!TypeTestFunc || TypeTestFunc->use_empty()) 922*0fca6ea1SDimitry Andric return; 923*0fca6ea1SDimitry Andric 924*0fca6ea1SDimitry Andric auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 925*0fca6ea1SDimitry Andric auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & { 926*0fca6ea1SDimitry Andric return FAM.getResult<DominatorTreeAnalysis>(F); 927*0fca6ea1SDimitry Andric }; 928*0fca6ea1SDimitry Andric // Iterate all type.test calls to find all indirect calls. 929*0fca6ea1SDimitry Andric for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) { 930*0fca6ea1SDimitry Andric auto *CI = dyn_cast<CallInst>(U.getUser()); 931*0fca6ea1SDimitry Andric if (!CI) 932*0fca6ea1SDimitry Andric continue; 933*0fca6ea1SDimitry Andric auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1)); 934*0fca6ea1SDimitry Andric if (!TypeMDVal) 935*0fca6ea1SDimitry Andric continue; 936*0fca6ea1SDimitry Andric auto *CompatibleTypeId = dyn_cast<MDString>(TypeMDVal->getMetadata()); 937*0fca6ea1SDimitry Andric if (!CompatibleTypeId) 938*0fca6ea1SDimitry Andric continue; 939*0fca6ea1SDimitry Andric 940*0fca6ea1SDimitry Andric // Find out all devirtualizable call sites given a llvm.type.test 941*0fca6ea1SDimitry Andric // intrinsic call. 942*0fca6ea1SDimitry Andric SmallVector<DevirtCallSite, 1> DevirtCalls; 943*0fca6ea1SDimitry Andric SmallVector<CallInst *, 1> Assumes; 944*0fca6ea1SDimitry Andric auto &DT = LookupDomTree(*CI->getFunction()); 945*0fca6ea1SDimitry Andric findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT); 946*0fca6ea1SDimitry Andric 947*0fca6ea1SDimitry Andric for (auto &DevirtCall : DevirtCalls) { 948*0fca6ea1SDimitry Andric CallBase &CB = DevirtCall.CB; 949*0fca6ea1SDimitry Andric // Given an indirect call, try find the instruction which loads a 950*0fca6ea1SDimitry Andric // pointer to virtual table. 951*0fca6ea1SDimitry Andric Instruction *VTablePtr = 952*0fca6ea1SDimitry Andric PGOIndirectCallVisitor::tryGetVTableInstruction(&CB); 953*0fca6ea1SDimitry Andric if (!VTablePtr) 954*0fca6ea1SDimitry Andric continue; 955*0fca6ea1SDimitry Andric VirtualCSInfo[&CB] = {DevirtCall.Offset, VTablePtr, 956*0fca6ea1SDimitry Andric CompatibleTypeId->getString()}; 957*0fca6ea1SDimitry Andric } 958*0fca6ea1SDimitry Andric } 959*0fca6ea1SDimitry Andric } 960*0fca6ea1SDimitry Andric 9610b57cec5SDimitry Andric // A wrapper function that does the actual work. 96206c3fb27SDimitry Andric static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, 96306c3fb27SDimitry Andric bool SamplePGO, ModuleAnalysisManager &MAM) { 9640b57cec5SDimitry Andric if (DisableICP) 9650b57cec5SDimitry Andric return false; 9660b57cec5SDimitry Andric InstrProfSymtab Symtab; 9670b57cec5SDimitry Andric if (Error E = Symtab.create(M, InLTO)) { 9680b57cec5SDimitry Andric std::string SymtabFailure = toString(std::move(E)); 969fe6060f1SDimitry Andric M.getContext().emitError("Failed to create symtab: " + SymtabFailure); 9700b57cec5SDimitry Andric return false; 9710b57cec5SDimitry Andric } 9720b57cec5SDimitry Andric bool Changed = false; 973*0fca6ea1SDimitry Andric VirtualCallSiteTypeInfoMap VirtualCSInfo; 974*0fca6ea1SDimitry Andric 975*0fca6ea1SDimitry Andric if (EnableVTableProfileUse) 976*0fca6ea1SDimitry Andric computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo); 977*0fca6ea1SDimitry Andric 978*0fca6ea1SDimitry Andric // VTableAddressPointOffsetVal stores the vtable address points. The vtable 979*0fca6ea1SDimitry Andric // address point of a given <vtable, address point offset> is static (doesn't 980*0fca6ea1SDimitry Andric // change after being computed once). 981*0fca6ea1SDimitry Andric // IndirectCallPromoter::getOrCreateVTableAddressPointVar creates the map 982*0fca6ea1SDimitry Andric // entry the first time a <vtable, offset> pair is seen, as 983*0fca6ea1SDimitry Andric // promoteIndirectCalls processes an IR module and calls IndirectCallPromoter 984*0fca6ea1SDimitry Andric // repeatedly on each function. 985*0fca6ea1SDimitry Andric VTableAddressPointOffsetValMap VTableAddressPointOffsetVal; 986*0fca6ea1SDimitry Andric 9870b57cec5SDimitry Andric for (auto &F : M) { 9880b57cec5SDimitry Andric if (F.isDeclaration() || F.hasOptNone()) 9890b57cec5SDimitry Andric continue; 9900b57cec5SDimitry Andric 9910b57cec5SDimitry Andric auto &FAM = 99206c3fb27SDimitry Andric MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 99306c3fb27SDimitry Andric auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 9940b57cec5SDimitry Andric 995*0fca6ea1SDimitry Andric IndirectCallPromoter CallPromoter(F, M, PSI, &Symtab, SamplePGO, 996*0fca6ea1SDimitry Andric VirtualCSInfo, 997*0fca6ea1SDimitry Andric VTableAddressPointOffsetVal, ORE); 99806c3fb27SDimitry Andric bool FuncChanged = CallPromoter.processFunction(PSI); 9990b57cec5SDimitry Andric if (ICPDUMPAFTER && FuncChanged) { 10000b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs())); 10010b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n"); 10020b57cec5SDimitry Andric } 10030b57cec5SDimitry Andric Changed |= FuncChanged; 10040b57cec5SDimitry Andric if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { 10050b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n"); 10060b57cec5SDimitry Andric break; 10070b57cec5SDimitry Andric } 10080b57cec5SDimitry Andric } 10090b57cec5SDimitry Andric return Changed; 10100b57cec5SDimitry Andric } 10110b57cec5SDimitry Andric 10120b57cec5SDimitry Andric PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, 101306c3fb27SDimitry Andric ModuleAnalysisManager &MAM) { 101406c3fb27SDimitry Andric ProfileSummaryInfo *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M); 10150b57cec5SDimitry Andric 10160b57cec5SDimitry Andric if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode, 101706c3fb27SDimitry Andric SamplePGO | ICPSamplePGOMode, MAM)) 10180b57cec5SDimitry Andric return PreservedAnalyses::all(); 10190b57cec5SDimitry Andric 10200b57cec5SDimitry Andric return PreservedAnalyses::none(); 10210b57cec5SDimitry Andric } 1022