xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements the transformation that promotes indirect calls to
100b57cec5SDimitry Andric // conditional direct calls when the indirect-call value profile metadata is
110b57cec5SDimitry Andric // available.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h"
16*0fca6ea1SDimitry Andric #include "llvm/ADT/DenseMap.h"
170b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
180b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
190b57cec5SDimitry Andric #include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
200b57cec5SDimitry Andric #include "llvm/Analysis/IndirectCallVisitor.h"
210b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
23*0fca6ea1SDimitry Andric #include "llvm/Analysis/TypeMetadataUtils.h"
240b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
25*0fca6ea1SDimitry Andric #include "llvm/IR/Dominators.h"
260b57cec5SDimitry Andric #include "llvm/IR/Function.h"
270b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h"
280b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
290b57cec5SDimitry Andric #include "llvm/IR/LLVMContext.h"
300b57cec5SDimitry Andric #include "llvm/IR/MDBuilder.h"
310b57cec5SDimitry Andric #include "llvm/IR/PassManager.h"
325f757f3fSDimitry Andric #include "llvm/IR/ProfDataUtils.h"
330b57cec5SDimitry Andric #include "llvm/IR/Value.h"
340b57cec5SDimitry Andric #include "llvm/ProfileData/InstrProf.h"
350b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
360b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
370b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
380b57cec5SDimitry Andric #include "llvm/Support/Error.h"
390b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
400b57cec5SDimitry Andric #include "llvm/Transforms/Instrumentation.h"
410b57cec5SDimitry Andric #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
420b57cec5SDimitry Andric #include "llvm/Transforms/Utils/CallPromotionUtils.h"
430b57cec5SDimitry Andric #include <cassert>
440b57cec5SDimitry Andric #include <cstdint>
450b57cec5SDimitry Andric #include <memory>
46*0fca6ea1SDimitry Andric #include <set>
470b57cec5SDimitry Andric #include <string>
48*0fca6ea1SDimitry Andric #include <unordered_map>
490b57cec5SDimitry Andric #include <utility>
500b57cec5SDimitry Andric #include <vector>
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric using namespace llvm;
530b57cec5SDimitry Andric 
540b57cec5SDimitry Andric #define DEBUG_TYPE "pgo-icall-prom"
550b57cec5SDimitry Andric 
560b57cec5SDimitry Andric STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
570b57cec5SDimitry Andric STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
580b57cec5SDimitry Andric 
59*0fca6ea1SDimitry Andric extern cl::opt<unsigned> MaxNumVTableAnnotations;
60*0fca6ea1SDimitry Andric 
61*0fca6ea1SDimitry Andric namespace llvm {
62*0fca6ea1SDimitry Andric extern cl::opt<bool> EnableVTableProfileUse;
63*0fca6ea1SDimitry Andric }
64*0fca6ea1SDimitry Andric 
650b57cec5SDimitry Andric // Command line option to disable indirect-call promotion with the default as
660b57cec5SDimitry Andric // false. This is for debug purpose.
670b57cec5SDimitry Andric static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
680b57cec5SDimitry Andric                                 cl::desc("Disable indirect call promotion"));
690b57cec5SDimitry Andric 
700b57cec5SDimitry Andric // Set the cutoff value for the promotion. If the value is other than 0, we
710b57cec5SDimitry Andric // stop the transformation once the total number of promotions equals the cutoff
720b57cec5SDimitry Andric // value.
730b57cec5SDimitry Andric // For debug use only.
740b57cec5SDimitry Andric static cl::opt<unsigned>
7581ad6265SDimitry Andric     ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden,
760b57cec5SDimitry Andric               cl::desc("Max number of promotions for this compilation"));
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
790b57cec5SDimitry Andric // For debug use only.
800b57cec5SDimitry Andric static cl::opt<unsigned>
8181ad6265SDimitry Andric     ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
820b57cec5SDimitry Andric               cl::desc("Skip Callsite up to this number for this compilation"));
830b57cec5SDimitry Andric 
840b57cec5SDimitry Andric // Set if the pass is called in LTO optimization. The difference for LTO mode
850b57cec5SDimitry Andric // is the pass won't prefix the source module name to the internal linkage
860b57cec5SDimitry Andric // symbols.
870b57cec5SDimitry Andric static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
880b57cec5SDimitry Andric                                 cl::desc("Run indirect-call promotion in LTO "
890b57cec5SDimitry Andric                                          "mode"));
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric // Set if the pass is called in SamplePGO mode. The difference for SamplePGO
920b57cec5SDimitry Andric // mode is it will add prof metadatato the created direct call.
930b57cec5SDimitry Andric static cl::opt<bool>
940b57cec5SDimitry Andric     ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
950b57cec5SDimitry Andric                      cl::desc("Run indirect-call promotion in SamplePGO mode"));
960b57cec5SDimitry Andric 
970b57cec5SDimitry Andric // If the option is set to true, only call instructions will be considered for
980b57cec5SDimitry Andric // transformation -- invoke instructions will be ignored.
990b57cec5SDimitry Andric static cl::opt<bool>
1000b57cec5SDimitry Andric     ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
1010b57cec5SDimitry Andric                 cl::desc("Run indirect-call promotion for call instructions "
1020b57cec5SDimitry Andric                          "only"));
1030b57cec5SDimitry Andric 
1040b57cec5SDimitry Andric // If the option is set to true, only invoke instructions will be considered for
1050b57cec5SDimitry Andric // transformation -- call instructions will be ignored.
1060b57cec5SDimitry Andric static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
1070b57cec5SDimitry Andric                                    cl::Hidden,
1080b57cec5SDimitry Andric                                    cl::desc("Run indirect-call promotion for "
1090b57cec5SDimitry Andric                                             "invoke instruction only"));
1100b57cec5SDimitry Andric 
1110b57cec5SDimitry Andric // Dump the function level IR if the transformation happened in this
1120b57cec5SDimitry Andric // function. For debug use only.
1130b57cec5SDimitry Andric static cl::opt<bool>
1140b57cec5SDimitry Andric     ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
1150b57cec5SDimitry Andric                  cl::desc("Dump IR after transformation happens"));
1160b57cec5SDimitry Andric 
117*0fca6ea1SDimitry Andric // Indirect call promotion pass will fall back to function-based comparison if
118*0fca6ea1SDimitry Andric // vtable-count / function-count is smaller than this threshold.
119*0fca6ea1SDimitry Andric static cl::opt<float> ICPVTablePercentageThreshold(
120*0fca6ea1SDimitry Andric     "icp-vtable-percentage-threshold", cl::init(0.99), cl::Hidden,
121*0fca6ea1SDimitry Andric     cl::desc("The percentage threshold of vtable-count / function-count for "
122*0fca6ea1SDimitry Andric              "cost-benefit analysis."));
123*0fca6ea1SDimitry Andric 
124*0fca6ea1SDimitry Andric // Although comparing vtables can save a vtable load, we may need to compare
125*0fca6ea1SDimitry Andric // vtable pointer with multiple vtable address points due to class inheritance.
126*0fca6ea1SDimitry Andric // Comparing with multiple vtables inserts additional instructions on hot code
127*0fca6ea1SDimitry Andric // path, and doing so for an earlier candidate delays the comparisons for later
128*0fca6ea1SDimitry Andric // candidates. For the last candidate, only the fallback path is affected.
129*0fca6ea1SDimitry Andric // We allow multiple vtable comparison for the last function candidate and use
130*0fca6ea1SDimitry Andric // the option below to cap the number of vtables.
131*0fca6ea1SDimitry Andric static cl::opt<int> ICPMaxNumVTableLastCandidate(
132*0fca6ea1SDimitry Andric     "icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
133*0fca6ea1SDimitry Andric     cl::desc("The maximum number of vtable for the last candidate."));
134*0fca6ea1SDimitry Andric 
1350b57cec5SDimitry Andric namespace {
1360b57cec5SDimitry Andric 
137*0fca6ea1SDimitry Andric // The key is a vtable global variable, and the value is a map.
138*0fca6ea1SDimitry Andric // In the inner map, the key represents address point offsets and the value is a
139*0fca6ea1SDimitry Andric // constant for this address point.
140*0fca6ea1SDimitry Andric using VTableAddressPointOffsetValMap =
141*0fca6ea1SDimitry Andric     SmallDenseMap<const GlobalVariable *, std::unordered_map<int, Constant *>>;
142*0fca6ea1SDimitry Andric 
143*0fca6ea1SDimitry Andric // A struct to collect type information for a virtual call site.
144*0fca6ea1SDimitry Andric struct VirtualCallSiteInfo {
145*0fca6ea1SDimitry Andric   // The offset from the address point to virtual function in the vtable.
146*0fca6ea1SDimitry Andric   uint64_t FunctionOffset;
147*0fca6ea1SDimitry Andric   // The instruction that computes the address point of vtable.
148*0fca6ea1SDimitry Andric   Instruction *VPtr;
149*0fca6ea1SDimitry Andric   // The compatible type used in LLVM type intrinsics.
150*0fca6ea1SDimitry Andric   StringRef CompatibleTypeStr;
151*0fca6ea1SDimitry Andric };
152*0fca6ea1SDimitry Andric 
153*0fca6ea1SDimitry Andric // The key is a virtual call, and value is its type information.
154*0fca6ea1SDimitry Andric using VirtualCallSiteTypeInfoMap =
155*0fca6ea1SDimitry Andric     SmallDenseMap<const CallBase *, VirtualCallSiteInfo>;
156*0fca6ea1SDimitry Andric 
157*0fca6ea1SDimitry Andric // The key is vtable GUID, and value is its value profile count.
158*0fca6ea1SDimitry Andric using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t, 16>;
159*0fca6ea1SDimitry Andric 
160*0fca6ea1SDimitry Andric // Return the address point offset of the given compatible type.
161*0fca6ea1SDimitry Andric //
162*0fca6ea1SDimitry Andric // Type metadata of a vtable specifies the types that can contain a pointer to
163*0fca6ea1SDimitry Andric // this vtable, for example, `Base*` can be a pointer to an derived type
164*0fca6ea1SDimitry Andric // but not vice versa. See also https://llvm.org/docs/TypeMetadata.html
165*0fca6ea1SDimitry Andric static std::optional<uint64_t>
166*0fca6ea1SDimitry Andric getAddressPointOffset(const GlobalVariable &VTableVar,
167*0fca6ea1SDimitry Andric                       StringRef CompatibleType) {
168*0fca6ea1SDimitry Andric   SmallVector<MDNode *> Types;
169*0fca6ea1SDimitry Andric   VTableVar.getMetadata(LLVMContext::MD_type, Types);
170*0fca6ea1SDimitry Andric 
171*0fca6ea1SDimitry Andric   for (MDNode *Type : Types)
172*0fca6ea1SDimitry Andric     if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get());
173*0fca6ea1SDimitry Andric         TypeId && TypeId->getString() == CompatibleType)
174*0fca6ea1SDimitry Andric       return cast<ConstantInt>(
175*0fca6ea1SDimitry Andric                  cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
176*0fca6ea1SDimitry Andric           ->getZExtValue();
177*0fca6ea1SDimitry Andric 
178*0fca6ea1SDimitry Andric   return std::nullopt;
179*0fca6ea1SDimitry Andric }
180*0fca6ea1SDimitry Andric 
181*0fca6ea1SDimitry Andric // Return a constant representing the vtable's address point specified by the
182*0fca6ea1SDimitry Andric // offset.
183*0fca6ea1SDimitry Andric static Constant *getVTableAddressPointOffset(GlobalVariable *VTable,
184*0fca6ea1SDimitry Andric                                              uint32_t AddressPointOffset) {
185*0fca6ea1SDimitry Andric   Module &M = *VTable->getParent();
186*0fca6ea1SDimitry Andric   LLVMContext &Context = M.getContext();
187*0fca6ea1SDimitry Andric   assert(AddressPointOffset <
188*0fca6ea1SDimitry Andric              M.getDataLayout().getTypeAllocSize(VTable->getValueType()) &&
189*0fca6ea1SDimitry Andric          "Out-of-bound access");
190*0fca6ea1SDimitry Andric 
191*0fca6ea1SDimitry Andric   return ConstantExpr::getInBoundsGetElementPtr(
192*0fca6ea1SDimitry Andric       Type::getInt8Ty(Context), VTable,
193*0fca6ea1SDimitry Andric       llvm::ConstantInt::get(Type::getInt32Ty(Context), AddressPointOffset));
194*0fca6ea1SDimitry Andric }
195*0fca6ea1SDimitry Andric 
196*0fca6ea1SDimitry Andric // Return the basic block in which Use `U` is used via its `UserInst`.
197*0fca6ea1SDimitry Andric static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) {
198*0fca6ea1SDimitry Andric   if (PHINode *PN = dyn_cast<PHINode>(UserInst))
199*0fca6ea1SDimitry Andric     return PN->getIncomingBlock(U);
200*0fca6ea1SDimitry Andric 
201*0fca6ea1SDimitry Andric   return UserInst->getParent();
202*0fca6ea1SDimitry Andric }
203*0fca6ea1SDimitry Andric 
204*0fca6ea1SDimitry Andric // `DestBB` is a suitable basic block to sink `Inst` into when `Inst` have users
205*0fca6ea1SDimitry Andric // and all users are in `DestBB`. The caller guarantees that `Inst->getParent()`
206*0fca6ea1SDimitry Andric // is the sole predecessor of `DestBB` and `DestBB` is dominated by
207*0fca6ea1SDimitry Andric // `Inst->getParent()`.
208*0fca6ea1SDimitry Andric static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) {
209*0fca6ea1SDimitry Andric   // 'BB' is used only by assert.
210*0fca6ea1SDimitry Andric   [[maybe_unused]] BasicBlock *BB = Inst->getParent();
211*0fca6ea1SDimitry Andric 
212*0fca6ea1SDimitry Andric   assert(BB != DestBB && BB->getTerminator()->getNumSuccessors() == 2 &&
213*0fca6ea1SDimitry Andric          DestBB->getUniquePredecessor() == BB &&
214*0fca6ea1SDimitry Andric          "Guaranteed by ICP transformation");
215*0fca6ea1SDimitry Andric 
216*0fca6ea1SDimitry Andric   BasicBlock *UserBB = nullptr;
217*0fca6ea1SDimitry Andric   for (Use &Use : Inst->uses()) {
218*0fca6ea1SDimitry Andric     User *User = Use.getUser();
219*0fca6ea1SDimitry Andric     // Do checked cast since IR verifier guarantees that the user of an
220*0fca6ea1SDimitry Andric     // instruction must be an instruction. See `Verifier::visitInstruction`.
221*0fca6ea1SDimitry Andric     Instruction *UserInst = cast<Instruction>(User);
222*0fca6ea1SDimitry Andric     // We can sink debug or pseudo instructions together with Inst.
223*0fca6ea1SDimitry Andric     if (UserInst->isDebugOrPseudoInst())
224*0fca6ea1SDimitry Andric       continue;
225*0fca6ea1SDimitry Andric     UserBB = getUserBasicBlock(Use, UserInst);
226*0fca6ea1SDimitry Andric     // Do not sink if Inst is used in a basic block that is not DestBB.
227*0fca6ea1SDimitry Andric     // TODO: Sink to the common dominator of all user blocks.
228*0fca6ea1SDimitry Andric     if (UserBB != DestBB)
229*0fca6ea1SDimitry Andric       return false;
230*0fca6ea1SDimitry Andric   }
231*0fca6ea1SDimitry Andric   return UserBB != nullptr;
232*0fca6ea1SDimitry Andric }
233*0fca6ea1SDimitry Andric 
234*0fca6ea1SDimitry Andric // For the virtual call dispatch sequence, try to sink vtable load instructions
235*0fca6ea1SDimitry Andric // to the cold indirect call fallback.
236*0fca6ea1SDimitry Andric // FIXME: Move the sink eligibility check below to a utility function in
237*0fca6ea1SDimitry Andric // Transforms/Utils/ directory.
238*0fca6ea1SDimitry Andric static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
239*0fca6ea1SDimitry Andric   if (!isDestBBSuitableForSink(I, DestBlock))
240*0fca6ea1SDimitry Andric     return false;
241*0fca6ea1SDimitry Andric 
242*0fca6ea1SDimitry Andric   // Do not move control-flow-involving, volatile loads, vaarg, alloca
243*0fca6ea1SDimitry Andric   // instructions, etc.
244*0fca6ea1SDimitry Andric   if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
245*0fca6ea1SDimitry Andric       isa<AllocaInst>(I))
246*0fca6ea1SDimitry Andric     return false;
247*0fca6ea1SDimitry Andric 
248*0fca6ea1SDimitry Andric   // Do not sink convergent call instructions.
249*0fca6ea1SDimitry Andric   if (const auto *C = dyn_cast<CallBase>(I))
250*0fca6ea1SDimitry Andric     if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
251*0fca6ea1SDimitry Andric       return false;
252*0fca6ea1SDimitry Andric 
253*0fca6ea1SDimitry Andric   // Do not move an instruction that may write to memory.
254*0fca6ea1SDimitry Andric   if (I->mayWriteToMemory())
255*0fca6ea1SDimitry Andric     return false;
256*0fca6ea1SDimitry Andric 
257*0fca6ea1SDimitry Andric   // We can only sink load instructions if there is nothing between the load and
258*0fca6ea1SDimitry Andric   // the end of block that could change the value.
259*0fca6ea1SDimitry Andric   if (I->mayReadFromMemory()) {
260*0fca6ea1SDimitry Andric     // We already know that SrcBlock is the unique predecessor of DestBlock.
261*0fca6ea1SDimitry Andric     for (BasicBlock::iterator Scan = std::next(I->getIterator()),
262*0fca6ea1SDimitry Andric                               E = I->getParent()->end();
263*0fca6ea1SDimitry Andric          Scan != E; ++Scan) {
264*0fca6ea1SDimitry Andric       // Note analysis analysis can tell whether two pointers can point to the
265*0fca6ea1SDimitry Andric       // same object in memory or not thereby find further opportunities to
266*0fca6ea1SDimitry Andric       // sink.
267*0fca6ea1SDimitry Andric       if (Scan->mayWriteToMemory())
268*0fca6ea1SDimitry Andric         return false;
269*0fca6ea1SDimitry Andric     }
270*0fca6ea1SDimitry Andric   }
271*0fca6ea1SDimitry Andric 
272*0fca6ea1SDimitry Andric   BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
273*0fca6ea1SDimitry Andric   I->moveBefore(*DestBlock, InsertPos);
274*0fca6ea1SDimitry Andric 
275*0fca6ea1SDimitry Andric   // TODO: Sink debug intrinsic users of I to 'DestBlock'.
276*0fca6ea1SDimitry Andric   // 'InstCombinerImpl::tryToSinkInstructionDbgValues' and
277*0fca6ea1SDimitry Andric   // 'InstCombinerImpl::tryToSinkInstructionDbgVariableRecords' already have
278*0fca6ea1SDimitry Andric   // the core logic to do this.
279*0fca6ea1SDimitry Andric   return true;
280*0fca6ea1SDimitry Andric }
281*0fca6ea1SDimitry Andric 
282*0fca6ea1SDimitry Andric // Try to sink instructions after VPtr to the indirect call fallback.
283*0fca6ea1SDimitry Andric // Return the number of sunk IR instructions.
284*0fca6ea1SDimitry Andric static int tryToSinkInstructions(BasicBlock *OriginalBB,
285*0fca6ea1SDimitry Andric                                  BasicBlock *IndirectCallBB) {
286*0fca6ea1SDimitry Andric   int SinkCount = 0;
287*0fca6ea1SDimitry Andric   // Do not sink across a critical edge for simplicity.
288*0fca6ea1SDimitry Andric   if (IndirectCallBB->getUniquePredecessor() != OriginalBB)
289*0fca6ea1SDimitry Andric     return SinkCount;
290*0fca6ea1SDimitry Andric   // Sink all eligible instructions in OriginalBB in reverse order.
291*0fca6ea1SDimitry Andric   for (Instruction &I :
292*0fca6ea1SDimitry Andric        llvm::make_early_inc_range(llvm::drop_begin(llvm::reverse(*OriginalBB))))
293*0fca6ea1SDimitry Andric     if (tryToSinkInstruction(&I, IndirectCallBB))
294*0fca6ea1SDimitry Andric       SinkCount++;
295*0fca6ea1SDimitry Andric 
296*0fca6ea1SDimitry Andric   return SinkCount;
297*0fca6ea1SDimitry Andric }
298*0fca6ea1SDimitry Andric 
29906c3fb27SDimitry Andric // Promote indirect calls to conditional direct calls, keeping track of
30006c3fb27SDimitry Andric // thresholds.
30106c3fb27SDimitry Andric class IndirectCallPromoter {
3020b57cec5SDimitry Andric private:
3030b57cec5SDimitry Andric   Function &F;
304*0fca6ea1SDimitry Andric   Module &M;
305*0fca6ea1SDimitry Andric 
306*0fca6ea1SDimitry Andric   ProfileSummaryInfo *PSI = nullptr;
3070b57cec5SDimitry Andric 
3080b57cec5SDimitry Andric   // Symtab that maps indirect call profile values to function names and
3090b57cec5SDimitry Andric   // defines.
31006c3fb27SDimitry Andric   InstrProfSymtab *const Symtab;
3110b57cec5SDimitry Andric 
31206c3fb27SDimitry Andric   const bool SamplePGO;
3130b57cec5SDimitry Andric 
314*0fca6ea1SDimitry Andric   // A map from a virtual call to its type information.
315*0fca6ea1SDimitry Andric   const VirtualCallSiteTypeInfoMap &VirtualCSInfo;
316*0fca6ea1SDimitry Andric 
317*0fca6ea1SDimitry Andric   VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal;
318*0fca6ea1SDimitry Andric 
3190b57cec5SDimitry Andric   OptimizationRemarkEmitter &ORE;
3200b57cec5SDimitry Andric 
3210b57cec5SDimitry Andric   // A struct that records the direct target and it's call count.
3220b57cec5SDimitry Andric   struct PromotionCandidate {
32306c3fb27SDimitry Andric     Function *const TargetFunction;
32406c3fb27SDimitry Andric     const uint64_t Count;
3250b57cec5SDimitry Andric 
326*0fca6ea1SDimitry Andric     // The following fields only exists for promotion candidates with vtable
327*0fca6ea1SDimitry Andric     // information.
328*0fca6ea1SDimitry Andric     //
329*0fca6ea1SDimitry Andric     // Due to class inheritance, one virtual call candidate can come from
330*0fca6ea1SDimitry Andric     // multiple vtables. `VTableGUIDAndCounts` tracks the vtable GUIDs and
331*0fca6ea1SDimitry Andric     // counts for 'TargetFunction'. `AddressPoints` stores the vtable address
332*0fca6ea1SDimitry Andric     // points for comparison.
333*0fca6ea1SDimitry Andric     VTableGUIDCountsMap VTableGUIDAndCounts;
334*0fca6ea1SDimitry Andric     SmallVector<Constant *> AddressPoints;
335*0fca6ea1SDimitry Andric 
3360b57cec5SDimitry Andric     PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
3370b57cec5SDimitry Andric   };
3380b57cec5SDimitry Andric 
3390b57cec5SDimitry Andric   // Check if the indirect-call call site should be promoted. Return the number
3400b57cec5SDimitry Andric   // of promotions. Inst is the candidate indirect call, ValueDataRef
3410b57cec5SDimitry Andric   // contains the array of value profile data for profiled targets,
3420b57cec5SDimitry Andric   // TotalCount is the total profiled count of call executions, and
3430b57cec5SDimitry Andric   // NumCandidates is the number of candidate entries in ValueDataRef.
3440b57cec5SDimitry Andric   std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
345*0fca6ea1SDimitry Andric       const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef,
3460b57cec5SDimitry Andric       uint64_t TotalCount, uint32_t NumCandidates);
3470b57cec5SDimitry Andric 
348*0fca6ea1SDimitry Andric   // Promote a list of targets for one indirect-call callsite by comparing
349*0fca6ea1SDimitry Andric   // indirect callee with functions. Return true if there are IR
350*0fca6ea1SDimitry Andric   // transformations and false otherwise.
351*0fca6ea1SDimitry Andric   bool tryToPromoteWithFuncCmp(CallBase &CB, Instruction *VPtr,
352*0fca6ea1SDimitry Andric                                ArrayRef<PromotionCandidate> Candidates,
353*0fca6ea1SDimitry Andric                                uint64_t TotalCount,
354*0fca6ea1SDimitry Andric                                ArrayRef<InstrProfValueData> ICallProfDataRef,
355*0fca6ea1SDimitry Andric                                uint32_t NumCandidates,
356*0fca6ea1SDimitry Andric                                VTableGUIDCountsMap &VTableGUIDCounts);
357*0fca6ea1SDimitry Andric 
358*0fca6ea1SDimitry Andric   // Promote a list of targets for one indirect call by comparing vtables with
359*0fca6ea1SDimitry Andric   // functions. Return true if there are IR transformations and false
360*0fca6ea1SDimitry Andric   // otherwise.
361*0fca6ea1SDimitry Andric   bool tryToPromoteWithVTableCmp(
362*0fca6ea1SDimitry Andric       CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
363*0fca6ea1SDimitry Andric       uint64_t TotalFuncCount, uint32_t NumCandidates,
364*0fca6ea1SDimitry Andric       MutableArrayRef<InstrProfValueData> ICallProfDataRef,
365*0fca6ea1SDimitry Andric       VTableGUIDCountsMap &VTableGUIDCounts);
366*0fca6ea1SDimitry Andric 
367*0fca6ea1SDimitry Andric   // Return true if it's profitable to compare vtables for the callsite.
368*0fca6ea1SDimitry Andric   bool isProfitableToCompareVTables(const CallBase &CB,
369*0fca6ea1SDimitry Andric                                     ArrayRef<PromotionCandidate> Candidates,
370*0fca6ea1SDimitry Andric                                     uint64_t TotalCount);
371*0fca6ea1SDimitry Andric 
372*0fca6ea1SDimitry Andric   // Given an indirect callsite and the list of function candidates, compute
373*0fca6ea1SDimitry Andric   // the following vtable information in output parameters and return vtable
374*0fca6ea1SDimitry Andric   // pointer if type profiles exist.
375*0fca6ea1SDimitry Andric   // - Populate `VTableGUIDCounts` with <vtable-guid, count> using !prof
376*0fca6ea1SDimitry Andric   // metadata attached on the vtable pointer.
377*0fca6ea1SDimitry Andric   // - For each function candidate, finds out the vtables from which it gets
378*0fca6ea1SDimitry Andric   // called and stores the <vtable-guid, count> in promotion candidate.
379*0fca6ea1SDimitry Andric   Instruction *computeVTableInfos(const CallBase *CB,
380*0fca6ea1SDimitry Andric                                   VTableGUIDCountsMap &VTableGUIDCounts,
381*0fca6ea1SDimitry Andric                                   std::vector<PromotionCandidate> &Candidates);
382*0fca6ea1SDimitry Andric 
383*0fca6ea1SDimitry Andric   Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV,
384*0fca6ea1SDimitry Andric                                              uint64_t AddressPointOffset);
385*0fca6ea1SDimitry Andric 
386*0fca6ea1SDimitry Andric   void updateFuncValueProfiles(CallBase &CB, ArrayRef<InstrProfValueData> VDs,
387*0fca6ea1SDimitry Andric                                uint64_t Sum, uint32_t MaxMDCount);
388*0fca6ea1SDimitry Andric 
389*0fca6ea1SDimitry Andric   void updateVPtrValueProfiles(Instruction *VPtr,
390*0fca6ea1SDimitry Andric                                VTableGUIDCountsMap &VTableGUIDCounts);
3910b57cec5SDimitry Andric 
3920b57cec5SDimitry Andric public:
393*0fca6ea1SDimitry Andric   IndirectCallPromoter(
394*0fca6ea1SDimitry Andric       Function &Func, Module &M, ProfileSummaryInfo *PSI,
395*0fca6ea1SDimitry Andric       InstrProfSymtab *Symtab, bool SamplePGO,
396*0fca6ea1SDimitry Andric       const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
397*0fca6ea1SDimitry Andric       VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
39806c3fb27SDimitry Andric       OptimizationRemarkEmitter &ORE)
399*0fca6ea1SDimitry Andric       : F(Func), M(M), PSI(PSI), Symtab(Symtab), SamplePGO(SamplePGO),
400*0fca6ea1SDimitry Andric         VirtualCSInfo(VirtualCSInfo),
401*0fca6ea1SDimitry Andric         VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
40206c3fb27SDimitry Andric   IndirectCallPromoter(const IndirectCallPromoter &) = delete;
40306c3fb27SDimitry Andric   IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
4040b57cec5SDimitry Andric 
4050b57cec5SDimitry Andric   bool processFunction(ProfileSummaryInfo *PSI);
4060b57cec5SDimitry Andric };
4070b57cec5SDimitry Andric 
4080b57cec5SDimitry Andric } // end anonymous namespace
4090b57cec5SDimitry Andric 
4100b57cec5SDimitry Andric // Indirect-call promotion heuristic. The direct targets are sorted based on
4110b57cec5SDimitry Andric // the count. Stop at the first target that is not promoted.
41206c3fb27SDimitry Andric std::vector<IndirectCallPromoter::PromotionCandidate>
41306c3fb27SDimitry Andric IndirectCallPromoter::getPromotionCandidatesForCallSite(
414*0fca6ea1SDimitry Andric     const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef,
4150b57cec5SDimitry Andric     uint64_t TotalCount, uint32_t NumCandidates) {
4160b57cec5SDimitry Andric   std::vector<PromotionCandidate> Ret;
4170b57cec5SDimitry Andric 
4185ffd83dbSDimitry Andric   LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB
4190b57cec5SDimitry Andric                     << " Num_targets: " << ValueDataRef.size()
4200b57cec5SDimitry Andric                     << " Num_candidates: " << NumCandidates << "\n");
4210b57cec5SDimitry Andric   NumOfPGOICallsites++;
4220b57cec5SDimitry Andric   if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
4230b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << " Skip: User options.\n");
4240b57cec5SDimitry Andric     return Ret;
4250b57cec5SDimitry Andric   }
4260b57cec5SDimitry Andric 
4270b57cec5SDimitry Andric   for (uint32_t I = 0; I < NumCandidates; I++) {
4280b57cec5SDimitry Andric     uint64_t Count = ValueDataRef[I].Count;
4290b57cec5SDimitry Andric     assert(Count <= TotalCount);
430fe6060f1SDimitry Andric     (void)TotalCount;
4310b57cec5SDimitry Andric     uint64_t Target = ValueDataRef[I].Value;
4320b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
4330b57cec5SDimitry Andric                       << "  Target_func: " << Target << "\n");
4340b57cec5SDimitry Andric 
4355ffd83dbSDimitry Andric     if (ICPInvokeOnly && isa<CallInst>(CB)) {
4360b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
4370b57cec5SDimitry Andric       ORE.emit([&]() {
4385ffd83dbSDimitry Andric         return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
4390b57cec5SDimitry Andric                << " Not promote: User options";
4400b57cec5SDimitry Andric       });
4410b57cec5SDimitry Andric       break;
4420b57cec5SDimitry Andric     }
4435ffd83dbSDimitry Andric     if (ICPCallOnly && isa<InvokeInst>(CB)) {
4440b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
4450b57cec5SDimitry Andric       ORE.emit([&]() {
4465ffd83dbSDimitry Andric         return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
4470b57cec5SDimitry Andric                << " Not promote: User options";
4480b57cec5SDimitry Andric       });
4490b57cec5SDimitry Andric       break;
4500b57cec5SDimitry Andric     }
4510b57cec5SDimitry Andric     if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
4520b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
4530b57cec5SDimitry Andric       ORE.emit([&]() {
4545ffd83dbSDimitry Andric         return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB)
4550b57cec5SDimitry Andric                << " Not promote: Cutoff reached";
4560b57cec5SDimitry Andric       });
4570b57cec5SDimitry Andric       break;
4580b57cec5SDimitry Andric     }
4590b57cec5SDimitry Andric 
460e8d8bef9SDimitry Andric     // Don't promote if the symbol is not defined in the module. This avoids
461e8d8bef9SDimitry Andric     // creating a reference to a symbol that doesn't exist in the module
462e8d8bef9SDimitry Andric     // This can happen when we compile with a sample profile collected from
463e8d8bef9SDimitry Andric     // one binary but used for another, which may have profiled targets that
464e8d8bef9SDimitry Andric     // aren't used in the new binary. We might have a declaration initially in
465e8d8bef9SDimitry Andric     // the case where the symbol is globally dead in the binary and removed by
466e8d8bef9SDimitry Andric     // ThinLTO.
4670b57cec5SDimitry Andric     Function *TargetFunction = Symtab->getFunction(Target);
468e8d8bef9SDimitry Andric     if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
4690b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
4700b57cec5SDimitry Andric       ORE.emit([&]() {
4715ffd83dbSDimitry Andric         return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
4720b57cec5SDimitry Andric                << "Cannot promote indirect call: target with md5sum "
4730b57cec5SDimitry Andric                << ore::NV("target md5sum", Target) << " not found";
4740b57cec5SDimitry Andric       });
4750b57cec5SDimitry Andric       break;
4760b57cec5SDimitry Andric     }
4770b57cec5SDimitry Andric 
4780b57cec5SDimitry Andric     const char *Reason = nullptr;
4795ffd83dbSDimitry Andric     if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
4800b57cec5SDimitry Andric       using namespace ore;
4810b57cec5SDimitry Andric 
4820b57cec5SDimitry Andric       ORE.emit([&]() {
4835ffd83dbSDimitry Andric         return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
4840b57cec5SDimitry Andric                << "Cannot promote indirect call to "
4850b57cec5SDimitry Andric                << NV("TargetFunction", TargetFunction) << " with count of "
4860b57cec5SDimitry Andric                << NV("Count", Count) << ": " << Reason;
4870b57cec5SDimitry Andric       });
4880b57cec5SDimitry Andric       break;
4890b57cec5SDimitry Andric     }
4900b57cec5SDimitry Andric 
4910b57cec5SDimitry Andric     Ret.push_back(PromotionCandidate(TargetFunction, Count));
4920b57cec5SDimitry Andric     TotalCount -= Count;
4930b57cec5SDimitry Andric   }
4940b57cec5SDimitry Andric   return Ret;
4950b57cec5SDimitry Andric }
4960b57cec5SDimitry Andric 
497*0fca6ea1SDimitry Andric Constant *IndirectCallPromoter::getOrCreateVTableAddressPointVar(
498*0fca6ea1SDimitry Andric     GlobalVariable *GV, uint64_t AddressPointOffset) {
499*0fca6ea1SDimitry Andric   auto [Iter, Inserted] =
500*0fca6ea1SDimitry Andric       VTableAddressPointOffsetVal[GV].try_emplace(AddressPointOffset, nullptr);
501*0fca6ea1SDimitry Andric   if (Inserted)
502*0fca6ea1SDimitry Andric     Iter->second = getVTableAddressPointOffset(GV, AddressPointOffset);
503*0fca6ea1SDimitry Andric   return Iter->second;
504*0fca6ea1SDimitry Andric }
505*0fca6ea1SDimitry Andric 
506*0fca6ea1SDimitry Andric Instruction *IndirectCallPromoter::computeVTableInfos(
507*0fca6ea1SDimitry Andric     const CallBase *CB, VTableGUIDCountsMap &GUIDCountsMap,
508*0fca6ea1SDimitry Andric     std::vector<PromotionCandidate> &Candidates) {
509*0fca6ea1SDimitry Andric   if (!EnableVTableProfileUse)
510*0fca6ea1SDimitry Andric     return nullptr;
511*0fca6ea1SDimitry Andric 
512*0fca6ea1SDimitry Andric   // Take the following code sequence as an example, here is how the code works
513*0fca6ea1SDimitry Andric   //   @vtable1 = {[n x ptr] [... ptr @func1]}
514*0fca6ea1SDimitry Andric   //   @vtable2 = {[m x ptr] [... ptr @func2]}
515*0fca6ea1SDimitry Andric   //
516*0fca6ea1SDimitry Andric   //   %vptr = load ptr, ptr %d, !prof !0
517*0fca6ea1SDimitry Andric   //   %0 = tail call i1 @llvm.type.test(ptr %vptr, metadata !"vtable1")
518*0fca6ea1SDimitry Andric   //   tail call void @llvm.assume(i1 %0)
519*0fca6ea1SDimitry Andric   //   %vfn = getelementptr inbounds ptr, ptr %vptr, i64 1
520*0fca6ea1SDimitry Andric   //   %1 = load ptr, ptr %vfn
521*0fca6ea1SDimitry Andric   //   call void %1(ptr %d), !prof !1
522*0fca6ea1SDimitry Andric   //
523*0fca6ea1SDimitry Andric   //   !0 = !{!"VP", i32 2, i64 100, i64 123, i64 50, i64 456, i64 50}
524*0fca6ea1SDimitry Andric   //   !1 = !{!"VP", i32 0, i64 100, i64 789, i64 50, i64 579, i64 50}
525*0fca6ea1SDimitry Andric   //
526*0fca6ea1SDimitry Andric   // Step 1. Find out the %vptr instruction for indirect call and use its !prof
527*0fca6ea1SDimitry Andric   // to populate `GUIDCountsMap`.
528*0fca6ea1SDimitry Andric   // Step 2. For each vtable-guid, look up its definition from symtab. LTO can
529*0fca6ea1SDimitry Andric   // make vtable definitions visible across modules.
530*0fca6ea1SDimitry Andric   // Step 3. Compute the byte offset of the virtual call, by adding vtable
531*0fca6ea1SDimitry Andric   // address point offset and function's offset relative to vtable address
532*0fca6ea1SDimitry Andric   // point. For each function candidate, this step tells us the vtable from
533*0fca6ea1SDimitry Andric   // which it comes from, and the vtable address point to compare %vptr with.
534*0fca6ea1SDimitry Andric 
535*0fca6ea1SDimitry Andric   // Only virtual calls have virtual call site info.
536*0fca6ea1SDimitry Andric   auto Iter = VirtualCSInfo.find(CB);
537*0fca6ea1SDimitry Andric   if (Iter == VirtualCSInfo.end())
538*0fca6ea1SDimitry Andric     return nullptr;
539*0fca6ea1SDimitry Andric 
540*0fca6ea1SDimitry Andric   LLVM_DEBUG(dbgs() << "\nComputing vtable infos for callsite #"
541*0fca6ea1SDimitry Andric                     << NumOfPGOICallsites << "\n");
542*0fca6ea1SDimitry Andric 
543*0fca6ea1SDimitry Andric   const auto &VirtualCallInfo = Iter->second;
544*0fca6ea1SDimitry Andric   Instruction *VPtr = VirtualCallInfo.VPtr;
545*0fca6ea1SDimitry Andric 
546*0fca6ea1SDimitry Andric   SmallDenseMap<Function *, int, 4> CalleeIndexMap;
547*0fca6ea1SDimitry Andric   for (size_t I = 0; I < Candidates.size(); I++)
548*0fca6ea1SDimitry Andric     CalleeIndexMap[Candidates[I].TargetFunction] = I;
549*0fca6ea1SDimitry Andric 
550*0fca6ea1SDimitry Andric   uint64_t TotalVTableCount = 0;
551*0fca6ea1SDimitry Andric   auto VTableValueDataArray =
552*0fca6ea1SDimitry Andric       getValueProfDataFromInst(*VirtualCallInfo.VPtr, IPVK_VTableTarget,
553*0fca6ea1SDimitry Andric                                MaxNumVTableAnnotations, TotalVTableCount);
554*0fca6ea1SDimitry Andric   if (VTableValueDataArray.empty())
555*0fca6ea1SDimitry Andric     return VPtr;
556*0fca6ea1SDimitry Andric 
557*0fca6ea1SDimitry Andric   // Compute the functions and counts from by each vtable.
558*0fca6ea1SDimitry Andric   for (const auto &V : VTableValueDataArray) {
559*0fca6ea1SDimitry Andric     uint64_t VTableVal = V.Value;
560*0fca6ea1SDimitry Andric     GUIDCountsMap[VTableVal] = V.Count;
561*0fca6ea1SDimitry Andric     GlobalVariable *VTableVar = Symtab->getGlobalVariable(VTableVal);
562*0fca6ea1SDimitry Andric     if (!VTableVar) {
563*0fca6ea1SDimitry Andric       LLVM_DEBUG(dbgs() << "  Cannot find vtable definition for " << VTableVal
564*0fca6ea1SDimitry Andric                         << "; maybe the vtable isn't imported\n");
565*0fca6ea1SDimitry Andric       continue;
566*0fca6ea1SDimitry Andric     }
567*0fca6ea1SDimitry Andric 
568*0fca6ea1SDimitry Andric     std::optional<uint64_t> MaybeAddressPointOffset =
569*0fca6ea1SDimitry Andric         getAddressPointOffset(*VTableVar, VirtualCallInfo.CompatibleTypeStr);
570*0fca6ea1SDimitry Andric     if (!MaybeAddressPointOffset)
571*0fca6ea1SDimitry Andric       continue;
572*0fca6ea1SDimitry Andric 
573*0fca6ea1SDimitry Andric     const uint64_t AddressPointOffset = *MaybeAddressPointOffset;
574*0fca6ea1SDimitry Andric 
575*0fca6ea1SDimitry Andric     Function *Callee = nullptr;
576*0fca6ea1SDimitry Andric     std::tie(Callee, std::ignore) = getFunctionAtVTableOffset(
577*0fca6ea1SDimitry Andric         VTableVar, AddressPointOffset + VirtualCallInfo.FunctionOffset, M);
578*0fca6ea1SDimitry Andric     if (!Callee)
579*0fca6ea1SDimitry Andric       continue;
580*0fca6ea1SDimitry Andric     auto CalleeIndexIter = CalleeIndexMap.find(Callee);
581*0fca6ea1SDimitry Andric     if (CalleeIndexIter == CalleeIndexMap.end())
582*0fca6ea1SDimitry Andric       continue;
583*0fca6ea1SDimitry Andric 
584*0fca6ea1SDimitry Andric     auto &Candidate = Candidates[CalleeIndexIter->second];
585*0fca6ea1SDimitry Andric     // There shouldn't be duplicate GUIDs in one !prof metadata (except
586*0fca6ea1SDimitry Andric     // duplicated zeros), so assign counters directly won't cause overwrite or
587*0fca6ea1SDimitry Andric     // counter loss.
588*0fca6ea1SDimitry Andric     Candidate.VTableGUIDAndCounts[VTableVal] = V.Count;
589*0fca6ea1SDimitry Andric     Candidate.AddressPoints.push_back(
590*0fca6ea1SDimitry Andric         getOrCreateVTableAddressPointVar(VTableVar, AddressPointOffset));
591*0fca6ea1SDimitry Andric   }
592*0fca6ea1SDimitry Andric 
593*0fca6ea1SDimitry Andric   return VPtr;
594*0fca6ea1SDimitry Andric }
595*0fca6ea1SDimitry Andric 
596*0fca6ea1SDimitry Andric // Creates 'branch_weights' prof metadata using TrueWeight and FalseWeight.
597*0fca6ea1SDimitry Andric // Scales uint64_t counters down to uint32_t if necessary to prevent overflow.
598*0fca6ea1SDimitry Andric static MDNode *createBranchWeights(LLVMContext &Context, uint64_t TrueWeight,
599*0fca6ea1SDimitry Andric                                    uint64_t FalseWeight) {
600*0fca6ea1SDimitry Andric   MDBuilder MDB(Context);
601*0fca6ea1SDimitry Andric   uint64_t Scale = calculateCountScale(std::max(TrueWeight, FalseWeight));
602*0fca6ea1SDimitry Andric   return MDB.createBranchWeights(scaleBranchCount(TrueWeight, Scale),
603*0fca6ea1SDimitry Andric                                  scaleBranchCount(FalseWeight, Scale));
604*0fca6ea1SDimitry Andric }
605*0fca6ea1SDimitry Andric 
6065ffd83dbSDimitry Andric CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
6070b57cec5SDimitry Andric                                          uint64_t Count, uint64_t TotalCount,
6080b57cec5SDimitry Andric                                          bool AttachProfToDirectCall,
6090b57cec5SDimitry Andric                                          OptimizationRemarkEmitter *ORE) {
610*0fca6ea1SDimitry Andric   CallBase &NewInst = promoteCallWithIfThenElse(
611*0fca6ea1SDimitry Andric       CB, DirectCallee,
612*0fca6ea1SDimitry Andric       createBranchWeights(CB.getContext(), Count, TotalCount - Count));
6130b57cec5SDimitry Andric 
614*0fca6ea1SDimitry Andric   if (AttachProfToDirectCall)
615*0fca6ea1SDimitry Andric     setBranchWeights(NewInst, {static_cast<uint32_t>(Count)},
616*0fca6ea1SDimitry Andric                      /*IsExpected=*/false);
6170b57cec5SDimitry Andric 
6180b57cec5SDimitry Andric   using namespace ore;
6190b57cec5SDimitry Andric 
6200b57cec5SDimitry Andric   if (ORE)
6210b57cec5SDimitry Andric     ORE->emit([&]() {
6225ffd83dbSDimitry Andric       return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB)
6230b57cec5SDimitry Andric              << "Promote indirect call to " << NV("DirectCallee", DirectCallee)
6240b57cec5SDimitry Andric              << " with count " << NV("Count", Count) << " out of "
6250b57cec5SDimitry Andric              << NV("TotalCount", TotalCount);
6260b57cec5SDimitry Andric     });
6270b57cec5SDimitry Andric   return NewInst;
6280b57cec5SDimitry Andric }
6290b57cec5SDimitry Andric 
6300b57cec5SDimitry Andric // Promote indirect-call to conditional direct-call for one callsite.
631*0fca6ea1SDimitry Andric bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
632*0fca6ea1SDimitry Andric     CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
633*0fca6ea1SDimitry Andric     uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
634*0fca6ea1SDimitry Andric     uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) {
6350b57cec5SDimitry Andric   uint32_t NumPromoted = 0;
6360b57cec5SDimitry Andric 
637bdd1243dSDimitry Andric   for (const auto &C : Candidates) {
638*0fca6ea1SDimitry Andric     uint64_t FuncCount = C.Count;
639*0fca6ea1SDimitry Andric     pgo::promoteIndirectCall(CB, C.TargetFunction, FuncCount, TotalCount,
640*0fca6ea1SDimitry Andric                              SamplePGO, &ORE);
641*0fca6ea1SDimitry Andric     assert(TotalCount >= FuncCount);
642*0fca6ea1SDimitry Andric     TotalCount -= FuncCount;
6430b57cec5SDimitry Andric     NumOfPGOICallPromotion++;
6440b57cec5SDimitry Andric     NumPromoted++;
645*0fca6ea1SDimitry Andric 
646*0fca6ea1SDimitry Andric     if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty())
647*0fca6ea1SDimitry Andric       continue;
648*0fca6ea1SDimitry Andric 
649*0fca6ea1SDimitry Andric     // After a virtual call candidate gets promoted, update the vtable's counts
650*0fca6ea1SDimitry Andric     // proportionally. Each vtable-guid in `C.VTableGUIDAndCounts` represents
651*0fca6ea1SDimitry Andric     // a vtable from which the virtual call is loaded. Compute the sum and use
652*0fca6ea1SDimitry Andric     // 128-bit APInt to improve accuracy.
653*0fca6ea1SDimitry Andric     uint64_t SumVTableCount = 0;
654*0fca6ea1SDimitry Andric     for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts)
655*0fca6ea1SDimitry Andric       SumVTableCount += VTableCount;
656*0fca6ea1SDimitry Andric 
657*0fca6ea1SDimitry Andric     for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) {
658*0fca6ea1SDimitry Andric       APInt APFuncCount((unsigned)128, FuncCount, false /*signed*/);
659*0fca6ea1SDimitry Andric       APFuncCount *= VTableCount;
660*0fca6ea1SDimitry Andric       VTableGUIDCounts[GUID] -= APFuncCount.udiv(SumVTableCount).getZExtValue();
6610b57cec5SDimitry Andric     }
662*0fca6ea1SDimitry Andric   }
663*0fca6ea1SDimitry Andric   if (NumPromoted == 0)
664*0fca6ea1SDimitry Andric     return false;
665*0fca6ea1SDimitry Andric 
666*0fca6ea1SDimitry Andric   assert(NumPromoted <= ICallProfDataRef.size() &&
667*0fca6ea1SDimitry Andric          "Number of promoted functions should not be greater than the number "
668*0fca6ea1SDimitry Andric          "of values in profile metadata");
669*0fca6ea1SDimitry Andric 
670*0fca6ea1SDimitry Andric   // Update value profiles on the indirect call.
671*0fca6ea1SDimitry Andric   updateFuncValueProfiles(CB, ICallProfDataRef.slice(NumPromoted), TotalCount,
672*0fca6ea1SDimitry Andric                           NumCandidates);
673*0fca6ea1SDimitry Andric   updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
674*0fca6ea1SDimitry Andric   return true;
675*0fca6ea1SDimitry Andric }
676*0fca6ea1SDimitry Andric 
677*0fca6ea1SDimitry Andric void IndirectCallPromoter::updateFuncValueProfiles(
678*0fca6ea1SDimitry Andric     CallBase &CB, ArrayRef<InstrProfValueData> CallVDs, uint64_t TotalCount,
679*0fca6ea1SDimitry Andric     uint32_t MaxMDCount) {
680*0fca6ea1SDimitry Andric   // First clear the existing !prof.
681*0fca6ea1SDimitry Andric   CB.setMetadata(LLVMContext::MD_prof, nullptr);
682*0fca6ea1SDimitry Andric   // Annotate the remaining value profiles if counter is not zero.
683*0fca6ea1SDimitry Andric   if (TotalCount != 0)
684*0fca6ea1SDimitry Andric     annotateValueSite(M, CB, CallVDs, TotalCount, IPVK_IndirectCallTarget,
685*0fca6ea1SDimitry Andric                       MaxMDCount);
686*0fca6ea1SDimitry Andric }
687*0fca6ea1SDimitry Andric 
688*0fca6ea1SDimitry Andric void IndirectCallPromoter::updateVPtrValueProfiles(
689*0fca6ea1SDimitry Andric     Instruction *VPtr, VTableGUIDCountsMap &VTableGUIDCounts) {
690*0fca6ea1SDimitry Andric   if (!EnableVTableProfileUse || VPtr == nullptr ||
691*0fca6ea1SDimitry Andric       !VPtr->getMetadata(LLVMContext::MD_prof))
692*0fca6ea1SDimitry Andric     return;
693*0fca6ea1SDimitry Andric   VPtr->setMetadata(LLVMContext::MD_prof, nullptr);
694*0fca6ea1SDimitry Andric   std::vector<InstrProfValueData> VTableValueProfiles;
695*0fca6ea1SDimitry Andric   uint64_t TotalVTableCount = 0;
696*0fca6ea1SDimitry Andric   for (auto [GUID, Count] : VTableGUIDCounts) {
697*0fca6ea1SDimitry Andric     if (Count == 0)
698*0fca6ea1SDimitry Andric       continue;
699*0fca6ea1SDimitry Andric 
700*0fca6ea1SDimitry Andric     VTableValueProfiles.push_back({GUID, Count});
701*0fca6ea1SDimitry Andric     TotalVTableCount += Count;
702*0fca6ea1SDimitry Andric   }
703*0fca6ea1SDimitry Andric   llvm::sort(VTableValueProfiles,
704*0fca6ea1SDimitry Andric              [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) {
705*0fca6ea1SDimitry Andric                return LHS.Count > RHS.Count;
706*0fca6ea1SDimitry Andric              });
707*0fca6ea1SDimitry Andric 
708*0fca6ea1SDimitry Andric   annotateValueSite(M, *VPtr, VTableValueProfiles, TotalVTableCount,
709*0fca6ea1SDimitry Andric                     IPVK_VTableTarget, VTableValueProfiles.size());
710*0fca6ea1SDimitry Andric }
711*0fca6ea1SDimitry Andric 
712*0fca6ea1SDimitry Andric bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
713*0fca6ea1SDimitry Andric     CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
714*0fca6ea1SDimitry Andric     uint64_t TotalFuncCount, uint32_t NumCandidates,
715*0fca6ea1SDimitry Andric     MutableArrayRef<InstrProfValueData> ICallProfDataRef,
716*0fca6ea1SDimitry Andric     VTableGUIDCountsMap &VTableGUIDCounts) {
717*0fca6ea1SDimitry Andric   SmallVector<uint64_t, 4> PromotedFuncCount;
718*0fca6ea1SDimitry Andric 
719*0fca6ea1SDimitry Andric   for (const auto &Candidate : Candidates) {
720*0fca6ea1SDimitry Andric     for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
721*0fca6ea1SDimitry Andric       VTableGUIDCounts[GUID] -= Count;
722*0fca6ea1SDimitry Andric 
723*0fca6ea1SDimitry Andric     // 'OriginalBB' is the basic block of indirect call. After each candidate
724*0fca6ea1SDimitry Andric     // is promoted, a new basic block is created for the indirect fallback basic
725*0fca6ea1SDimitry Andric     // block and indirect call `CB` is moved into this new BB.
726*0fca6ea1SDimitry Andric     BasicBlock *OriginalBB = CB.getParent();
727*0fca6ea1SDimitry Andric     promoteCallWithVTableCmp(
728*0fca6ea1SDimitry Andric         CB, VPtr, Candidate.TargetFunction, Candidate.AddressPoints,
729*0fca6ea1SDimitry Andric         createBranchWeights(CB.getContext(), Candidate.Count,
730*0fca6ea1SDimitry Andric                             TotalFuncCount - Candidate.Count));
731*0fca6ea1SDimitry Andric 
732*0fca6ea1SDimitry Andric     int SinkCount = tryToSinkInstructions(OriginalBB, CB.getParent());
733*0fca6ea1SDimitry Andric 
734*0fca6ea1SDimitry Andric     ORE.emit([&]() {
735*0fca6ea1SDimitry Andric       OptimizationRemark Remark(DEBUG_TYPE, "Promoted", &CB);
736*0fca6ea1SDimitry Andric 
737*0fca6ea1SDimitry Andric       const auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts;
738*0fca6ea1SDimitry Andric       Remark << "Promote indirect call to "
739*0fca6ea1SDimitry Andric              << ore::NV("DirectCallee", Candidate.TargetFunction)
740*0fca6ea1SDimitry Andric              << " with count " << ore::NV("Count", Candidate.Count)
741*0fca6ea1SDimitry Andric              << " out of " << ore::NV("TotalCount", TotalFuncCount) << ", sink "
742*0fca6ea1SDimitry Andric              << ore::NV("SinkCount", SinkCount)
743*0fca6ea1SDimitry Andric              << " instruction(s) and compare "
744*0fca6ea1SDimitry Andric              << ore::NV("VTable", VTableGUIDAndCounts.size())
745*0fca6ea1SDimitry Andric              << " vtable(s): {";
746*0fca6ea1SDimitry Andric 
747*0fca6ea1SDimitry Andric       // Sort GUIDs so remark message is deterministic.
748*0fca6ea1SDimitry Andric       std::set<uint64_t> GUIDSet;
749*0fca6ea1SDimitry Andric       for (auto [GUID, Count] : VTableGUIDAndCounts)
750*0fca6ea1SDimitry Andric         GUIDSet.insert(GUID);
751*0fca6ea1SDimitry Andric       for (auto Iter = GUIDSet.begin(); Iter != GUIDSet.end(); Iter++) {
752*0fca6ea1SDimitry Andric         if (Iter != GUIDSet.begin())
753*0fca6ea1SDimitry Andric           Remark << ", ";
754*0fca6ea1SDimitry Andric         Remark << ore::NV("VTable", Symtab->getGlobalVariable(*Iter));
755*0fca6ea1SDimitry Andric       }
756*0fca6ea1SDimitry Andric 
757*0fca6ea1SDimitry Andric       Remark << "}";
758*0fca6ea1SDimitry Andric 
759*0fca6ea1SDimitry Andric       return Remark;
760*0fca6ea1SDimitry Andric     });
761*0fca6ea1SDimitry Andric 
762*0fca6ea1SDimitry Andric     PromotedFuncCount.push_back(Candidate.Count);
763*0fca6ea1SDimitry Andric 
764*0fca6ea1SDimitry Andric     assert(TotalFuncCount >= Candidate.Count &&
765*0fca6ea1SDimitry Andric            "Within one prof metadata, total count is the sum of counts from "
766*0fca6ea1SDimitry Andric            "individual <target, count> pairs");
767*0fca6ea1SDimitry Andric     // Use std::min since 'TotalFuncCount' is the saturated sum of individual
768*0fca6ea1SDimitry Andric     // counts, see
769*0fca6ea1SDimitry Andric     // https://github.com/llvm/llvm-project/blob/abedb3b8356d5d56f1c575c4f7682fba2cb19787/llvm/lib/ProfileData/InstrProf.cpp#L1281-L1288
770*0fca6ea1SDimitry Andric     TotalFuncCount -= std::min(TotalFuncCount, Candidate.Count);
771*0fca6ea1SDimitry Andric     NumOfPGOICallPromotion++;
772*0fca6ea1SDimitry Andric   }
773*0fca6ea1SDimitry Andric 
774*0fca6ea1SDimitry Andric   if (PromotedFuncCount.empty())
775*0fca6ea1SDimitry Andric     return false;
776*0fca6ea1SDimitry Andric 
777*0fca6ea1SDimitry Andric   // Update value profiles for 'CB' and 'VPtr', assuming that each 'CB' has a
778*0fca6ea1SDimitry Andric   // a distinct 'VPtr'.
779*0fca6ea1SDimitry Andric   // FIXME: When Clang `-fstrict-vtable-pointers` is enabled, a vtable might be
780*0fca6ea1SDimitry Andric   // used to load multiple virtual functions. The vtable profiles needs to be
781*0fca6ea1SDimitry Andric   // updated properly in that case (e.g, for each indirect call annotate both
782*0fca6ea1SDimitry Andric   // type profiles and function profiles in one !prof).
783*0fca6ea1SDimitry Andric   for (size_t I = 0; I < PromotedFuncCount.size(); I++)
784*0fca6ea1SDimitry Andric     ICallProfDataRef[I].Count -=
785*0fca6ea1SDimitry Andric         std::max(PromotedFuncCount[I], ICallProfDataRef[I].Count);
786*0fca6ea1SDimitry Andric   // Sort value profiles by count in descending order.
787*0fca6ea1SDimitry Andric   llvm::stable_sort(ICallProfDataRef, [](const InstrProfValueData &LHS,
788*0fca6ea1SDimitry Andric                                          const InstrProfValueData &RHS) {
789*0fca6ea1SDimitry Andric     return LHS.Count > RHS.Count;
790*0fca6ea1SDimitry Andric   });
791*0fca6ea1SDimitry Andric   // Drop the <target-value, count> pair if count is zero.
792*0fca6ea1SDimitry Andric   ArrayRef<InstrProfValueData> VDs(
793*0fca6ea1SDimitry Andric       ICallProfDataRef.begin(),
794*0fca6ea1SDimitry Andric       llvm::upper_bound(ICallProfDataRef, 0U,
795*0fca6ea1SDimitry Andric                         [](uint64_t Count, const InstrProfValueData &ProfData) {
796*0fca6ea1SDimitry Andric                           return ProfData.Count <= Count;
797*0fca6ea1SDimitry Andric                         }));
798*0fca6ea1SDimitry Andric   updateFuncValueProfiles(CB, VDs, TotalFuncCount, NumCandidates);
799*0fca6ea1SDimitry Andric   updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
800*0fca6ea1SDimitry Andric   return true;
8010b57cec5SDimitry Andric }
8020b57cec5SDimitry Andric 
8030b57cec5SDimitry Andric // Traverse all the indirect-call callsite and get the value profile
8040b57cec5SDimitry Andric // annotation to perform indirect-call promotion.
80506c3fb27SDimitry Andric bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
8060b57cec5SDimitry Andric   bool Changed = false;
8070b57cec5SDimitry Andric   ICallPromotionAnalysis ICallAnalysis;
8085ffd83dbSDimitry Andric   for (auto *CB : findIndirectCalls(F)) {
809*0fca6ea1SDimitry Andric     uint32_t NumCandidates;
8100b57cec5SDimitry Andric     uint64_t TotalCount;
8110b57cec5SDimitry Andric     auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
812*0fca6ea1SDimitry Andric         CB, TotalCount, NumCandidates);
8130b57cec5SDimitry Andric     if (!NumCandidates ||
8140b57cec5SDimitry Andric         (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
8150b57cec5SDimitry Andric       continue;
816*0fca6ea1SDimitry Andric 
8170b57cec5SDimitry Andric     auto PromotionCandidates = getPromotionCandidatesForCallSite(
8185ffd83dbSDimitry Andric         *CB, ICallProfDataRef, TotalCount, NumCandidates);
8190b57cec5SDimitry Andric 
820*0fca6ea1SDimitry Andric     VTableGUIDCountsMap VTableGUIDCounts;
821*0fca6ea1SDimitry Andric     Instruction *VPtr =
822*0fca6ea1SDimitry Andric         computeVTableInfos(CB, VTableGUIDCounts, PromotionCandidates);
823*0fca6ea1SDimitry Andric 
824*0fca6ea1SDimitry Andric     if (isProfitableToCompareVTables(*CB, PromotionCandidates, TotalCount))
825*0fca6ea1SDimitry Andric       Changed |= tryToPromoteWithVTableCmp(*CB, VPtr, PromotionCandidates,
826*0fca6ea1SDimitry Andric                                            TotalCount, NumCandidates,
827*0fca6ea1SDimitry Andric                                            ICallProfDataRef, VTableGUIDCounts);
828*0fca6ea1SDimitry Andric     else
829*0fca6ea1SDimitry Andric       Changed |= tryToPromoteWithFuncCmp(*CB, VPtr, PromotionCandidates,
830*0fca6ea1SDimitry Andric                                          TotalCount, ICallProfDataRef,
831*0fca6ea1SDimitry Andric                                          NumCandidates, VTableGUIDCounts);
8320b57cec5SDimitry Andric   }
8330b57cec5SDimitry Andric   return Changed;
8340b57cec5SDimitry Andric }
8350b57cec5SDimitry Andric 
836*0fca6ea1SDimitry Andric // TODO: Return false if the function addressing and vtable load instructions
837*0fca6ea1SDimitry Andric // cannot sink to indirect fallback.
838*0fca6ea1SDimitry Andric bool IndirectCallPromoter::isProfitableToCompareVTables(
839*0fca6ea1SDimitry Andric     const CallBase &CB, ArrayRef<PromotionCandidate> Candidates,
840*0fca6ea1SDimitry Andric     uint64_t TotalCount) {
841*0fca6ea1SDimitry Andric   if (!EnableVTableProfileUse || Candidates.empty())
842*0fca6ea1SDimitry Andric     return false;
843*0fca6ea1SDimitry Andric   LLVM_DEBUG(dbgs() << "\nEvaluating vtable profitability for callsite #"
844*0fca6ea1SDimitry Andric                     << NumOfPGOICallsites << CB << "\n");
845*0fca6ea1SDimitry Andric   uint64_t RemainingVTableCount = TotalCount;
846*0fca6ea1SDimitry Andric   const size_t CandidateSize = Candidates.size();
847*0fca6ea1SDimitry Andric   for (size_t I = 0; I < CandidateSize; I++) {
848*0fca6ea1SDimitry Andric     auto &Candidate = Candidates[I];
849*0fca6ea1SDimitry Andric     auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts;
850*0fca6ea1SDimitry Andric 
851*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "  Candidate " << I << " FunctionCount: "
852*0fca6ea1SDimitry Andric                       << Candidate.Count << ", VTableCounts:");
853*0fca6ea1SDimitry Andric     // Add [[maybe_unused]] since <GUID, Count> are only used by LLVM_DEBUG.
854*0fca6ea1SDimitry Andric     for ([[maybe_unused]] auto &[GUID, Count] : VTableGUIDAndCounts)
855*0fca6ea1SDimitry Andric       LLVM_DEBUG(dbgs() << " {" << Symtab->getGlobalVariable(GUID)->getName()
856*0fca6ea1SDimitry Andric                         << ", " << Count << "}");
857*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "\n");
858*0fca6ea1SDimitry Andric 
859*0fca6ea1SDimitry Andric     uint64_t CandidateVTableCount = 0;
860*0fca6ea1SDimitry Andric     for (auto &[GUID, Count] : VTableGUIDAndCounts)
861*0fca6ea1SDimitry Andric       CandidateVTableCount += Count;
862*0fca6ea1SDimitry Andric 
863*0fca6ea1SDimitry Andric     if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
864*0fca6ea1SDimitry Andric       LLVM_DEBUG(
865*0fca6ea1SDimitry Andric           dbgs() << "    function count " << Candidate.Count
866*0fca6ea1SDimitry Andric                  << " and its vtable sum count " << CandidateVTableCount
867*0fca6ea1SDimitry Andric                  << " have discrepancies. Bail out vtable comparison.\n");
868*0fca6ea1SDimitry Andric       return false;
869*0fca6ea1SDimitry Andric     }
870*0fca6ea1SDimitry Andric 
871*0fca6ea1SDimitry Andric     RemainingVTableCount -= Candidate.Count;
872*0fca6ea1SDimitry Andric 
873*0fca6ea1SDimitry Andric     // 'MaxNumVTable' limits the number of vtables to make vtable comparison
874*0fca6ea1SDimitry Andric     // profitable. Comparing multiple vtables for one function candidate will
875*0fca6ea1SDimitry Andric     // insert additional instructions on the hot path, and allowing more than
876*0fca6ea1SDimitry Andric     // one vtable for non last candidates may or may not elongate the dependency
877*0fca6ea1SDimitry Andric     // chain for the subsequent candidates. Set its value to 1 for non-last
878*0fca6ea1SDimitry Andric     // candidate and allow option to override it for the last candidate.
879*0fca6ea1SDimitry Andric     int MaxNumVTable = 1;
880*0fca6ea1SDimitry Andric     if (I == CandidateSize - 1)
881*0fca6ea1SDimitry Andric       MaxNumVTable = ICPMaxNumVTableLastCandidate;
882*0fca6ea1SDimitry Andric 
883*0fca6ea1SDimitry Andric     if ((int)Candidate.AddressPoints.size() > MaxNumVTable) {
884*0fca6ea1SDimitry Andric       LLVM_DEBUG(dbgs() << "    allow at most " << MaxNumVTable << " and got "
885*0fca6ea1SDimitry Andric                         << Candidate.AddressPoints.size()
886*0fca6ea1SDimitry Andric                         << " vtables. Bail out for vtable comparison.\n");
887*0fca6ea1SDimitry Andric       return false;
888*0fca6ea1SDimitry Andric     }
889*0fca6ea1SDimitry Andric   }
890*0fca6ea1SDimitry Andric 
891*0fca6ea1SDimitry Andric   // If the indirect fallback is not cold, don't compare vtables.
892*0fca6ea1SDimitry Andric   if (PSI && PSI->hasProfileSummary() &&
893*0fca6ea1SDimitry Andric       !PSI->isColdCount(RemainingVTableCount)) {
894*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "    Indirect fallback basic block is not cold. Bail "
895*0fca6ea1SDimitry Andric                          "out for vtable comparison.\n");
896*0fca6ea1SDimitry Andric     return false;
897*0fca6ea1SDimitry Andric   }
898*0fca6ea1SDimitry Andric 
899*0fca6ea1SDimitry Andric   return true;
900*0fca6ea1SDimitry Andric }
901*0fca6ea1SDimitry Andric 
902*0fca6ea1SDimitry Andric // For virtual calls in the module, collect per-callsite information which will
903*0fca6ea1SDimitry Andric // be used to associate an ICP candidate with a vtable and a specific function
904*0fca6ea1SDimitry Andric // in the vtable. With type intrinsics (llvm.type.test), we can find virtual
905*0fca6ea1SDimitry Andric // calls in a compile-time efficient manner (by iterating its users) and more
906*0fca6ea1SDimitry Andric // importantly use the compatible type later to figure out the function byte
907*0fca6ea1SDimitry Andric // offset relative to the start of vtables.
908*0fca6ea1SDimitry Andric static void
909*0fca6ea1SDimitry Andric computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM,
910*0fca6ea1SDimitry Andric                                   VirtualCallSiteTypeInfoMap &VirtualCSInfo) {
911*0fca6ea1SDimitry Andric   // Right now only llvm.type.test is used to find out virtual call sites.
912*0fca6ea1SDimitry Andric   // With ThinLTO and whole-program-devirtualization, llvm.type.test and
913*0fca6ea1SDimitry Andric   // llvm.public.type.test are emitted, and llvm.public.type.test is either
914*0fca6ea1SDimitry Andric   // refined to llvm.type.test or dropped before indirect-call-promotion pass.
915*0fca6ea1SDimitry Andric   //
916*0fca6ea1SDimitry Andric   // FIXME: For fullLTO with VFE, `llvm.type.checked.load intrinsic` is emitted.
917*0fca6ea1SDimitry Andric   // Find out virtual calls by looking at users of llvm.type.checked.load in
918*0fca6ea1SDimitry Andric   // that case.
919*0fca6ea1SDimitry Andric   Function *TypeTestFunc =
920*0fca6ea1SDimitry Andric       M.getFunction(Intrinsic::getName(Intrinsic::type_test));
921*0fca6ea1SDimitry Andric   if (!TypeTestFunc || TypeTestFunc->use_empty())
922*0fca6ea1SDimitry Andric     return;
923*0fca6ea1SDimitry Andric 
924*0fca6ea1SDimitry Andric   auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
925*0fca6ea1SDimitry Andric   auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
926*0fca6ea1SDimitry Andric     return FAM.getResult<DominatorTreeAnalysis>(F);
927*0fca6ea1SDimitry Andric   };
928*0fca6ea1SDimitry Andric   // Iterate all type.test calls to find all indirect calls.
929*0fca6ea1SDimitry Andric   for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
930*0fca6ea1SDimitry Andric     auto *CI = dyn_cast<CallInst>(U.getUser());
931*0fca6ea1SDimitry Andric     if (!CI)
932*0fca6ea1SDimitry Andric       continue;
933*0fca6ea1SDimitry Andric     auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
934*0fca6ea1SDimitry Andric     if (!TypeMDVal)
935*0fca6ea1SDimitry Andric       continue;
936*0fca6ea1SDimitry Andric     auto *CompatibleTypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
937*0fca6ea1SDimitry Andric     if (!CompatibleTypeId)
938*0fca6ea1SDimitry Andric       continue;
939*0fca6ea1SDimitry Andric 
940*0fca6ea1SDimitry Andric     // Find out all devirtualizable call sites given a llvm.type.test
941*0fca6ea1SDimitry Andric     // intrinsic call.
942*0fca6ea1SDimitry Andric     SmallVector<DevirtCallSite, 1> DevirtCalls;
943*0fca6ea1SDimitry Andric     SmallVector<CallInst *, 1> Assumes;
944*0fca6ea1SDimitry Andric     auto &DT = LookupDomTree(*CI->getFunction());
945*0fca6ea1SDimitry Andric     findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
946*0fca6ea1SDimitry Andric 
947*0fca6ea1SDimitry Andric     for (auto &DevirtCall : DevirtCalls) {
948*0fca6ea1SDimitry Andric       CallBase &CB = DevirtCall.CB;
949*0fca6ea1SDimitry Andric       // Given an indirect call, try find the instruction which loads a
950*0fca6ea1SDimitry Andric       // pointer to virtual table.
951*0fca6ea1SDimitry Andric       Instruction *VTablePtr =
952*0fca6ea1SDimitry Andric           PGOIndirectCallVisitor::tryGetVTableInstruction(&CB);
953*0fca6ea1SDimitry Andric       if (!VTablePtr)
954*0fca6ea1SDimitry Andric         continue;
955*0fca6ea1SDimitry Andric       VirtualCSInfo[&CB] = {DevirtCall.Offset, VTablePtr,
956*0fca6ea1SDimitry Andric                             CompatibleTypeId->getString()};
957*0fca6ea1SDimitry Andric     }
958*0fca6ea1SDimitry Andric   }
959*0fca6ea1SDimitry Andric }
960*0fca6ea1SDimitry Andric 
9610b57cec5SDimitry Andric // A wrapper function that does the actual work.
96206c3fb27SDimitry Andric static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
96306c3fb27SDimitry Andric                                  bool SamplePGO, ModuleAnalysisManager &MAM) {
9640b57cec5SDimitry Andric   if (DisableICP)
9650b57cec5SDimitry Andric     return false;
9660b57cec5SDimitry Andric   InstrProfSymtab Symtab;
9670b57cec5SDimitry Andric   if (Error E = Symtab.create(M, InLTO)) {
9680b57cec5SDimitry Andric     std::string SymtabFailure = toString(std::move(E));
969fe6060f1SDimitry Andric     M.getContext().emitError("Failed to create symtab: " + SymtabFailure);
9700b57cec5SDimitry Andric     return false;
9710b57cec5SDimitry Andric   }
9720b57cec5SDimitry Andric   bool Changed = false;
973*0fca6ea1SDimitry Andric   VirtualCallSiteTypeInfoMap VirtualCSInfo;
974*0fca6ea1SDimitry Andric 
975*0fca6ea1SDimitry Andric   if (EnableVTableProfileUse)
976*0fca6ea1SDimitry Andric     computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
977*0fca6ea1SDimitry Andric 
978*0fca6ea1SDimitry Andric   // VTableAddressPointOffsetVal stores the vtable address points. The vtable
979*0fca6ea1SDimitry Andric   // address point of a given <vtable, address point offset> is static (doesn't
980*0fca6ea1SDimitry Andric   // change after being computed once).
981*0fca6ea1SDimitry Andric   // IndirectCallPromoter::getOrCreateVTableAddressPointVar creates the map
982*0fca6ea1SDimitry Andric   // entry the first time a <vtable, offset> pair is seen, as
983*0fca6ea1SDimitry Andric   // promoteIndirectCalls processes an IR module and calls IndirectCallPromoter
984*0fca6ea1SDimitry Andric   // repeatedly on each function.
985*0fca6ea1SDimitry Andric   VTableAddressPointOffsetValMap VTableAddressPointOffsetVal;
986*0fca6ea1SDimitry Andric 
9870b57cec5SDimitry Andric   for (auto &F : M) {
9880b57cec5SDimitry Andric     if (F.isDeclaration() || F.hasOptNone())
9890b57cec5SDimitry Andric       continue;
9900b57cec5SDimitry Andric 
9910b57cec5SDimitry Andric     auto &FAM =
99206c3fb27SDimitry Andric         MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
99306c3fb27SDimitry Andric     auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
9940b57cec5SDimitry Andric 
995*0fca6ea1SDimitry Andric     IndirectCallPromoter CallPromoter(F, M, PSI, &Symtab, SamplePGO,
996*0fca6ea1SDimitry Andric                                       VirtualCSInfo,
997*0fca6ea1SDimitry Andric                                       VTableAddressPointOffsetVal, ORE);
99806c3fb27SDimitry Andric     bool FuncChanged = CallPromoter.processFunction(PSI);
9990b57cec5SDimitry Andric     if (ICPDUMPAFTER && FuncChanged) {
10000b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
10010b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "\n");
10020b57cec5SDimitry Andric     }
10030b57cec5SDimitry Andric     Changed |= FuncChanged;
10040b57cec5SDimitry Andric     if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
10050b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n");
10060b57cec5SDimitry Andric       break;
10070b57cec5SDimitry Andric     }
10080b57cec5SDimitry Andric   }
10090b57cec5SDimitry Andric   return Changed;
10100b57cec5SDimitry Andric }
10110b57cec5SDimitry Andric 
10120b57cec5SDimitry Andric PreservedAnalyses PGOIndirectCallPromotion::run(Module &M,
101306c3fb27SDimitry Andric                                                 ModuleAnalysisManager &MAM) {
101406c3fb27SDimitry Andric   ProfileSummaryInfo *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
10150b57cec5SDimitry Andric 
10160b57cec5SDimitry Andric   if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
101706c3fb27SDimitry Andric                             SamplePGO | ICPSamplePGOMode, MAM))
10180b57cec5SDimitry Andric     return PreservedAnalyses::all();
10190b57cec5SDimitry Andric 
10200b57cec5SDimitry Andric   return PreservedAnalyses::none();
10210b57cec5SDimitry Andric }
1022