1 //===- InlineFunction.cpp - Code to perform function inlining -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements inlining of a function into a call site, resolving
10 // parameters and the return value as appropriate.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/DenseMap.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/iterator_range.h"
21 #include "llvm/Analysis/AliasAnalysis.h"
22 #include "llvm/Analysis/AssumptionCache.h"
23 #include "llvm/Analysis/BlockFrequencyInfo.h"
24 #include "llvm/Analysis/CallGraph.h"
25 #include "llvm/Analysis/CaptureTracking.h"
26 #include "llvm/Analysis/CtxProfAnalysis.h"
27 #include "llvm/Analysis/IndirectCallVisitor.h"
28 #include "llvm/Analysis/InstructionSimplify.h"
29 #include "llvm/Analysis/MemoryProfileInfo.h"
30 #include "llvm/Analysis/ObjCARCAnalysisUtils.h"
31 #include "llvm/Analysis/ObjCARCUtil.h"
32 #include "llvm/Analysis/ProfileSummaryInfo.h"
33 #include "llvm/Analysis/ValueTracking.h"
34 #include "llvm/Analysis/VectorUtils.h"
35 #include "llvm/IR/Argument.h"
36 #include "llvm/IR/AttributeMask.h"
37 #include "llvm/IR/Attributes.h"
38 #include "llvm/IR/BasicBlock.h"
39 #include "llvm/IR/CFG.h"
40 #include "llvm/IR/Constant.h"
41 #include "llvm/IR/ConstantRange.h"
42 #include "llvm/IR/Constants.h"
43 #include "llvm/IR/DataLayout.h"
44 #include "llvm/IR/DebugInfo.h"
45 #include "llvm/IR/DebugInfoMetadata.h"
46 #include "llvm/IR/DebugLoc.h"
47 #include "llvm/IR/DerivedTypes.h"
48 #include "llvm/IR/Dominators.h"
49 #include "llvm/IR/EHPersonalities.h"
50 #include "llvm/IR/Function.h"
51 #include "llvm/IR/GlobalVariable.h"
52 #include "llvm/IR/IRBuilder.h"
53 #include "llvm/IR/InlineAsm.h"
54 #include "llvm/IR/InstrTypes.h"
55 #include "llvm/IR/Instruction.h"
56 #include "llvm/IR/Instructions.h"
57 #include "llvm/IR/IntrinsicInst.h"
58 #include "llvm/IR/Intrinsics.h"
59 #include "llvm/IR/LLVMContext.h"
60 #include "llvm/IR/MDBuilder.h"
61 #include "llvm/IR/Metadata.h"
62 #include "llvm/IR/Module.h"
63 #include "llvm/IR/PatternMatch.h"
64 #include "llvm/IR/ProfDataUtils.h"
65 #include "llvm/IR/Type.h"
66 #include "llvm/IR/User.h"
67 #include "llvm/IR/Value.h"
68 #include "llvm/Support/Casting.h"
69 #include "llvm/Support/CommandLine.h"
70 #include "llvm/Support/ErrorHandling.h"
71 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
72 #include "llvm/Transforms/Utils/Cloning.h"
73 #include "llvm/Transforms/Utils/Local.h"
74 #include "llvm/Transforms/Utils/ValueMapper.h"
75 #include <algorithm>
76 #include <cassert>
77 #include <cstdint>
78 #include <deque>
79 #include <iterator>
80 #include <limits>
81 #include <optional>
82 #include <string>
83 #include <utility>
84 #include <vector>
85 
86 #define DEBUG_TYPE "inline-function"
87 
88 using namespace llvm;
89 using namespace llvm::memprof;
90 using ProfileCount = Function::ProfileCount;
91 
92 static cl::opt<bool>
93 EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
94   cl::Hidden,
95   cl::desc("Convert noalias attributes to metadata during inlining."));
96 
97 static cl::opt<bool>
98     UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden,
99                         cl::init(true),
100                         cl::desc("Use the llvm.experimental.noalias.scope.decl "
101                                  "intrinsic during inlining."));
102 
103 // Disabled by default, because the added alignment assumptions may increase
104 // compile-time and block optimizations. This option is not suitable for use
105 // with frontends that emit comprehensive parameter alignment annotations.
106 static cl::opt<bool>
107 PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
108   cl::init(false), cl::Hidden,
109   cl::desc("Convert align attributes to assumptions during inlining."));
110 
111 static cl::opt<unsigned> InlinerAttributeWindow(
112     "max-inst-checked-for-throw-during-inlining", cl::Hidden,
113     cl::desc("the maximum number of instructions analyzed for possible throws "
114              "during attribute inference in the inlined body"),
115     cl::init(4));
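
// For illustration, a hedged sketch of how these knobs can be toggled from the
// command line (the input/output file names are hypothetical):
//
//   opt -passes=inline -enable-noalias-to-md-conversion=false \
//       -use-noalias-intrinsic-during-inlining=false -S in.ll -o out.ll
//
// All of the options above are cl::Hidden, so they are listed by -help-hidden
// rather than -help.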
116 
117 namespace {
118 
119   /// A class for recording information about inlining a landing pad.
120   class LandingPadInliningInfo {
121     /// Destination of the invoke's unwind.
122     BasicBlock *OuterResumeDest;
123 
124     /// Destination for the callee's resume.
125     BasicBlock *InnerResumeDest = nullptr;
126 
127     /// LandingPadInst associated with the invoke.
128     LandingPadInst *CallerLPad = nullptr;
129 
130     /// PHI for EH values from landingpad insts.
131     PHINode *InnerEHValuesPHI = nullptr;
132 
133     SmallVector<Value*, 8> UnwindDestPHIValues;
134 
135   public:
136     LandingPadInliningInfo(InvokeInst *II)
137         : OuterResumeDest(II->getUnwindDest()) {
138       // If there are PHI nodes in the unwind destination block, we need to keep
139       // track of which values came into them from the invoke before removing
140       // the edge from this block.
141       BasicBlock *InvokeBB = II->getParent();
142       BasicBlock::iterator I = OuterResumeDest->begin();
143       for (; isa<PHINode>(I); ++I) {
144         // Save the value to use for this edge.
145         PHINode *PHI = cast<PHINode>(I);
146         UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
147       }
148 
149       CallerLPad = cast<LandingPadInst>(I);
150     }
151 
152     /// The outer unwind destination is the target of
153     /// unwind edges introduced for calls within the inlined function.
154     BasicBlock *getOuterResumeDest() const {
155       return OuterResumeDest;
156     }
157 
158     BasicBlock *getInnerResumeDest();
159 
160     LandingPadInst *getLandingPadInst() const { return CallerLPad; }
161 
162     /// Forward the 'resume' instruction to the caller's landing pad block.
163     /// When the landing pad block has only one predecessor, this is
164     /// a simple branch. When there is more than one predecessor, we need to
165     /// split the landing pad block after the landingpad instruction and jump
166     /// to there.
167     void forwardResume(ResumeInst *RI,
168                        SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);
169 
170     /// Add incoming-PHI values to the unwind destination block for the given
171     /// basic block, using the values for the original invoke's source block.
172     void addIncomingPHIValuesFor(BasicBlock *BB) const {
173       addIncomingPHIValuesForInto(BB, OuterResumeDest);
174     }
175 
176     void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
177       BasicBlock::iterator I = dest->begin();
178       for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
179         PHINode *phi = cast<PHINode>(I);
180         phi->addIncoming(UnwindDestPHIValues[i], src);
181       }
182     }
183   };
184 } // end anonymous namespace
185 
186 static IntrinsicInst *getConvergenceEntry(BasicBlock &BB) {
187   BasicBlock::iterator It = BB.getFirstNonPHIIt();
188   while (It != BB.end()) {
189     if (auto *IntrinsicCall = dyn_cast<ConvergenceControlInst>(It)) {
190       if (IntrinsicCall->isEntry()) {
191         return IntrinsicCall;
192       }
193     }
194     It = std::next(It);
195   }
196   return nullptr;
197 }
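
// For illustration, a hedged sketch of the entry intrinsic the helper above
// scans for at the top of a block (the value name is hypothetical):
//
//   entry:
//     %entry.token = call token @llvm.experimental.convergence.entry()
//     ...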
198 
199 /// Get or create a target for the branch from ResumeInsts.
200 BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
201   if (InnerResumeDest) return InnerResumeDest;
202 
203   // Split the landing pad.
204   BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator();
205   InnerResumeDest =
206     OuterResumeDest->splitBasicBlock(SplitPoint,
207                                      OuterResumeDest->getName() + ".body");
208 
209   // The number of incoming edges we expect to the inner landing pad.
210   const unsigned PHICapacity = 2;
211 
212   // Create corresponding new PHIs for all the PHIs in the outer landing pad.
213   BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
214   BasicBlock::iterator I = OuterResumeDest->begin();
215   for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
216     PHINode *OuterPHI = cast<PHINode>(I);
217     PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
218                                         OuterPHI->getName() + ".lpad-body");
219     InnerPHI->insertBefore(InsertPoint);
220     OuterPHI->replaceAllUsesWith(InnerPHI);
221     InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
222   }
223 
224   // Create a PHI for the exception values.
225   InnerEHValuesPHI =
226       PHINode::Create(CallerLPad->getType(), PHICapacity, "eh.lpad-body");
227   InnerEHValuesPHI->insertBefore(InsertPoint);
228   CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
229   InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);
230 
231   // All done.
232   return InnerResumeDest;
233 }
234 
235 /// Forward the 'resume' instruction to the caller's landing pad block.
236 /// When the landing pad block has only one predecessor, this is a simple
237 /// branch. When there is more than one predecessor, we need to split the
238 /// landing pad block after the landingpad instruction and jump to there.
239 void LandingPadInliningInfo::forwardResume(
240     ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) {
241   BasicBlock *Dest = getInnerResumeDest();
242   BasicBlock *Src = RI->getParent();
243 
244   BranchInst::Create(Dest, Src);
245 
246   // Update the PHIs in the destination. They were inserted in an order which
247   // makes this work.
248   addIncomingPHIValuesForInto(Src, Dest);
249 
250   InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
251   RI->eraseFromParent();
252 }
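
// For illustration, a hedged sketch of the rewrite above (block, value, and
// type names are hypothetical):
//
//   ; inlined callee block, before:
//   inlined.bb:
//     resume { ptr, i32 } %lp.val
//
//   ; after forwardResume, with the caller's %lpad split at %lpad.body:
//   inlined.bb:
//     br label %lpad.body
//
//   lpad.body:
//     %eh.lpad-body = phi { ptr, i32 } [ %caller.lp, %lpad ],
//                                      [ %lp.val, %inlined.bb ]
//     ...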
253 
254 /// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
255 static Value *getParentPad(Value *EHPad) {
256   if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad))
257     return FPI->getParentPad();
258   return cast<CatchSwitchInst>(EHPad)->getParentPad();
259 }
260 
261 using UnwindDestMemoTy = DenseMap<Instruction *, Value *>;
262 
263 /// Helper for getUnwindDestToken that does the descendant-ward part of
264 /// the search.
265 static Value *getUnwindDestTokenHelper(Instruction *EHPad,
266                                        UnwindDestMemoTy &MemoMap) {
267   SmallVector<Instruction *, 8> Worklist(1, EHPad);
268 
269   while (!Worklist.empty()) {
270     Instruction *CurrentPad = Worklist.pop_back_val();
271     // We only put pads on the worklist that aren't in the MemoMap.  When
272     // we find an unwind dest for a pad we may update its ancestors, but
273     // the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
274     // so they should never get updated while queued on the worklist.
275     assert(!MemoMap.count(CurrentPad));
276     Value *UnwindDestToken = nullptr;
277     if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) {
278       if (CatchSwitch->hasUnwindDest()) {
279         UnwindDestToken = &*CatchSwitch->getUnwindDest()->getFirstNonPHIIt();
280       } else {
281         // Catchswitch doesn't have a 'nounwind' variant, and one might be
282         // annotated as "unwinds to caller" when really it's nounwind (see
283         // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
284         // parent's unwind dest from this.  We can check its catchpads'
285         // descendants, since they might include a cleanuppad with an
286         // "unwinds to caller" cleanupret, which can be trusted.
287         for (auto HI = CatchSwitch->handler_begin(),
288                   HE = CatchSwitch->handler_end();
289              HI != HE && !UnwindDestToken; ++HI) {
290           BasicBlock *HandlerBlock = *HI;
291           auto *CatchPad =
292               cast<CatchPadInst>(&*HandlerBlock->getFirstNonPHIIt());
293           for (User *Child : CatchPad->users()) {
294             // Intentionally ignore invokes here -- since the catchswitch is
295             // marked "unwind to caller", it would be a verifier error if it
296             // contained an invoke which unwinds out of it, so any invoke we'd
297             // encounter must unwind to some child of the catch.
298             if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child))
299               continue;
300 
301             Instruction *ChildPad = cast<Instruction>(Child);
302             auto Memo = MemoMap.find(ChildPad);
303             if (Memo == MemoMap.end()) {
304               // Haven't figured out this child pad yet; queue it.
305               Worklist.push_back(ChildPad);
306               continue;
307             }
308             // We've already checked this child, but might have found that
309             // it offers no proof either way.
310             Value *ChildUnwindDestToken = Memo->second;
311             if (!ChildUnwindDestToken)
312               continue;
313             // We already know the child's unwind dest, which can either
314             // be ConstantTokenNone to indicate unwind to caller, or can
315             // be another child of the catchpad.  Only the former indicates
316             // the unwind dest of the catchswitch.
317             if (isa<ConstantTokenNone>(ChildUnwindDestToken)) {
318               UnwindDestToken = ChildUnwindDestToken;
319               break;
320             }
321             assert(getParentPad(ChildUnwindDestToken) == CatchPad);
322           }
323         }
324       }
325     } else {
326       auto *CleanupPad = cast<CleanupPadInst>(CurrentPad);
327       for (User *U : CleanupPad->users()) {
328         if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
329           if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
330             UnwindDestToken = &*RetUnwindDest->getFirstNonPHIIt();
331           else
332             UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext());
333           break;
334         }
335         Value *ChildUnwindDestToken;
336         if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
337           ChildUnwindDestToken = &*Invoke->getUnwindDest()->getFirstNonPHIIt();
338         } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) {
339           Instruction *ChildPad = cast<Instruction>(U);
340           auto Memo = MemoMap.find(ChildPad);
341           if (Memo == MemoMap.end()) {
342             // Haven't resolved this child yet; queue it and keep searching.
343             Worklist.push_back(ChildPad);
344             continue;
345           }
346           // We've checked this child, but still need to ignore it if it
347           // had no proof either way.
348           ChildUnwindDestToken = Memo->second;
349           if (!ChildUnwindDestToken)
350             continue;
351         } else {
352           // Not a relevant user of the cleanuppad
353           continue;
354         }
355         // In a well-formed program, the child/invoke must either unwind to
356         // an(other) child of the cleanup, or exit the cleanup.  In the
357         // first case, continue searching.
358         if (isa<Instruction>(ChildUnwindDestToken) &&
359             getParentPad(ChildUnwindDestToken) == CleanupPad)
360           continue;
361         UnwindDestToken = ChildUnwindDestToken;
362         break;
363       }
364     }
365     // If we haven't found an unwind dest for CurrentPad, we may have queued its
366     // children, so move on to the next in the worklist.
367     if (!UnwindDestToken)
368       continue;
369 
370     // Now we know that CurrentPad unwinds to UnwindDestToken.  It also exits
371     // any ancestors of CurrentPad up to but not including UnwindDestToken's
372     // parent pad.  Record this in the memo map, and check to see if the
373     // original EHPad being queried is one of the ones exited.
374     Value *UnwindParent;
375     if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken))
376       UnwindParent = getParentPad(UnwindPad);
377     else
378       UnwindParent = nullptr;
379     bool ExitedOriginalPad = false;
380     for (Instruction *ExitedPad = CurrentPad;
381          ExitedPad && ExitedPad != UnwindParent;
382          ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) {
383       // Skip over catchpads since they just follow their catchswitches.
384       if (isa<CatchPadInst>(ExitedPad))
385         continue;
386       MemoMap[ExitedPad] = UnwindDestToken;
387       ExitedOriginalPad |= (ExitedPad == EHPad);
388     }
389 
390     if (ExitedOriginalPad)
391       return UnwindDestToken;
392 
393     // Continue the search.
394   }
395 
396   // No definitive information is contained within this funclet.
397   return nullptr;
398 }
399 
400 /// Given an EH pad, find where it unwinds.  If it unwinds to an EH pad,
401 /// return that pad instruction.  If it unwinds to caller, return
402 /// ConstantTokenNone.  If it does not have a definitive unwind destination,
403 /// return nullptr.
404 ///
405 /// This routine gets invoked for calls in funclets in inlinees when inlining
406 /// an invoke.  Since many funclets don't have calls inside them, it's queried
407 /// on-demand rather than building a map of pads to unwind dests up front.
408 /// Determining a funclet's unwind dest may require recursively searching its
409 /// descendants, and also ancestors and cousins if the descendants don't provide
410 /// an answer.  Since most funclets will have their unwind dest immediately
411 /// available as the unwind dest of a catchswitch or cleanupret, this routine
412 /// searches top-down from the given pad and then up. To avoid worst-case
413 /// quadratic run-time given that approach, it uses a memo map to avoid
414 /// re-processing funclet trees.  The callers that rewrite the IR as they go
415 /// take advantage of this, for correctness, by checking/forcing rewritten
416 /// pads' entries to match the original callee view.
417 static Value *getUnwindDestToken(Instruction *EHPad,
418                                  UnwindDestMemoTy &MemoMap) {
419   // Catchpads unwind to the same place as their catchswitch;
420   // redirct any queries on catchpads so the code below can
421   // redirect any queries on catchpads so the code below can
422   if (auto *CPI = dyn_cast<CatchPadInst>(EHPad))
423     EHPad = CPI->getCatchSwitch();
424 
425   // Check if we've already determined the unwind dest for this pad.
426   auto Memo = MemoMap.find(EHPad);
427   if (Memo != MemoMap.end())
428     return Memo->second;
429 
430   // Search EHPad and, if necessary, its descendants.
431   Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
432   assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
433   if (UnwindDestToken)
434     return UnwindDestToken;
435 
436   // No information is available for this EHPad from itself or any of its
437   // descendants.  An unwind all the way out to a pad in the caller would
438   // need also to agree with the unwind dest of the parent funclet, so
439   // search up the chain to try to find a funclet with information.  Put
440   // null entries in the memo map to avoid re-processing as we go up.
441   MemoMap[EHPad] = nullptr;
442 #ifndef NDEBUG
443   SmallPtrSet<Instruction *, 4> TempMemos;
444   TempMemos.insert(EHPad);
445 #endif
446   Instruction *LastUselessPad = EHPad;
447   Value *AncestorToken;
448   for (AncestorToken = getParentPad(EHPad);
449        auto *AncestorPad = dyn_cast<Instruction>(AncestorToken);
450        AncestorToken = getParentPad(AncestorToken)) {
451     // Skip over catchpads since they just follow their catchswitches.
452     if (isa<CatchPadInst>(AncestorPad))
453       continue;
454     // If the MemoMap had an entry mapping AncestorPad to nullptr, since we
455     // haven't yet called getUnwindDestTokenHelper for AncestorPad in this
456     // call to getUnwindDestToken, that would mean that AncestorPad had no
457     // information in itself, its descendants, or its ancestors.  If that
458     // were the case, then we should also have recorded the lack of information
459     // for the descendant that we're coming from.  So assert that we don't
460     // find a null entry in the MemoMap for AncestorPad.
461     assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
462     auto AncestorMemo = MemoMap.find(AncestorPad);
463     if (AncestorMemo == MemoMap.end()) {
464       UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap);
465     } else {
466       UnwindDestToken = AncestorMemo->second;
467     }
468     if (UnwindDestToken)
469       break;
470     LastUselessPad = AncestorPad;
471     MemoMap[LastUselessPad] = nullptr;
472 #ifndef NDEBUG
473     TempMemos.insert(LastUselessPad);
474 #endif
475   }
476 
477   // We know that getUnwindDestTokenHelper was called on LastUselessPad and
478   // returned nullptr (and likewise for EHPad and any of its ancestors up to
479   // LastUselessPad), so LastUselessPad has no information from below.  Since
480   // getUnwindDestTokenHelper must investigate all downward paths through
481   // no-information nodes to prove that a node has no information like this,
482   // and since any time it finds information it records it in the MemoMap for
483   // not just the immediately-containing funclet but also any ancestors also
484   // exited, it must be the case that, walking downward from LastUselessPad,
485   // visiting just those nodes which have not been mapped to an unwind dest
486   // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since
487   // they are just used to keep getUnwindDestTokenHelper from repeating work),
488   // any node visited must have been exhaustively searched with no information
489   // for it found.
490   SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
491   while (!Worklist.empty()) {
492     Instruction *UselessPad = Worklist.pop_back_val();
493     auto Memo = MemoMap.find(UselessPad);
494     if (Memo != MemoMap.end() && Memo->second) {
495       // Here the name 'UselessPad' is a bit of a misnomer, because we've found
496       // that it is a funclet that does have information about unwinding to
497       // a particular destination; its parent was a useless pad.
498       // Since its parent has no information, the unwind edge must not escape
499       // the parent, and must target a sibling of this pad.  This local unwind
500       // gives us no information about EHPad.  Leave it and the subtree rooted
501       // at it alone.
502       assert(getParentPad(Memo->second) == getParentPad(UselessPad));
503       continue;
504     }
505     // We know we don't have information for UselessPad.  If it has an entry in
506     // the MemoMap (mapping it to nullptr), it must be one of the TempMemos
507     // added on this invocation of getUnwindDestToken; if a previous invocation
508     // recorded nullptr, it would have had to prove that the ancestors of
509     // UselessPad, which include LastUselessPad, had no information, and that
510     // in turn would have required proving that the descendants of
511     // LastUselessPad, which include EHPad, have no information about
512     // LastUselessPad, which would imply that EHPad was mapped to nullptr in
513     // the MemoMap on that invocation, which isn't the case if we got here.
514     assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
515     // Assert as we enumerate users that 'UselessPad' doesn't have any unwind
516     // information that we'd be contradicting by making a map entry for it
517     // (which is something that getUnwindDestTokenHelper must have proved for
518     // us to get here).  Just assert on its direct users here; the checks in
519     // this downward walk at its descendants will verify that they don't have
520     // any unwind edges that exit 'UselessPad' either (i.e. they either have no
521     // unwind edges or unwind to a sibling).
522     MemoMap[UselessPad] = UnwindDestToken;
523     if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) {
524       assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
525       for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
526         auto *CatchPad = &*HandlerBlock->getFirstNonPHIIt();
527         for (User *U : CatchPad->users()) {
528           assert((!isa<InvokeInst>(U) ||
529                   (getParentPad(&*cast<InvokeInst>(U)
530                                       ->getUnwindDest()
531                                       ->getFirstNonPHIIt()) == CatchPad)) &&
532                  "Expected useless pad");
533           if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
534             Worklist.push_back(cast<Instruction>(U));
535         }
536       }
537     } else {
538       assert(isa<CleanupPadInst>(UselessPad));
539       for (User *U : UselessPad->users()) {
540         assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
541         assert(
542             (!isa<InvokeInst>(U) ||
543              (getParentPad(
544                   &*cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHIIt()) ==
545               UselessPad)) &&
546             "Expected useless pad");
547         if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
548           Worklist.push_back(cast<Instruction>(U));
549       }
550     }
551   }
552 
553   return UnwindDestToken;
554 }
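
// For illustration, a hedged sketch of what the returned token means for two
// simple cleanup funclets (labels and value names are hypothetical):
//
//   cleanup.a:                                ; getUnwindDestToken(%cp.a)
//     %cp.a = cleanuppad within none []       ;   == ConstantTokenNone
//     cleanupret from %cp.a unwind to caller  ;   ("unwinds to caller")
//
//   cleanup.b:                                ; getUnwindDestToken(%cp.b)
//     %cp.b = cleanuppad within none []       ;   == %cs, the first non-PHI
//     cleanupret from %cp.b unwind label %dispatch ; instruction of %dispatch
//
//   dispatch:
//     %cs = catchswitch within none [label %handler] unwind to caller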
555 
556 /// When we inline a basic block into an invoke,
557 /// we have to turn all of the calls that can throw into invokes.
558 /// This function analyzes BB to see if there are any calls, and if so,
559 /// it rewrites them to be invokes that jump to UnwindEdge, and returns the
560 /// block so the caller can update the PHI nodes in that unwind destination.
561 static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
562     BasicBlock *BB, BasicBlock *UnwindEdge,
563     UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
564   for (Instruction &I : llvm::make_early_inc_range(*BB)) {
565     // We only need to check for function calls: inlined invoke
566     // instructions require no special handling.
567     CallInst *CI = dyn_cast<CallInst>(&I);
568 
569     if (!CI || CI->doesNotThrow())
570       continue;
571 
572     // We do not need to (and in fact, cannot) convert possibly throwing calls
573     // to @llvm.experimental.deoptimize (resp. @llvm.experimental.guard) into
574     // invokes.  The caller's "segment" of the deoptimization continuation
575     // attached to the newly inlined @llvm.experimental.deoptimize
576     // (resp. @llvm.experimental.guard) call should contain the exception
577     // handling logic, if any.
578     if (auto *F = CI->getCalledFunction())
579       if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
580           F->getIntrinsicID() == Intrinsic::experimental_guard)
581         continue;
582 
583     if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
584       // This call is nested inside a funclet.  If that funclet has an unwind
585       // destination within the inlinee, then unwinding out of this call would
586       // be UB.  Rewriting this call to an invoke which targets the inlined
587       // invoke's unwind dest would give the call's parent funclet multiple
588       // unwind destinations, which is something that subsequent EH table
589       // generation can't handle and that the verifier rejects.  So when we
590       // see such a call, leave it as a call.
591       auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]);
592       Value *UnwindDestToken =
593           getUnwindDestToken(FuncletPad, *FuncletUnwindMap);
594       if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
595         continue;
596 #ifndef NDEBUG
597       Instruction *MemoKey;
598       if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
599         MemoKey = CatchPad->getCatchSwitch();
600       else
601         MemoKey = FuncletPad;
602       assert(FuncletUnwindMap->count(MemoKey) &&
603              (*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
604              "must get memoized to avoid confusing later searches");
605 #endif // NDEBUG
606     }
607 
608     changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
609     return BB;
610   }
611   return nullptr;
612 }
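
// For illustration, a hedged sketch of the call-to-invoke rewrite performed
// above (names are hypothetical; the continuation block is produced by
// changeToInvokeAndSplitBasicBlock):
//
//   ; before, inside the inlined body:
//   bb:
//     call void @may_throw()
//     ...rest of bb...
//
//   ; after, with UnwindEdge being %lpad:
//   bb:
//     invoke void @may_throw()
//             to label %bb.cont unwind label %lpad
//
//   bb.cont:
//     ...rest of bb...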
613 
614 /// If we inlined an invoke site, we need to convert calls
615 /// in the body of the inlined function into invokes.
616 ///
617 /// II is the invoke instruction being inlined.  FirstNewBlock is the first
618 /// block of the inlined code (the last block is the end of the function),
619 /// and InlinedCodeInfo is information about the code that got inlined.
620 static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
621                                     ClonedCodeInfo &InlinedCodeInfo) {
622   BasicBlock *InvokeDest = II->getUnwindDest();
623 
624   Function *Caller = FirstNewBlock->getParent();
625 
626   // The inlined code is currently at the end of the function, scan from the
627   // start of the inlined code to its end, checking for stuff we need to
628   // rewrite.
629   LandingPadInliningInfo Invoke(II);
630 
631   // Get all of the inlined landing pad instructions.
632   SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
633   for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
634        I != E; ++I)
635     if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
636       InlinedLPads.insert(II->getLandingPadInst());
637 
638   // Append the clauses from the outer landing pad instruction into the inlined
639   // landing pad instructions.
640   LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
641   for (LandingPadInst *InlinedLPad : InlinedLPads) {
642     unsigned OuterNum = OuterLPad->getNumClauses();
643     InlinedLPad->reserveClauses(OuterNum);
644     for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
645       InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
646     if (OuterLPad->isCleanup())
647       InlinedLPad->setCleanup(true);
648   }
649 
650   for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
651        BB != E; ++BB) {
652     if (InlinedCodeInfo.ContainsCalls)
653       if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
654               &*BB, Invoke.getOuterResumeDest()))
655         // Update any PHI nodes in the exceptional block to indicate that there
656         // is now a new entry in them.
657         Invoke.addIncomingPHIValuesFor(NewBB);
658 
659     // Forward any resumes that are remaining here.
660     if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
661       Invoke.forwardResume(RI, InlinedLPads);
662   }
663 
664   // Now that everything is happy, we have one final detail.  The PHI nodes in
665   // the exception destination block still have entries due to the original
666   // invoke instruction. Eliminate these entries (which might even delete the
667   // PHI node) now.
668   InvokeDest->removePredecessor(II->getParent());
669 }
670 
671 /// If we inlined an invoke site, we need to convert calls
672 /// in the body of the inlined function into invokes.
673 ///
674 /// II is the invoke instruction being inlined.  FirstNewBlock is the first
675 /// block of the inlined code (the last block is the end of the function),
676 /// and InlinedCodeInfo is information about the code that got inlined.
677 static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
678                                ClonedCodeInfo &InlinedCodeInfo) {
679   BasicBlock *UnwindDest = II->getUnwindDest();
680   Function *Caller = FirstNewBlock->getParent();
681 
682   assert(UnwindDest->getFirstNonPHIIt()->isEHPad() && "unexpected BasicBlock!");
683 
684   // If there are PHI nodes in the unwind destination block, we need to keep
685   // track of which values came into them from the invoke before removing the
686   // edge from this block.
687   SmallVector<Value *, 8> UnwindDestPHIValues;
688   BasicBlock *InvokeBB = II->getParent();
689   for (PHINode &PHI : UnwindDest->phis()) {
690     // Save the value to use for this edge.
691     UnwindDestPHIValues.push_back(PHI.getIncomingValueForBlock(InvokeBB));
692   }
693 
694   // Add incoming-PHI values to the unwind destination block for the given basic
695   // block, using the values for the original invoke's source block.
696   auto UpdatePHINodes = [&](BasicBlock *Src) {
697     BasicBlock::iterator I = UnwindDest->begin();
698     for (Value *V : UnwindDestPHIValues) {
699       PHINode *PHI = cast<PHINode>(I);
700       PHI->addIncoming(V, Src);
701       ++I;
702     }
703   };
704 
705   // This connects all the instructions which 'unwind to caller' to the invoke
706   // destination.
707   UnwindDestMemoTy FuncletUnwindMap;
708   for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
709        BB != E; ++BB) {
710     if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
711       if (CRI->unwindsToCaller()) {
712         auto *CleanupPad = CRI->getCleanupPad();
713         CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI->getIterator());
714         CRI->eraseFromParent();
715         UpdatePHINodes(&*BB);
716         // Finding a cleanupret with an unwind destination would confuse
717         // subsequent calls to getUnwindDestToken, so map the cleanuppad
718         // to short-circuit any such calls and recognize this as an "unwind
719         // to caller" cleanup.
720         assert(!FuncletUnwindMap.count(CleanupPad) ||
721                isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
722         FuncletUnwindMap[CleanupPad] =
723             ConstantTokenNone::get(Caller->getContext());
724       }
725     }
726 
727     BasicBlock::iterator I = BB->getFirstNonPHIIt();
728     if (!I->isEHPad())
729       continue;
730 
731     Instruction *Replacement = nullptr;
732     if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
733       if (CatchSwitch->unwindsToCaller()) {
734         Value *UnwindDestToken;
735         if (auto *ParentPad =
736                 dyn_cast<Instruction>(CatchSwitch->getParentPad())) {
737           // This catchswitch is nested inside another funclet.  If that
738           // funclet has an unwind destination within the inlinee, then
739           // unwinding out of this catchswitch would be UB.  Rewriting this
740           // catchswitch to unwind to the inlined invoke's unwind dest would
741           // give the parent funclet multiple unwind destinations, which is
742           // something that subsequent EH table generation can't handle and
743           // that the verifier rejects.  So when we see such a catchswitch,
744           // leave it as "unwind to caller".
745           UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap);
746           if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
747             continue;
748         } else {
749           // This catchswitch has no parent to inherit constraints from, and
750           // none of its descendants can have an unwind edge that exits it and
751           // targets another funclet in the inlinee.  It may or may not have a
752           // descendant that definitively has an unwind to caller.  In either
753           // case, we'll have to assume that any unwinds out of it may need to
754           // be routed to the caller, so treat it as though it has a definitive
755           // unwind to caller.
756           UnwindDestToken = ConstantTokenNone::get(Caller->getContext());
757         }
758         auto *NewCatchSwitch = CatchSwitchInst::Create(
759             CatchSwitch->getParentPad(), UnwindDest,
760             CatchSwitch->getNumHandlers(), CatchSwitch->getName(),
761             CatchSwitch->getIterator());
762         for (BasicBlock *PadBB : CatchSwitch->handlers())
763           NewCatchSwitch->addHandler(PadBB);
764         // Propagate info for the old catchswitch over to the new one in
765         // the unwind map.  This also serves to short-circuit any subsequent
766         // checks for the unwind dest of this catchswitch, which would get
767         // confused if they found the outer handler in the callee.
768         FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
769         Replacement = NewCatchSwitch;
770       }
771     } else if (!isa<FuncletPadInst>(I)) {
772       llvm_unreachable("unexpected EHPad!");
773     }
774 
775     if (Replacement) {
776       Replacement->takeName(&*I);
777       I->replaceAllUsesWith(Replacement);
778       I->eraseFromParent();
779       UpdatePHINodes(&*BB);
780     }
781   }
782 
783   if (InlinedCodeInfo.ContainsCalls)
784     for (Function::iterator BB = FirstNewBlock->getIterator(),
785                             E = Caller->end();
786          BB != E; ++BB)
787       if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
788               &*BB, UnwindDest, &FuncletUnwindMap))
789         // Update any PHI nodes in the exceptional block to indicate that there
790         // is now a new entry in them.
791         UpdatePHINodes(NewBB);
792 
793   // Now that everything is happy, we have one final detail.  The PHI nodes in
794   // the exception destination block still have entries due to the original
795   // invoke instruction. Eliminate these entries (which might even delete the
796   // PHI node) now.
797   UnwindDest->removePredecessor(InvokeBB);
798 }
799 
800 static bool haveCommonPrefix(MDNode *MIBStackContext,
801                              MDNode *CallsiteStackContext) {
802   assert(MIBStackContext->getNumOperands() > 0 &&
803          CallsiteStackContext->getNumOperands() > 0);
804   // Because of the context trimming performed during matching, the callsite
805   // context could have more stack ids than the MIB. We match up to the end of
806   // the shortest stack context.
807   for (auto MIBStackIter = MIBStackContext->op_begin(),
808             CallsiteStackIter = CallsiteStackContext->op_begin();
809        MIBStackIter != MIBStackContext->op_end() &&
810        CallsiteStackIter != CallsiteStackContext->op_end();
811        MIBStackIter++, CallsiteStackIter++) {
812     auto *Val1 = mdconst::dyn_extract<ConstantInt>(*MIBStackIter);
813     auto *Val2 = mdconst::dyn_extract<ConstantInt>(*CallsiteStackIter);
814     assert(Val1 && Val2);
815     if (Val1->getZExtValue() != Val2->getZExtValue())
816       return false;
817   }
818   return true;
819 }
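
// For illustration, a hedged example with made-up stack ids:
//
//   MIB stack context:       !{i64 1, i64 2}
//   callsite stack context:  !{i64 1, i64 2, i64 3}  -> common prefix, true
//   callsite stack context:  !{i64 1, i64 5}         -> differs at the second
//                                                       id, false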
820 
821 static void removeMemProfMetadata(CallBase *Call) {
822   Call->setMetadata(LLVMContext::MD_memprof, nullptr);
823 }
824 
825 static void removeCallsiteMetadata(CallBase *Call) {
826   Call->setMetadata(LLVMContext::MD_callsite, nullptr);
827 }
828 
829 static void updateMemprofMetadata(CallBase *CI,
830                                   const std::vector<Metadata *> &MIBList) {
831   assert(!MIBList.empty());
832   // Remove existing memprof, which will either be replaced or may not be needed
833   // if we are able to use a single allocation type function attribute.
834   removeMemProfMetadata(CI);
835   CallStackTrie CallStack;
836   for (Metadata *MIB : MIBList)
837     CallStack.addCallStack(cast<MDNode>(MIB));
838   bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI);
839   assert(MemprofMDAttached == CI->hasMetadata(LLVMContext::MD_memprof));
840   if (!MemprofMDAttached)
841     // If we used a function attribute remove the callsite metadata as well.
842     removeCallsiteMetadata(CI);
843 }
844 
845 // Update the metadata on the inlined copy ClonedCall of a call OrigCall in the
846 // inlined callee body, based on the callsite metadata InlinedCallsiteMD from
847 // the call that was inlined.
848 static void propagateMemProfHelper(const CallBase *OrigCall,
849                                    CallBase *ClonedCall,
850                                    MDNode *InlinedCallsiteMD) {
851   MDNode *OrigCallsiteMD = ClonedCall->getMetadata(LLVMContext::MD_callsite);
852   MDNode *ClonedCallsiteMD = nullptr;
853   // Check if the call originally had callsite metadata, and update it for the
854   // new call in the inlined body.
855   if (OrigCallsiteMD) {
856     // The cloned call's context is now the concatenation of the original call's
857     // callsite metadata and the callsite metadata on the call where it was
858     // inlined.
859     ClonedCallsiteMD = MDNode::concatenate(OrigCallsiteMD, InlinedCallsiteMD);
860     ClonedCall->setMetadata(LLVMContext::MD_callsite, ClonedCallsiteMD);
861   }
862 
863   // Update any memprof metadata on the cloned call.
864   MDNode *OrigMemProfMD = ClonedCall->getMetadata(LLVMContext::MD_memprof);
865   if (!OrigMemProfMD)
866     return;
867   // We currently expect that allocations with memprof metadata also have
868   // callsite metadata for the allocation's part of the context.
869   assert(OrigCallsiteMD);
870 
871   // New call's MIB list.
872   std::vector<Metadata *> NewMIBList;
873 
874   // For each MIB metadata, check if its call stack context starts with the
875   // new clone's callsite metadata. If so, that MIB goes onto the cloned call in
876   // the inlined body. If not, it stays on the out-of-line original call.
877   for (auto &MIBOp : OrigMemProfMD->operands()) {
878     MDNode *MIB = dyn_cast<MDNode>(MIBOp);
879     // Stack is first operand of MIB.
880     MDNode *StackMD = getMIBStackNode(MIB);
881     assert(StackMD);
882     // See if the new cloned callsite context matches this profiled context.
883     if (haveCommonPrefix(StackMD, ClonedCallsiteMD))
884       // Add it to the cloned call's MIB list.
885       NewMIBList.push_back(MIB);
886   }
887   if (NewMIBList.empty()) {
888     removeMemProfMetadata(ClonedCall);
889     removeCallsiteMetadata(ClonedCall);
890     return;
891   }
892   if (NewMIBList.size() < OrigMemProfMD->getNumOperands())
893     updateMemprofMetadata(ClonedCall, NewMIBList);
894 }
895 
896 // Update memprof related metadata (!memprof and !callsite) based on the
897 // inlining of Callee into the callsite at CB. The updates include merging the
898 // inlined callee's callsite metadata with that of the inlined call,
899 // and moving the subset of any memprof contexts to the inlined callee
900 // allocations if they match the new inlined call stack.
901 static void
902 propagateMemProfMetadata(Function *Callee, CallBase &CB,
903                          bool ContainsMemProfMetadata,
904                          const ValueMap<const Value *, WeakTrackingVH> &VMap) {
905   MDNode *CallsiteMD = CB.getMetadata(LLVMContext::MD_callsite);
906   // Only need to update if the inlined callsite had callsite metadata, or if
907   // there was any memprof metadata inlined.
908   if (!CallsiteMD && !ContainsMemProfMetadata)
909     return;
910 
911   // Propagate metadata onto the cloned calls in the inlined callee.
912   for (const auto &Entry : VMap) {
913     // See if this is a call that has been inlined and remapped, and not
914     // simplified away in the process.
915     auto *OrigCall = dyn_cast_or_null<CallBase>(Entry.first);
916     auto *ClonedCall = dyn_cast_or_null<CallBase>(Entry.second);
917     if (!OrigCall || !ClonedCall)
918       continue;
919     // If the inlined callsite did not have any callsite metadata, then it isn't
920     // involved in any profiled call contexts, and we can remove any memprof
921     // metadata on the cloned call.
922     if (!CallsiteMD) {
923       removeMemProfMetadata(ClonedCall);
924       removeCallsiteMetadata(ClonedCall);
925       continue;
926     }
927     propagateMemProfHelper(OrigCall, ClonedCall, CallsiteMD);
928   }
929 }
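
// For illustration, a hedged sketch with made-up stack ids: if the callee
// contains an allocation call carrying !callsite !{i64 123} and the call being
// inlined carries !callsite !{i64 456}, the cloned allocation in the inlined
// body ends up with !callsite !{i64 123, i64 456}, and only the MIBs whose
// stack context shares a prefix with that concatenated context remain attached
// to the clone; the others stay on the out-of-line original.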
930 
931 /// When inlining a call site that has !llvm.mem.parallel_loop_access,
932 /// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
933 /// be propagated to all memory-accessing cloned instructions.
934 static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
935                                       Function::iterator FEnd) {
936   MDNode *MemParallelLoopAccess =
937       CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access);
938   MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group);
939   MDNode *AliasScope = CB.getMetadata(LLVMContext::MD_alias_scope);
940   MDNode *NoAlias = CB.getMetadata(LLVMContext::MD_noalias);
941   if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias)
942     return;
943 
944   for (BasicBlock &BB : make_range(FStart, FEnd)) {
945     for (Instruction &I : BB) {
946       // This metadata is only relevant for instructions that access memory.
947       if (!I.mayReadOrWriteMemory())
948         continue;
949 
950       if (MemParallelLoopAccess) {
951         // TODO: This probably should not overwrite MemParallelLoopAccess.
952         MemParallelLoopAccess = MDNode::concatenate(
953             I.getMetadata(LLVMContext::MD_mem_parallel_loop_access),
954             MemParallelLoopAccess);
955         I.setMetadata(LLVMContext::MD_mem_parallel_loop_access,
956                       MemParallelLoopAccess);
957       }
958 
959       if (AccessGroup)
960         I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
961             I.getMetadata(LLVMContext::MD_access_group), AccessGroup));
962 
963       if (AliasScope)
964         I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
965             I.getMetadata(LLVMContext::MD_alias_scope), AliasScope));
966 
967       if (NoAlias)
968         I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate(
969             I.getMetadata(LLVMContext::MD_noalias), NoAlias));
970     }
971   }
972 }
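
// For illustration, a hedged sketch (metadata ids are hypothetical): if the
// inlined call site carried !noalias !3, then a cloned
//
//   %v = load i32, ptr %p, !noalias !7
//
// is retagged with the concatenation of !7 and !3, while cloned instructions
// that neither read nor write memory are left untouched.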
973 
974 /// Bundle operands of the inlined function must be added to inlined call sites.
975 static void PropagateOperandBundles(Function::iterator InlinedBB,
976                                     Instruction *CallSiteEHPad) {
977   for (Instruction &II : llvm::make_early_inc_range(*InlinedBB)) {
978     CallBase *I = dyn_cast<CallBase>(&II);
979     if (!I)
980       continue;
981     // Skip call sites which already have a "funclet" bundle.
982     if (I->getOperandBundle(LLVMContext::OB_funclet))
983       continue;
984     // Skip call sites which are nounwind intrinsics (as long as they don't
985     // lower into regular function calls in the course of IR transformations).
986     auto *CalledFn =
987         dyn_cast<Function>(I->getCalledOperand()->stripPointerCasts());
988     if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow() &&
989         !IntrinsicInst::mayLowerToFunctionCall(CalledFn->getIntrinsicID()))
990       continue;
991 
992     SmallVector<OperandBundleDef, 1> OpBundles;
993     I->getOperandBundlesAsDefs(OpBundles);
994     OpBundles.emplace_back("funclet", CallSiteEHPad);
995 
996     Instruction *NewInst = CallBase::Create(I, OpBundles, I->getIterator());
997     NewInst->takeName(I);
998     I->replaceAllUsesWith(NewInst);
999     I->eraseFromParent();
1000   }
1001 }
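
// For illustration, a hedged sketch (token and callee names are hypothetical):
// a call cloned into a cleanup funclet such as
//
//   call void @helper()
//
// is recreated above as
//
//   call void @helper() [ "funclet"(token %cleanuppad) ]
//
// so that later EH preparation knows which funclet the call executes in.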
1002 
1003 namespace {
1004 /// Utility for cloning !noalias and !alias.scope metadata. When a code region
1005 /// using scoped alias metadata is inlined, the aliasing relationships may not
1006 /// hold between the two version. It is necessary to create a deep clone of the
1007 /// metadata, putting the two versions in separate scope domains.
1008 class ScopedAliasMetadataDeepCloner {
1009   using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>;
1010   SetVector<const MDNode *> MD;
1011   MetadataMap MDMap;
1012   void addRecursiveMetadataUses();
1013 
1014 public:
1015   ScopedAliasMetadataDeepCloner(const Function *F);
1016 
1017   /// Create a new clone of the scoped alias metadata, which will be used by
1018   /// subsequent remap() calls.
1019   void clone();
1020 
1021   /// Remap instructions in the given range from the original to the cloned
1022   /// metadata.
1023   void remap(Function::iterator FStart, Function::iterator FEnd);
1024 };
1025 } // namespace
1026 
1027 ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
1028     const Function *F) {
1029   for (const BasicBlock &BB : *F) {
1030     for (const Instruction &I : BB) {
1031       if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
1032         MD.insert(M);
1033       if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
1034         MD.insert(M);
1035 
1036       // We also need to clone the metadata in noalias intrinsics.
1037       if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
1038         MD.insert(Decl->getScopeList());
1039     }
1040   }
1041   addRecursiveMetadataUses();
1042 }
1043 
1044 void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() {
1045   SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
1046   while (!Queue.empty()) {
1047     const MDNode *M = cast<MDNode>(Queue.pop_back_val());
1048     for (const Metadata *Op : M->operands())
1049       if (const MDNode *OpMD = dyn_cast<MDNode>(Op))
1050         if (MD.insert(OpMD))
1051           Queue.push_back(OpMD);
1052   }
1053 }
1054 
1055 void ScopedAliasMetadataDeepCloner::clone() {
1056   assert(MDMap.empty() && "clone() already called ?");
1057 
1058   SmallVector<TempMDTuple, 16> DummyNodes;
1059   for (const MDNode *I : MD) {
1060     DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), {}));
1061     MDMap[I].reset(DummyNodes.back().get());
1062   }
1063 
1064   // Create new metadata nodes to replace the dummy nodes, replacing old
1065   // metadata references with either a dummy node or an already-created new
1066   // node.
1067   SmallVector<Metadata *, 4> NewOps;
1068   for (const MDNode *I : MD) {
1069     for (const Metadata *Op : I->operands()) {
1070       if (const MDNode *M = dyn_cast<MDNode>(Op))
1071         NewOps.push_back(MDMap[M]);
1072       else
1073         NewOps.push_back(const_cast<Metadata *>(Op));
1074     }
1075 
1076     MDNode *NewM = MDNode::get(I->getContext(), NewOps);
1077     MDTuple *TempM = cast<MDTuple>(MDMap[I]);
1078     assert(TempM->isTemporary() && "Expected temporary node");
1079 
1080     TempM->replaceAllUsesWith(NewM);
1081     NewOps.clear();
1082   }
1083 }
1084 
1085 void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
1086                                           Function::iterator FEnd) {
1087   if (MDMap.empty())
1088     return; // Nothing to do.
1089 
1090   for (BasicBlock &BB : make_range(FStart, FEnd)) {
1091     for (Instruction &I : BB) {
1092       // TODO: The null checks for the MDMap.lookup() results should no longer
1093       // be necessary.
1094       if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
1095         if (MDNode *MNew = MDMap.lookup(M))
1096           I.setMetadata(LLVMContext::MD_alias_scope, MNew);
1097 
1098       if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
1099         if (MDNode *MNew = MDMap.lookup(M))
1100           I.setMetadata(LLVMContext::MD_noalias, MNew);
1101 
1102       if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
1103         if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
1104           Decl->setScopeList(MNew);
1105     }
1106   }
1107 }
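
// For illustration, a hedged sketch of why the deep clone is needed (metadata
// ids and names are hypothetical): if the callee tags its accesses with
//
//   !0 = distinct !{!0, !"callee domain"}
//   !1 = distinct !{!1, !0, !"callee scope"}
//   !2 = !{!1}
//   ... !alias.scope !2 ... and ... !noalias !2 ...
//
// then two inlined copies of that body sharing !1 would spuriously be treated
// as not aliasing each other; remap() instead points each inlined region at a
// freshly cloned domain/scope pair.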
1108 
1109 /// If the inlined function has noalias arguments,
1110 /// then add new alias scopes for each noalias argument, tag the mapped noalias
1111 /// parameters with noalias metadata specifying the new scope, and tag all
1112 /// non-derived loads, stores and memory intrinsics with the new alias scopes.
1113 static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
1114                                   const DataLayout &DL, AAResults *CalleeAAR,
1115                                   ClonedCodeInfo &InlinedFunctionInfo) {
1116   if (!EnableNoAliasConversion)
1117     return;
1118 
1119   const Function *CalledFunc = CB.getCalledFunction();
1120   SmallVector<const Argument *, 4> NoAliasArgs;
1121 
1122   for (const Argument &Arg : CalledFunc->args())
1123     if (CB.paramHasAttr(Arg.getArgNo(), Attribute::NoAlias) && !Arg.use_empty())
1124       NoAliasArgs.push_back(&Arg);
1125 
1126   if (NoAliasArgs.empty())
1127     return;
1128 
1129   // To do a good job, if a noalias variable is captured, we need to know if
1130   // the capture point dominates the particular use we're considering.
1131   DominatorTree DT;
1132   DT.recalculate(const_cast<Function&>(*CalledFunc));
1133 
1134   // noalias indicates that pointer values based on the argument do not alias
1135   // pointer values which are not based on it. So we add a new "scope" for each
1136   // noalias function argument. Accesses using pointers based on that argument
1137   // become part of that alias scope, accesses using pointers not based on that
1138   // argument are tagged as noalias with that scope.
1139 
1140   DenseMap<const Argument *, MDNode *> NewScopes;
1141   MDBuilder MDB(CalledFunc->getContext());
1142 
1143   // Create a new scope domain for this function.
1144   MDNode *NewDomain =
1145     MDB.createAnonymousAliasScopeDomain(CalledFunc->getName());
1146   for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) {
1147     const Argument *A = NoAliasArgs[i];
1148 
1149     std::string Name = std::string(CalledFunc->getName());
1150     if (A->hasName()) {
1151       Name += ": %";
1152       Name += A->getName();
1153     } else {
1154       Name += ": argument ";
1155       Name += utostr(i);
1156     }
1157 
1158     // Note: We always create a new anonymous root here. This is true regardless
1159     // of the linkage of the callee because the aliasing "scope" is not just a
1160     // property of the callee, but also all control dependencies in the caller.
1161     MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
1162     NewScopes.insert(std::make_pair(A, NewScope));
1163 
1164     if (UseNoAliasIntrinsic) {
1165       // Introduce a llvm.experimental.noalias.scope.decl for the noalias
1166       // argument.
1167       MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), NewScope);
1168       auto *NoAliasDecl =
1169           IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(AScopeList);
1170       // Ignore the result for now. The result will be used when the
1171       // llvm.noalias intrinsic is introduced.
1172       (void)NoAliasDecl;
1173     }
1174   }
1175 
1176   // Iterate over all new instructions in the map; for all memory-access
1177   // instructions, add the alias scope metadata.
1178   for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
1179        VMI != VMIE; ++VMI) {
1180     if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) {
1181       if (!VMI->second)
1182         continue;
1183 
1184       Instruction *NI = dyn_cast<Instruction>(VMI->second);
1185       if (!NI || InlinedFunctionInfo.isSimplified(I, NI))
1186         continue;
1187 
1188       bool IsArgMemOnlyCall = false, IsFuncCall = false;
1189       SmallVector<const Value *, 2> PtrArgs;
1190 
1191       if (const LoadInst *LI = dyn_cast<LoadInst>(I))
1192         PtrArgs.push_back(LI->getPointerOperand());
1193       else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
1194         PtrArgs.push_back(SI->getPointerOperand());
1195       else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
1196         PtrArgs.push_back(VAAI->getPointerOperand());
1197       else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
1198         PtrArgs.push_back(CXI->getPointerOperand());
1199       else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
1200         PtrArgs.push_back(RMWI->getPointerOperand());
1201       else if (const auto *Call = dyn_cast<CallBase>(I)) {
1202         // If we know that the call does not access memory, then we'll still
1203         // know that about the inlined clone of this call site, and we don't
1204         // need to add metadata.
1205         if (Call->doesNotAccessMemory())
1206           continue;
1207 
1208         IsFuncCall = true;
1209         if (CalleeAAR) {
1210           MemoryEffects ME = CalleeAAR->getMemoryEffects(Call);
1211 
1212           // We'll retain this knowledge without additional metadata.
1213           if (ME.onlyAccessesInaccessibleMem())
1214             continue;
1215 
1216           if (ME.onlyAccessesArgPointees())
1217             IsArgMemOnlyCall = true;
1218         }
1219 
1220         for (Value *Arg : Call->args()) {
1221           // Only care about pointer arguments. If a noalias argument is
1222           // accessed through a non-pointer argument, it must be captured
1223           // first (e.g. via ptrtoint), and we protect against captures below.
1224           if (!Arg->getType()->isPointerTy())
1225             continue;
1226 
1227           PtrArgs.push_back(Arg);
1228         }
1229       }
1230 
1231       // If we found no pointers, then this instruction is not suitable for
1232       // pairing with an instruction to receive aliasing metadata.
1233       // However, if this is a call, we might just alias with none of the
1234       // noalias arguments.
1235       if (PtrArgs.empty() && !IsFuncCall)
1236         continue;
1237 
1238       // It is possible that there is only one underlying object, but it may
1239       // take several PHIs to reach it, and thus it could appear multiple
1240       // times in the Objects list.
1241       SmallPtrSet<const Value *, 4> ObjSet;
1242       SmallVector<Metadata *, 4> Scopes, NoAliases;
1243 
1244       for (const Value *V : PtrArgs) {
1245         SmallVector<const Value *, 4> Objects;
1246         getUnderlyingObjects(V, Objects, /* LI = */ nullptr);
1247 
1248         for (const Value *O : Objects)
1249           ObjSet.insert(O);
1250       }
1251 
1252       // Figure out if we're derived from anything that is not a noalias
1253       // argument.
1254       bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false,
1255            UsesUnknownObject = false;
1256       for (const Value *V : ObjSet) {
1257         // Is this value a constant that cannot be derived from any pointer
1258         // value? (We need to exclude constant expressions, for example, that
1259         // are formed from arithmetic on global symbols.)
1260         bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) ||
1261                              isa<ConstantPointerNull>(V) ||
1262                              isa<ConstantDataVector>(V) || isa<UndefValue>(V);
1263         if (IsNonPtrConst)
1264           continue;
1265 
1266         // If this is anything other than a noalias argument, then we cannot
1267         // completely describe the aliasing properties using alias.scope
1268         // metadata (and, thus, won't add any).
1269         if (const Argument *A = dyn_cast<Argument>(V)) {
1270           if (!CB.paramHasAttr(A->getArgNo(), Attribute::NoAlias))
1271             UsesAliasingPtr = true;
1272         } else {
1273           UsesAliasingPtr = true;
1274         }
1275 
1276         if (isEscapeSource(V)) {
1277           // An escape source can only alias with a noalias argument if it has
1278           // been captured beforehand.
1279           RequiresNoCaptureBefore = true;
1280         } else if (!isa<Argument>(V) && !isIdentifiedObject(V)) {
1281           // If this is neither an escape source, nor some identified object
1282           // (which cannot directly alias a noalias argument), nor some other
1283           // argument (which, by definition, also cannot alias a noalias
1284           // argument), conservatively do not make any assumptions.
1285           UsesUnknownObject = true;
1286         }
1287       }
1288 
1289       // Nothing we can do if the used underlying object cannot be reliably
1290       // determined.
1291       if (UsesUnknownObject)
1292         continue;
1293 
1294       // A function call can always get captured noalias pointers (via other
1295       // parameters, globals, etc.).
1296       if (IsFuncCall && !IsArgMemOnlyCall)
1297         RequiresNoCaptureBefore = true;
1298 
1299       // First, we want to figure out all of the sets with which we definitely
1300       // don't alias. Iterate over all noalias sets, and add those for which:
1301       //   1. The noalias argument is not in the set of objects from which we
1302       //      definitely derive.
1303       //   2. The noalias argument has not yet been captured.
1304       // An arbitrary function that might load pointers could see captured
1305       // noalias arguments via other noalias arguments or globals, and so we
1306       // must always check for prior capture.
1307       for (const Argument *A : NoAliasArgs) {
1308         if (ObjSet.contains(A))
1309           continue; // May be based on a noalias argument.
1310 
1311         // It might be tempting to skip the PointerMayBeCapturedBefore check if
1312         // A->hasNoCaptureAttr() is true, but this is incorrect because
1313         // nocapture only guarantees that no copies outlive the function, not
1314         // that the value cannot be locally captured.
1315         if (!RequiresNoCaptureBefore ||
1316             !PointerMayBeCapturedBefore(A, /* ReturnCaptures */ false,
1317                                         /* StoreCaptures */ false, I, &DT))
1318           NoAliases.push_back(NewScopes[A]);
1319       }
1320 
1321       if (!NoAliases.empty())
1322         NI->setMetadata(LLVMContext::MD_noalias,
1323                         MDNode::concatenate(
1324                             NI->getMetadata(LLVMContext::MD_noalias),
1325                             MDNode::get(CalledFunc->getContext(), NoAliases)));
1326 
1327       // Next, we want to figure out all of the sets to which we might belong.
1328       // We might belong to a set if the noalias argument is in the set of
1329       // underlying objects. If there is some non-noalias argument in our list
1330       // of underlying objects, then we cannot add a scope because the fact
1331       // that some access does not alias with any set of our noalias arguments
1332       // cannot itself guarantee that it does not alias with this access
1333       // (because there is some pointer of unknown origin involved and the
1334       // other access might also depend on this pointer). We also cannot add
1335       // scopes to arbitrary functions unless we know they don't access any
1336       // non-parameter pointer-values.
1337       bool CanAddScopes = !UsesAliasingPtr;
1338       if (CanAddScopes && IsFuncCall)
1339         CanAddScopes = IsArgMemOnlyCall;
1340 
1341       if (CanAddScopes)
1342         for (const Argument *A : NoAliasArgs) {
1343           if (ObjSet.count(A))
1344             Scopes.push_back(NewScopes[A]);
1345         }
1346 
1347       if (!Scopes.empty())
1348         NI->setMetadata(
1349             LLVMContext::MD_alias_scope,
1350             MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope),
1351                                 MDNode::get(CalledFunc->getContext(), Scopes)));
1352     }
1353   }
1354 }
1355 
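     // Returns true if some instruction strictly after \p Begin and before
     // \p End (both in the same basic block) may throw or otherwise fail to
     // transfer execution to its successor. The scan is bounded by
     // InlinerAttributeWindow + 1 instructions and is conservative beyond that.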
1356 static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
1357                                                    ReturnInst *End) {
1358 
1359   assert(Begin->getParent() == End->getParent() &&
1360          "Expected to be in same basic block!");
1361   auto BeginIt = Begin->getIterator();
1362   assert(BeginIt != End->getIterator() && "Non-empty BB has empty iterator");
1363   return !llvm::isGuaranteedToTransferExecutionToSuccessor(
1364       ++BeginIt, End->getIterator(), InlinerAttributeWindow + 1);
1365 }
1366 
1367 // Add attributes from CB params and Fn attributes that can always be propagated
1368 // to the corresponding argument / inner callbases.
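     // For example (an illustrative sketch): if the call site being inlined is
     //   call void @callee(ptr readonly dereferenceable(8) %p)
     // and @callee forwards %p (or a pointer based on it) to an inner call,
     // then readonly can be added to the corresponding parameter of the cloned
     // inner call. dereferenceable(8) is only added when the exact argument is
     // forwarded, and only if the inner call site does not already carry a
     // stronger guarantee.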
1369 static void AddParamAndFnBasicAttributes(const CallBase &CB,
1370                                          ValueToValueMapTy &VMap,
1371                                          ClonedCodeInfo &InlinedFunctionInfo) {
1372   auto *CalledFunction = CB.getCalledFunction();
1373   auto &Context = CalledFunction->getContext();
1374 
1375   // Collect valid attributes for all params.
1376   SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
1377   bool HasAttrToPropagate = false;
1378 
1379   // Attributes we can only propagate if the exact parameter is forwarded.
1380   // We can propagate both poison generating and UB generating attributes
1381   // without any extra checks. The only attribute that is tricky to propagate
1382   // is `noundef` (skipped for now) as that can create new UB where previous
1383   // behavior was just using a poison value.
1384   static const Attribute::AttrKind ExactAttrsToPropagate[] = {
1385       Attribute::Dereferenceable, Attribute::DereferenceableOrNull,
1386       Attribute::NonNull, Attribute::Alignment, Attribute::Range};
1387 
1388   for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) {
1389     ValidObjParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
1390     ValidExactParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
1391     // Access attributes can be propagated to any param with the same underlying
1392     // object as the argument.
1393     if (CB.paramHasAttr(I, Attribute::ReadNone))
1394       ValidObjParamAttrs.back().addAttribute(Attribute::ReadNone);
1395     if (CB.paramHasAttr(I, Attribute::ReadOnly))
1396       ValidObjParamAttrs.back().addAttribute(Attribute::ReadOnly);
1397 
1398     for (Attribute::AttrKind AK : ExactAttrsToPropagate) {
1399       Attribute Attr = CB.getParamAttr(I, AK);
1400       if (Attr.isValid())
1401         ValidExactParamAttrs.back().addAttribute(Attr);
1402     }
1403 
1404     HasAttrToPropagate |= ValidObjParamAttrs.back().hasAttributes();
1405     HasAttrToPropagate |= ValidExactParamAttrs.back().hasAttributes();
1406   }
1407 
1408   // Won't be able to propagate anything.
1409   if (!HasAttrToPropagate)
1410     return;
1411 
1412   for (BasicBlock &BB : *CalledFunction) {
1413     for (Instruction &Ins : BB) {
1414       const auto *InnerCB = dyn_cast<CallBase>(&Ins);
1415       if (!InnerCB)
1416         continue;
1417       auto *NewInnerCB = dyn_cast_or_null<CallBase>(VMap.lookup(InnerCB));
1418       if (!NewInnerCB)
1419         continue;
1420       // The InnerCB might have been simplified during the inlining
1421       // process which can make propagation incorrect.
1422       if (InlinedFunctionInfo.isSimplified(InnerCB, NewInnerCB))
1423         continue;
1424 
1425       AttributeList AL = NewInnerCB->getAttributes();
1426       for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) {
1427         // It's unsound or requires special handling to propagate
1428         // attributes to byval arguments. Even if CalledFunction
1429         // doesn't e.g. write to the argument (readonly), the call to
1430         // NewInnerCB may write to its by-value copy.
1431         if (NewInnerCB->paramHasAttr(I, Attribute::ByVal))
1432           continue;
1433 
1434         // Don't bother propagating attrs to constants.
1435         if (match(NewInnerCB->getArgOperand(I),
1436                   llvm::PatternMatch::m_ImmConstant()))
1437           continue;
1438 
1439         // Check if the underlying value for the parameter is an argument.
1440         const Argument *Arg = dyn_cast<Argument>(InnerCB->getArgOperand(I));
1441         unsigned ArgNo;
1442         if (Arg) {
1443           ArgNo = Arg->getArgNo();
1444           // For dereferenceable, dereferenceable_or_null, align, etc...
1445           // we don't want to propagate if the existing param has the same
1446           // attribute with "better" constraints. So remove from the
1447           // new AL if the region of the existing param is larger than
1448           // what we can propagate.
1449           AttrBuilder NewAB{
1450               Context, AttributeSet::get(Context, ValidExactParamAttrs[ArgNo])};
1451           if (AL.getParamDereferenceableBytes(I) >
1452               NewAB.getDereferenceableBytes())
1453             NewAB.removeAttribute(Attribute::Dereferenceable);
1454           if (AL.getParamDereferenceableOrNullBytes(I) >
1455               NewAB.getDereferenceableOrNullBytes())
1456             NewAB.removeAttribute(Attribute::DereferenceableOrNull);
1457           if (AL.getParamAlignment(I).valueOrOne() >
1458               NewAB.getAlignment().valueOrOne())
1459             NewAB.removeAttribute(Attribute::Alignment);
1460           if (auto ExistingRange = AL.getParamRange(I)) {
1461             if (auto NewRange = NewAB.getRange()) {
1462               ConstantRange CombinedRange =
1463                   ExistingRange->intersectWith(*NewRange);
1464               NewAB.removeAttribute(Attribute::Range);
1465               NewAB.addRangeAttr(CombinedRange);
1466             }
1467           }
1468           AL = AL.addParamAttributes(Context, I, NewAB);
1469         } else if (NewInnerCB->getArgOperand(I)->getType()->isPointerTy()) {
1470           // Check if the underlying value for the parameter is an argument.
1471           const Value *UnderlyingV =
1472               getUnderlyingObject(InnerCB->getArgOperand(I));
1473           Arg = dyn_cast<Argument>(UnderlyingV);
1474           if (!Arg)
1475             continue;
1476           ArgNo = Arg->getArgNo();
1477         } else {
1478           continue;
1479         }
1480 
1481         // If so, propagate its access attributes.
1482         AL = AL.addParamAttributes(Context, I, ValidObjParamAttrs[ArgNo]);
1483 
1484         // We can have conflicting attributes from the inner callsite and
1485         // to-be-inlined callsite. In that case, choose the most
1486         // restrictive.
1487 
1488         // readonly + writeonly means we can never deref so make readnone.
1489         if (AL.hasParamAttr(I, Attribute::ReadOnly) &&
1490             AL.hasParamAttr(I, Attribute::WriteOnly))
1491           AL = AL.addParamAttribute(Context, I, Attribute::ReadNone);
1492 
1493         // If we have readnone, we need to clear readonly/writeonly.
1494         if (AL.hasParamAttr(I, Attribute::ReadNone)) {
1495           AL = AL.removeParamAttribute(Context, I, Attribute::ReadOnly);
1496           AL = AL.removeParamAttribute(Context, I, Attribute::WriteOnly);
1497         }
1498 
1499         // Writable cannot exist in conjunction with readonly/readnone.
1500         if (AL.hasParamAttr(I, Attribute::ReadOnly) ||
1501             AL.hasParamAttr(I, Attribute::ReadNone))
1502           AL = AL.removeParamAttribute(Context, I, Attribute::Writable);
1503       }
1504       NewInnerCB->setAttributes(AL);
1505     }
1506   }
1507 }
1508 
1509 // Only allow these whitelisted attributes to be propagated back to the
1510 // callee. This is because other attributes may only be valid on the call
1511 // itself, such as signext and zeroext.
1512 
1513 // Attributes that are always okay to propagate, because violating them is
1514 // immediate UB.
1515 static AttrBuilder IdentifyValidUBGeneratingAttributes(CallBase &CB) {
1516   AttrBuilder Valid(CB.getContext());
1517   if (auto DerefBytes = CB.getRetDereferenceableBytes())
1518     Valid.addDereferenceableAttr(DerefBytes);
1519   if (auto DerefOrNullBytes = CB.getRetDereferenceableOrNullBytes())
1520     Valid.addDereferenceableOrNullAttr(DerefOrNullBytes);
1521   if (CB.hasRetAttr(Attribute::NoAlias))
1522     Valid.addAttribute(Attribute::NoAlias);
1523   if (CB.hasRetAttr(Attribute::NoUndef))
1524     Valid.addAttribute(Attribute::NoUndef);
1525   return Valid;
1526 }
1527 
1528 // Attributes that need additional checks as propagating them may change
1529 // behavior or cause new UB.
1530 static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) {
1531   AttrBuilder Valid(CB.getContext());
1532   if (CB.hasRetAttr(Attribute::NonNull))
1533     Valid.addAttribute(Attribute::NonNull);
1534   if (CB.hasRetAttr(Attribute::Alignment))
1535     Valid.addAlignmentAttr(CB.getRetAlign());
1536   if (std::optional<ConstantRange> Range = CB.getRange())
1537     Valid.addRangeAttr(*Range);
1538   return Valid;
1539 }
1540 
1541 static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap,
1542                                 ClonedCodeInfo &InlinedFunctionInfo) {
1543   AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB);
1544   AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB);
1545   if (!ValidUB.hasAttributes() && !ValidPG.hasAttributes())
1546     return;
1547   auto *CalledFunction = CB.getCalledFunction();
1548   auto &Context = CalledFunction->getContext();
1549 
1550   for (auto &BB : *CalledFunction) {
1551     auto *RI = dyn_cast<ReturnInst>(BB.getTerminator());
1552     if (!RI || !isa<CallBase>(RI->getOperand(0)))
1553       continue;
1554     auto *RetVal = cast<CallBase>(RI->getOperand(0));
1555     // Check that the cloned RetVal exists and is a call, otherwise we cannot
1556     // add the attributes on the cloned RetVal. Simplification during inlining
1557     // could have transformed the cloned instruction.
1558     auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal));
1559     if (!NewRetVal)
1560       continue;
1561 
1562     // The RetVal might have been simplified during the inlining
1563     // process which can make propagation incorrect.
1564     if (InlinedFunctionInfo.isSimplified(RetVal, NewRetVal))
1565       continue;
1566     // Backward propagation of attributes to the returned value may be incorrect
1567     // if it is control flow dependent.
1568     // Consider:
1569     // @callee {
1570     //  %rv = call @foo()
1571     //  %rv2 = call @bar()
1572     //  if (%rv2 != null)
1573     //    return %rv2
1574     //  if (%rv == null)
1575     //    exit()
1576     //  return %rv
1577     // }
1578     // caller() {
1579     //   %val = call nonnull @callee()
1580     // }
1581     // Here we cannot add the nonnull attribute on either foo or bar. So, we
1582     // limit the check to the case where both RetVal and RI are in the same
1583     // basic block and there are no throwing/exiting instructions between them.
1584     if (RI->getParent() != RetVal->getParent() ||
1585         MayContainThrowingOrExitingCallAfterCB(RetVal, RI))
1586       continue;
1587     // Add to the existing attributes of NewRetVal, i.e. the cloned call
1588     // instruction.
1589     // NB! When we have the same attribute already existing on NewRetVal, but
1590     // with a differing value, the AttributeList's merge API honours the already
1591     // existing attribute value (i.e. attributes such as dereferenceable,
1592     // dereferenceable_or_null etc). See AttrBuilder::merge for more details.
1593     AttributeList AL = NewRetVal->getAttributes();
1594     if (ValidUB.getDereferenceableBytes() < AL.getRetDereferenceableBytes())
1595       ValidUB.removeAttribute(Attribute::Dereferenceable);
1596     if (ValidUB.getDereferenceableOrNullBytes() <
1597         AL.getRetDereferenceableOrNullBytes())
1598       ValidUB.removeAttribute(Attribute::DereferenceableOrNull);
1599     AttributeList NewAL = AL.addRetAttributes(Context, ValidUB);
1600     // Attributes that may generate poison returns are a bit tricky. If we
1601     // propagate them, other uses of the callsite might have their behavior
1602     // change or cause UB (if they have noundef) because of the new potential
1603     // poison.
1604     // Take the following three cases:
1605     //
1606     // 1)
1607     // define nonnull ptr @foo() {
1608     //   %p = call ptr @bar()
1609     //   call void @use(ptr %p) willreturn nounwind
1610     //   ret ptr %p
1611     // }
1612     //
1613     // 2)
1614     // define noundef nonnull ptr @foo() {
1615     //   %p = call ptr @bar()
1616     //   call void @use(ptr %p) willreturn nounwind
1617     //   ret ptr %p
1618     // }
1619     //
1620     // 3)
1621     // define nonnull ptr @foo() {
1622     //   %p = call noundef ptr @bar()
1623     //   ret ptr %p
1624     // }
1625     //
1626     // In case 1, we can't propagate nonnull because a poison value at @use may
1627     // change behavior or trigger UB.
1628     // In case 2, we don't need to be concerned about propagating nonnull, as
1629     // any new poison at @use will trigger UB anyway.
1630     // In case 3, we can never propagate nonnull because it may create UB due to
1631     // the noundef on @bar.
1632     if (ValidPG.getAlignment().valueOrOne() < AL.getRetAlignment().valueOrOne())
1633       ValidPG.removeAttribute(Attribute::Alignment);
1634     if (ValidPG.hasAttributes()) {
1635       Attribute CBRange = ValidPG.getAttribute(Attribute::Range);
1636       if (CBRange.isValid()) {
1637         Attribute NewRange = AL.getRetAttr(Attribute::Range);
1638         if (NewRange.isValid()) {
1639           ValidPG.addRangeAttr(
1640               CBRange.getRange().intersectWith(NewRange.getRange()));
1641         }
1642       }
1643       // Three checks.
1644       // If the callsite has `noundef`, then a poison due to violating the
1645       // return attribute will create UB anyway, so we can always propagate.
1646       // Otherwise, if the return value (callee to be inlined) has `noundef`, we
1647       // can't propagate as a new poison return will cause UB.
1648       // Finally, check if the return value has no uses whose behavior may
1649       // change/may cause UB if we potentially return poison. At the moment this
1650       // is implemented overly conservatively with a single-use check.
1651       // TODO: Update the single-use check to iterate through uses and only bail
1652       // if we have a potentially dangerous use.
1653 
1654       if (CB.hasRetAttr(Attribute::NoUndef) ||
1655           (RetVal->hasOneUse() && !RetVal->hasRetAttr(Attribute::NoUndef)))
1656         NewAL = NewAL.addRetAttributes(Context, ValidPG);
1657     }
1658     NewRetVal->setAttributes(NewAL);
1659   }
1660 }
1661 
1662 /// If the inlined function has non-byval align arguments, then
1663 /// add @llvm.assume-based alignment assumptions to preserve this information.
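     /// For example (an illustrative sketch): when inlining
     ///   define void @callee(ptr align 16 %p) { ... }
     /// at a call site where the caller cannot already prove that the argument
     /// is 16-byte aligned, an assumption along the lines of
     ///   call void @llvm.assume(i1 true) [ "align"(ptr %arg, i64 16) ]
     /// is emitted at the former call site.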
1664 static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
1665   if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache)
1666     return;
1667 
1668   AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller());
1669   auto &DL = CB.getDataLayout();
1670 
1671   // To avoid inserting redundant assumptions, we should check for assumptions
1672   // already in the caller. To do this, we might need a DT of the caller.
1673   DominatorTree DT;
1674   bool DTCalculated = false;
1675 
1676   Function *CalledFunc = CB.getCalledFunction();
1677   for (Argument &Arg : CalledFunc->args()) {
1678     if (!Arg.getType()->isPointerTy() || Arg.hasPassPointeeByValueCopyAttr() ||
1679         Arg.hasNUses(0))
1680       continue;
1681     MaybeAlign Alignment = Arg.getParamAlign();
1682     if (!Alignment)
1683       continue;
1684 
1685     if (!DTCalculated) {
1686       DT.recalculate(*CB.getCaller());
1687       DTCalculated = true;
1688     }
1689     // If we can already prove the asserted alignment in the context of the
1690     // caller, then don't bother inserting the assumption.
1691     Value *ArgVal = CB.getArgOperand(Arg.getArgNo());
1692     if (getKnownAlignment(ArgVal, DL, &CB, AC, &DT) >= *Alignment)
1693       continue;
1694 
1695     CallInst *NewAsmp = IRBuilder<>(&CB).CreateAlignmentAssumption(
1696         DL, ArgVal, Alignment->value());
1697     AC->registerAssumption(cast<AssumeInst>(NewAsmp));
1698   }
1699 }
1700 
1701 static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
1702                                     Module *M, BasicBlock *InsertBlock,
1703                                     InlineFunctionInfo &IFI,
1704                                     Function *CalledFunc) {
1705   IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
1706 
1707   Value *Size =
1708       Builder.getInt64(M->getDataLayout().getTypeStoreSize(ByValType));
1709 
1710   // Always generate a memcpy of alignment 1 here because we don't know
1711   // the alignment of the src pointer.  Other optimizations can infer
1712   // better alignment.
1713   CallInst *CI = Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src,
1714                                       /*SrcAlign*/ Align(1), Size);
1715 
1716   // The verifier requires that all calls of debug-info-bearing functions
1717   // from debug-info-bearing functions have a debug location (for inlining
1718   // purposes). Assign a dummy location to satisfy the constraint.
1719   if (!CI->getDebugLoc() && InsertBlock->getParent()->getSubprogram())
1720     if (DISubprogram *SP = CalledFunc->getSubprogram())
1721       CI->setDebugLoc(DILocation::get(SP->getContext(), 0, 0, SP));
1722 }
1723 
1724 /// When inlining a call site that has a byval argument,
1725 /// we have to make the implicit memcpy explicit by adding it.
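     /// For example (an illustrative sketch): for a call site such as
     ///   call void @callee(ptr byval(%struct.S) align 4 %p)
     /// a new alloca of %struct.S is created in the caller's entry block and
     /// the pointee is copied into it with a memcpy, unless the callee only
     /// reads memory and %p is already sufficiently aligned, in which case the
     /// copy can be elided.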
1726 static Value *HandleByValArgument(Type *ByValType, Value *Arg,
1727                                   Instruction *TheCall,
1728                                   const Function *CalledFunc,
1729                                   InlineFunctionInfo &IFI,
1730                                   MaybeAlign ByValAlignment) {
1731   Function *Caller = TheCall->getFunction();
1732   const DataLayout &DL = Caller->getDataLayout();
1733 
1734   // If the called function is readonly, then it could not mutate the caller's
1735   // copy of the byval'd memory.  In this case, it is safe to elide the copy and
1736   // temporary.
1737   if (CalledFunc->onlyReadsMemory()) {
1738     // If the byval argument has a specified alignment that is greater than the
1739     // known alignment of the passed-in pointer, then we either have to round up
1740     // the input pointer's alignment or give up on this transformation.
1741     if (ByValAlignment.valueOrOne() == 1)
1742       return Arg;
1743 
1744     AssumptionCache *AC =
1745         IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
1746 
1747     // If the pointer is already known to be sufficiently aligned, or if we can
1748     // round it up to a larger alignment, then we don't need a temporary.
1749     if (getOrEnforceKnownAlignment(Arg, *ByValAlignment, DL, TheCall, AC) >=
1750         *ByValAlignment)
1751       return Arg;
1752 
1753     // Otherwise, we have to make a memcpy to get a safe alignment.  This is bad
1754     // for code quality, but rarely happens and is required for correctness.
1755   }
1756 
1757   // Create the alloca, using the DataLayout's preferred alignment for the type.
1758   Align Alignment = DL.getPrefTypeAlign(ByValType);
1759 
1760   // If the byval had an alignment specified, we *must* use at least that
1761   // alignment, as it is required by the byval argument (and uses of the
1762   // pointer inside the callee).
1763   if (ByValAlignment)
1764     Alignment = std::max(Alignment, *ByValAlignment);
1765 
1766   AllocaInst *NewAlloca =
1767       new AllocaInst(ByValType, Arg->getType()->getPointerAddressSpace(),
1768                      nullptr, Alignment, Arg->getName());
1769   NewAlloca->insertBefore(Caller->begin()->begin());
1770   IFI.StaticAllocas.push_back(NewAlloca);
1771 
1772   // Uses of the argument in the function should use our new alloca
1773   // instead.
1774   return NewAlloca;
1775 }
1776 
1777 // Check whether this Value is used by a lifetime intrinsic.
1778 static bool isUsedByLifetimeMarker(Value *V) {
1779   for (User *U : V->users())
1780     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U))
1781       if (II->isLifetimeStartOrEnd())
1782         return true;
1783   return false;
1784 }
1785 
1786 // Check whether the given alloca already has
1787 // lifetime.start or lifetime.end intrinsics.
1788 static bool hasLifetimeMarkers(AllocaInst *AI) {
1789   Type *Ty = AI->getType();
1790   Type *Int8PtrTy =
1791       PointerType::get(Ty->getContext(), Ty->getPointerAddressSpace());
1792   if (Ty == Int8PtrTy)
1793     return isUsedByLifetimeMarker(AI);
1794 
1795   // Do a scan to find all the casts to i8*.
1796   for (User *U : AI->users()) {
1797     if (U->getType() != Int8PtrTy) continue;
1798     if (U->stripPointerCasts() != AI) continue;
1799     if (isUsedByLifetimeMarker(U))
1800       return true;
1801   }
1802   return false;
1803 }
1804 
1805 /// Return the result of AI->isStaticAlloca() if AI were moved to the entry
1806 /// block. Allocas used in inalloca calls and allocas of dynamic array size
1807 /// cannot be static.
1808 static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) {
1809   return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca();
1810 }
1811 
1812 /// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
1813 /// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
1814 static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
1815                                LLVMContext &Ctx,
1816                                DenseMap<const MDNode *, MDNode *> &IANodes) {
1817   auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes);
1818   return DILocation::get(Ctx, OrigDL.getLine(), OrigDL.getCol(),
1819                          OrigDL.getScope(), IA);
1820 }
1821 
1822 /// Update inlined instructions' line numbers to encode the location where
1823 /// these instructions are inlined.
1824 static void fixupLineNumbers(Function *Fn, Function::iterator FI,
1825                              Instruction *TheCall, bool CalleeHasDebugInfo) {
1826   const DebugLoc &TheCallDL = TheCall->getDebugLoc();
1827   if (!TheCallDL)
1828     return;
1829 
1830   auto &Ctx = Fn->getContext();
1831   DILocation *InlinedAtNode = TheCallDL;
1832 
1833   // Create a unique call site, not to be confused with any other call from the
1834   // same location.
1835   InlinedAtNode = DILocation::getDistinct(
1836       Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(),
1837       InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt());
1838 
1839   // Cache the inlined-at nodes as they're built so they are reused; without
1840   // this, every instruction's inlined-at chain would become distinct from the
1841   // others.
1842   DenseMap<const MDNode *, MDNode *> IANodes;
1843 
1844   // Check if we are not generating inline line tables and want to use
1845   // the call site location instead.
1846   bool NoInlineLineTables = Fn->hasFnAttribute("no-inline-line-tables");
1847 
1848   // Helper-util for updating the metadata attached to an instruction.
1849   auto UpdateInst = [&](Instruction &I) {
1850     // Loop metadata needs to be updated so that the start and end locs
1851     // reference inlined-at locations.
1852     auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode,
1853                               &IANodes](Metadata *MD) -> Metadata * {
1854       if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
1855         return inlineDebugLoc(Loc, InlinedAtNode, Ctx, IANodes).get();
1856       return MD;
1857     };
1858     updateLoopMetadataDebugLocations(I, updateLoopInfoLoc);
1859 
1860     if (!NoInlineLineTables)
1861       if (DebugLoc DL = I.getDebugLoc()) {
1862         DebugLoc IDL =
1863             inlineDebugLoc(DL, InlinedAtNode, I.getContext(), IANodes);
1864         I.setDebugLoc(IDL);
1865         return;
1866       }
1867 
1868     if (CalleeHasDebugInfo && !NoInlineLineTables)
1869       return;
1870 
1871     // If the inlined instruction has no line number, or if inline info
1872     // is not being generated, make it look as if it originates from the call
1873     // location. This is important for ((__always_inline, __nodebug__))
1874     // functions which must use caller location for all instructions in their
1875     // function body.
1876 
1877     // Don't update static allocas, as they may get moved later.
1878     if (auto *AI = dyn_cast<AllocaInst>(&I))
1879       if (allocaWouldBeStaticInEntry(AI))
1880         return;
1881 
1882     // Do not force a debug loc for pseudo probes, since they do not need to
1883     // be debuggable, and also they are expected to have a zero/null dwarf
1884     // discriminator at this point which could be violated otherwise.
1885     if (isa<PseudoProbeInst>(I))
1886       return;
1887 
1888     I.setDebugLoc(TheCallDL);
1889   };
1890 
1891   // Helper-util for updating debug-info records attached to instructions.
1892   auto UpdateDVR = [&](DbgRecord *DVR) {
1893     assert(DVR->getDebugLoc() && "Debug Value must have debug loc");
1894     if (NoInlineLineTables) {
1895       DVR->setDebugLoc(TheCallDL);
1896       return;
1897     }
1898     DebugLoc DL = DVR->getDebugLoc();
1899     DebugLoc IDL =
1900         inlineDebugLoc(DL, InlinedAtNode,
1901                        DVR->getMarker()->getParent()->getContext(), IANodes);
1902     DVR->setDebugLoc(IDL);
1903   };
1904 
1905   // Iterate over all instructions, updating metadata and debug-info records.
1906   for (; FI != Fn->end(); ++FI) {
1907     for (Instruction &I : *FI) {
1908       UpdateInst(I);
1909       for (DbgRecord &DVR : I.getDbgRecordRange()) {
1910         UpdateDVR(&DVR);
1911       }
1912     }
1913 
1914     // Remove debug info intrinsics if we're not keeping inline info.
1915     if (NoInlineLineTables) {
1916       BasicBlock::iterator BI = FI->begin();
1917       while (BI != FI->end()) {
1918         if (isa<DbgInfoIntrinsic>(BI)) {
1919           BI = BI->eraseFromParent();
1920           continue;
1921         } else {
1922           BI->dropDbgRecords();
1923         }
1924         ++BI;
1925       }
1926     }
1927   }
1928 }
1929 
1930 #undef DEBUG_TYPE
1931 #define DEBUG_TYPE "assignment-tracking"
1932 /// Find Alloca and linked DbgAssignIntrinsic for locals escaped by \p CB.
1933 static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL,
1934                                                  const CallBase &CB) {
1935   at::StorageToVarsMap EscapedLocals;
1936   SmallPtrSet<const Value *, 4> SeenBases;
1937 
1938   LLVM_DEBUG(
1939       errs() << "# Finding caller local variables escaped by callee\n");
1940   for (const Value *Arg : CB.args()) {
1941     LLVM_DEBUG(errs() << "INSPECT: " << *Arg << "\n");
1942     if (!Arg->getType()->isPointerTy()) {
1943       LLVM_DEBUG(errs() << " | SKIP: Not a pointer\n");
1944       continue;
1945     }
1946 
1947     const Instruction *I = dyn_cast<Instruction>(Arg);
1948     if (!I) {
1949       LLVM_DEBUG(errs() << " | SKIP: Not result of instruction\n");
1950       continue;
1951     }
1952 
1953     // Walk back to the base storage.
1954     assert(Arg->getType()->isPtrOrPtrVectorTy());
1955     APInt TmpOffset(DL.getIndexTypeSizeInBits(Arg->getType()), 0, false);
1956     const AllocaInst *Base = dyn_cast<AllocaInst>(
1957         Arg->stripAndAccumulateConstantOffsets(DL, TmpOffset, true));
1958     if (!Base) {
1959       LLVM_DEBUG(errs() << " | SKIP: Couldn't walk back to base storage\n");
1960       continue;
1961     }
1962 
1963     assert(Base);
1964     LLVM_DEBUG(errs() << " | BASE: " << *Base << "\n");
1965     // We only need to process each base address once - skip any duplicates.
1966     if (!SeenBases.insert(Base).second)
1967       continue;
1968 
1969     // Find all local variables associated with the backing storage.
1970     auto CollectAssignsForStorage = [&](auto *DbgAssign) {
1971       // Skip variables from inlined functions - they are not local variables.
1972       if (DbgAssign->getDebugLoc().getInlinedAt())
1973         return;
1974       LLVM_DEBUG(errs() << " > DEF : " << *DbgAssign << "\n");
1975       EscapedLocals[Base].insert(at::VarRecord(DbgAssign));
1976     };
1977     for_each(at::getAssignmentMarkers(Base), CollectAssignsForStorage);
1978     for_each(at::getDVRAssignmentMarkers(Base), CollectAssignsForStorage);
1979   }
1980   return EscapedLocals;
1981 }
1982 
1983 static void trackInlinedStores(Function::iterator Start, Function::iterator End,
1984                                const CallBase &CB) {
1985   LLVM_DEBUG(errs() << "trackInlinedStores into "
1986                     << Start->getParent()->getName() << " from "
1987                     << CB.getCalledFunction()->getName() << "\n");
1988   const DataLayout &DL = CB.getDataLayout();
1989   at::trackAssignments(Start, End, collectEscapedLocals(DL, CB), DL);
1990 }
1991 
1992 /// Update inlined instructions' DIAssignID metadata. We need to do this;
1993 /// otherwise, a function inlined more than once into the same function
1994 /// would cause DIAssignID to be shared by many instructions.
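     /// For example (an illustrative sketch): if @f is inlined twice into @g,
     /// each inlined copy of an instruction tagged with !DIAssignID !1 receives
     /// its own fresh DIAssignID, so assignment tracking does not treat the two
     /// copies as the same assignment.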
1995 static void fixupAssignments(Function::iterator Start, Function::iterator End) {
1996   DenseMap<DIAssignID *, DIAssignID *> Map;
1997   // Loop over all the inlined instructions. If we find a DIAssignID
1998   // attachment or use, replace it with a new version.
1999   for (auto BBI = Start; BBI != End; ++BBI) {
2000     for (Instruction &I : *BBI)
2001       at::remapAssignID(Map, I);
2002   }
2003 }
2004 #undef DEBUG_TYPE
2005 #define DEBUG_TYPE "inline-function"
2006 
2007 /// Update the block frequencies of the caller after a callee has been inlined.
2008 ///
2009 /// Each block cloned into the caller has its block frequency scaled by the
2010 /// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of
2011 /// callee's entry block gets the same frequency as the callsite block and the
2012 /// relative frequencies of all cloned blocks remain the same after cloning.
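     /// For example (illustrative numbers): if the callee's entry block has
     /// frequency 200, one of its blocks has frequency 50, and the call site
     /// block has frequency 20, the cloned copy of that block ends up with
     /// frequency 50 * (20 / 200) = 5.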
2013 static void updateCallerBFI(BasicBlock *CallSiteBlock,
2014                             const ValueToValueMapTy &VMap,
2015                             BlockFrequencyInfo *CallerBFI,
2016                             BlockFrequencyInfo *CalleeBFI,
2017                             const BasicBlock &CalleeEntryBlock) {
2018   SmallPtrSet<BasicBlock *, 16> ClonedBBs;
2019   for (auto Entry : VMap) {
2020     if (!isa<BasicBlock>(Entry.first) || !Entry.second)
2021       continue;
2022     auto *OrigBB = cast<BasicBlock>(Entry.first);
2023     auto *ClonedBB = cast<BasicBlock>(Entry.second);
2024     BlockFrequency Freq = CalleeBFI->getBlockFreq(OrigBB);
2025     if (!ClonedBBs.insert(ClonedBB).second) {
2026       // Multiple blocks in the callee might get mapped to one cloned block in
2027       // the caller since we prune the callee as we clone it. When that happens,
2028       // we want to use the maximum among the original blocks' frequencies.
2029       BlockFrequency NewFreq = CallerBFI->getBlockFreq(ClonedBB);
2030       if (NewFreq > Freq)
2031         Freq = NewFreq;
2032     }
2033     CallerBFI->setBlockFreq(ClonedBB, Freq);
2034   }
2035   BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock));
2036   CallerBFI->setBlockFreqAndScale(
2037       EntryClone, CallerBFI->getBlockFreq(CallSiteBlock), ClonedBBs);
2038 }
2039 
2040 /// Update the branch metadata for cloned call instructions.
2041 static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
2042                               const ProfileCount &CalleeEntryCount,
2043                               const CallBase &TheCall, ProfileSummaryInfo *PSI,
2044                               BlockFrequencyInfo *CallerBFI) {
2045   if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1)
2046     return;
2047   auto CallSiteCount =
2048       PSI ? PSI->getProfileCount(TheCall, CallerBFI) : std::nullopt;
2049   int64_t CallCount =
2050       std::min(CallSiteCount.value_or(0), CalleeEntryCount.getCount());
2051   updateProfileCallee(Callee, -CallCount, &VMap);
2052 }
2053 
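     /// Adjust \p Callee's entry count by \p EntryDelta (clamping at zero) and
     /// rescale the profile weights of the call sites in \p Callee accordingly.
     /// If \p VMap is provided (i.e. we are inlining), the cloned call sites in
     /// the caller are rescaled as well.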
2054 void llvm::updateProfileCallee(
2055     Function *Callee, int64_t EntryDelta,
2056     const ValueMap<const Value *, WeakTrackingVH> *VMap) {
2057   auto CalleeCount = Callee->getEntryCount();
2058   if (!CalleeCount)
2059     return;
2060 
2061   const uint64_t PriorEntryCount = CalleeCount->getCount();
2062 
2063   // Since CallSiteCount is an estimate, it could exceed the original callee
2064   // count; in that case, the new count is clamped to 0 to guard against underflow.
2065   const uint64_t NewEntryCount =
2066       (EntryDelta < 0 && static_cast<uint64_t>(-EntryDelta) > PriorEntryCount)
2067           ? 0
2068           : PriorEntryCount + EntryDelta;
2069 
2070   auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount,
2071                                    const uint64_t PriorEntryCount) {
2072     Instruction *VPtr = PGOIndirectCallVisitor::tryGetVTableInstruction(CB);
2073     if (VPtr)
2074       scaleProfData(*VPtr, NewEntryCount, PriorEntryCount);
2075   };
2076 
2077   // During inlining?
2078   if (VMap) {
2079     uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount;
2080     for (auto Entry : *VMap) {
2081       if (isa<CallInst>(Entry.first))
2082         if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) {
2083           CI->updateProfWeight(CloneEntryCount, PriorEntryCount);
2084           updateVTableProfWeight(CI, CloneEntryCount, PriorEntryCount);
2085         }
2086 
2087       if (isa<InvokeInst>(Entry.first))
2088         if (auto *II = dyn_cast_or_null<InvokeInst>(Entry.second)) {
2089           II->updateProfWeight(CloneEntryCount, PriorEntryCount);
2090           updateVTableProfWeight(II, CloneEntryCount, PriorEntryCount);
2091         }
2092     }
2093   }
2094 
2095   if (EntryDelta) {
2096     Callee->setEntryCount(NewEntryCount);
2097 
2098     for (BasicBlock &BB : *Callee)
2099       // No need to update the callsite if it is pruned during inlining.
2100       if (!VMap || VMap->count(&BB))
2101         for (Instruction &I : BB) {
2102           if (CallInst *CI = dyn_cast<CallInst>(&I)) {
2103             CI->updateProfWeight(NewEntryCount, PriorEntryCount);
2104             updateVTableProfWeight(CI, NewEntryCount, PriorEntryCount);
2105           }
2106           if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
2107             II->updateProfWeight(NewEntryCount, PriorEntryCount);
2108             updateVTableProfWeight(II, NewEntryCount, PriorEntryCount);
2109           }
2110         }
2111   }
2112 }
2113 
2114 /// An operand bundle "clang.arc.attachedcall" on a call indicates the call
2115 /// result is implicitly consumed by a call to retainRV or claimRV immediately
2116 /// after the call. This function inlines the retainRV/claimRV calls.
2117 ///
2118 /// There are three cases to consider:
2119 ///
2120 /// 1. If there is a call to autoreleaseRV that takes a pointer to the returned
2121 ///    object in the callee return block, the autoreleaseRV call and the
2122 ///    retainRV/claimRV call in the caller cancel out. If the call in the caller
2123 ///    is a claimRV call, a call to objc_release is emitted.
2124 ///
2125 /// 2. If there is a call in the callee return block that doesn't have operand
2126 ///    bundle "clang.arc.attachedcall", the operand bundle on the original call
2127 ///    is transferred to the call in the callee.
2128 ///
2129 /// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is
2130 ///    a retainRV call.
2131 static void
2132 inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
2133                            const SmallVectorImpl<ReturnInst *> &Returns) {
2134   assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function");
2135   bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV,
2136        IsUnsafeClaimRV = !IsRetainRV;
2137 
2138   for (auto *RI : Returns) {
2139     Value *RetOpnd = objcarc::GetRCIdentityRoot(RI->getOperand(0));
2140     bool InsertRetainCall = IsRetainRV;
2141     IRBuilder<> Builder(RI->getContext());
2142 
2143     // Walk backwards through the basic block looking for either a matching
2144     // autoreleaseRV call or an unannotated call.
2145     auto InstRange = llvm::make_range(++(RI->getIterator().getReverse()),
2146                                       RI->getParent()->rend());
2147     for (Instruction &I : llvm::make_early_inc_range(InstRange)) {
2148       // Ignore casts.
2149       if (isa<CastInst>(I))
2150         continue;
2151 
2152       if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
2153         if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue ||
2154             !II->hasNUses(0) ||
2155             objcarc::GetRCIdentityRoot(II->getOperand(0)) != RetOpnd)
2156           break;
2157 
2158         // If we've found a matching autoreleaseRV call:
2159         // - If claimRV is attached to the call, insert a call to objc_release
2160         //   and erase the autoreleaseRV call.
2161         // - If retainRV is attached to the call, just erase the autoreleaseRV
2162         //   call.
2163         if (IsUnsafeClaimRV) {
2164           Builder.SetInsertPoint(II);
2165           Builder.CreateIntrinsic(Intrinsic::objc_release, {}, RetOpnd);
2166         }
2167         II->eraseFromParent();
2168         InsertRetainCall = false;
2169         break;
2170       }
2171 
2172       auto *CI = dyn_cast<CallInst>(&I);
2173 
2174       if (!CI)
2175         break;
2176 
2177       if (objcarc::GetRCIdentityRoot(CI) != RetOpnd ||
2178           objcarc::hasAttachedCallOpBundle(CI))
2179         break;
2180 
2181       // If we've found an unannotated call that defines RetOpnd, add a
2182       // "clang.arc.attachedcall" operand bundle.
2183       Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(&CB)};
2184       OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
2185       auto *NewCall = CallBase::addOperandBundle(
2186           CI, LLVMContext::OB_clang_arc_attachedcall, OB, CI->getIterator());
2187       NewCall->copyMetadata(*CI);
2188       CI->replaceAllUsesWith(NewCall);
2189       CI->eraseFromParent();
2190       InsertRetainCall = false;
2191       break;
2192     }
2193 
2194     if (InsertRetainCall) {
2195       // The retainRV is attached to the call and we've failed to find a
2196       // matching autoreleaseRV or an annotated call in the callee. Emit a call
2197       // to objc_retain.
2198       Builder.SetInsertPoint(RI);
2199       Builder.CreateIntrinsic(Intrinsic::objc_retain, {}, RetOpnd);
2200     }
2201   }
2202 }
2203 
2204 // In contextual profiling, when an inline succeeds, we want to remap the
2205 // indices of the callee into the index space of the caller. We can't just leave
2206 // them as-is because the same callee may appear in other places in this caller
2207 // (other callsites), and its (callee's) counters and sub-contextual profile
2208 // tree would be potentially different.
2209 // Not all BBs of the callee may survive the opportunistic DCE InlineFunction
2210 // does (same goes for callsites in the callee).
2211 // We will return a pair of vectors, one for basic block IDs and one for
2212 // callsites. For such a vector V, V[Idx] will be -1 if the callee
2213 // instrumentation with index Idx did not survive inlining, and a new value
2214 // otherwise.
2215 // This function will update the caller's instrumentation intrinsics
2216 // accordingly, mapping indices as described above. We also replace the "name"
2217 // operand because we use it to distinguish between "own" instrumentation and
2218 // "from callee" instrumentation when performing the traversal of the CFG of the
2219 // caller. We traverse depth-first from the callsite's BB and up to the point we
2220 // hit BBs owned by the caller.
2221 // The return values will be then used to update the contextual
2222 // profile. Note: we only update the "name" and "index" operands in the
2223 // instrumentation intrinsics, we leave the hash and total nr of indices as-is,
2224 // it's not worth updating those.
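     // For example (an illustrative sketch): if the callee had counters 0..3,
     // only counters 1 and 3 survive inlining, and the caller's next free
     // counter index is 5, the returned counter vector might be {-1, 5, -1, 6}.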
2225 static const std::pair<std::vector<int64_t>, std::vector<int64_t>>
2226 remapIndices(Function &Caller, BasicBlock *StartBB,
2227              PGOContextualProfile &CtxProf, uint32_t CalleeCounters,
2228              uint32_t CalleeCallsites) {
2229   // We'll allocate a new ID to imported callsite counters and callsites. We're
2230   // using -1 to indicate a counter we delete. Most likely the entry ID, for
2231   // example, will be deleted - we don't want 2 IDs in the same BB, and the
2232   // entry would have been cloned in the callsite's old BB.
2233   std::vector<int64_t> CalleeCounterMap;
2234   std::vector<int64_t> CalleeCallsiteMap;
2235   CalleeCounterMap.resize(CalleeCounters, -1);
2236   CalleeCallsiteMap.resize(CalleeCallsites, -1);
2237 
2238   auto RewriteInstrIfNeeded = [&](InstrProfIncrementInst &Ins) -> bool {
2239     if (Ins.getNameValue() == &Caller)
2240       return false;
2241     const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue());
2242     if (CalleeCounterMap[OldID] == -1)
2243       CalleeCounterMap[OldID] = CtxProf.allocateNextCounterIndex(Caller);
2244     const auto NewID = static_cast<uint32_t>(CalleeCounterMap[OldID]);
2245 
2246     Ins.setNameValue(&Caller);
2247     Ins.setIndex(NewID);
2248     return true;
2249   };
2250 
2251   auto RewriteCallsiteInsIfNeeded = [&](InstrProfCallsite &Ins) -> bool {
2252     if (Ins.getNameValue() == &Caller)
2253       return false;
2254     const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue());
2255     if (CalleeCallsiteMap[OldID] == -1)
2256       CalleeCallsiteMap[OldID] = CtxProf.allocateNextCallsiteIndex(Caller);
2257     const auto NewID = static_cast<uint32_t>(CalleeCallsiteMap[OldID]);
2258 
2259     Ins.setNameValue(&Caller);
2260     Ins.setIndex(NewID);
2261     return true;
2262   };
2263 
2264   std::deque<BasicBlock *> Worklist;
2265   DenseSet<const BasicBlock *> Seen;
2266   // We will traverse the BBs starting from the callsite BB. The callsite BB
2267   // will have at least a BB ID - maybe its own, and in any case the one coming
2268   // from the cloned function's entry BB. The other BBs we'll start seeing from
2269   // there on may or may not have BB IDs. BBs with IDs belonging to our caller
2270   // are definitely not coming from the imported function and form a boundary
2271   // past which we don't need to traverse anymore. BBs may have no
2272   // instrumentation (because we originally inserted instrumentation as per
2273   // MST), in which case we'll traverse past them. An invariant we'll keep is
2274   // that a BB will have at most 1 BB ID. For example, in the callsite BB, we
2275   // will delete the callee BB's instrumentation. This doesn't result in
2276   // information loss: the entry BB of the callee will have the same count as
2277   // the callsite's BB. At the end of this traversal, all the callee's
2278   // instrumentation would be mapped into the caller's instrumentation index
2279   // space. Some of the callee's counters may be deleted (as mentioned, this
2280   // should result in no loss of information).
2281   Worklist.push_back(StartBB);
2282   while (!Worklist.empty()) {
2283     auto *BB = Worklist.front();
2284     Worklist.pop_front();
2285     bool Changed = false;
2286     auto *BBID = CtxProfAnalysis::getBBInstrumentation(*BB);
2287     if (BBID) {
2288       Changed |= RewriteInstrIfNeeded(*BBID);
2289       // This may be the entry block from the inlined callee, coming into a BB
2290       // that didn't have instrumentation because of MST decisions. Let's make
2291       // sure it's placed accordingly. This is a no-op elsewhere.
2292       BBID->moveBefore(BB->getFirstInsertionPt());
2293     }
2294     for (auto &I : llvm::make_early_inc_range(*BB)) {
2295       if (auto *Inc = dyn_cast<InstrProfIncrementInst>(&I)) {
2296         if (isa<InstrProfIncrementInstStep>(Inc)) {
2297           // Step instrumentation is used for select instructions. Inlining may
2298           // have propagated a constant, resulting in the condition of the select
2299           // being resolved; in that case, function cloning resolves the value
2300           // of the select and elides the select instruction. If that is the
2301           // case, the step parameter of the instrumentation will reflect that.
2302           // We can delete the instrumentation in that case.
2303           if (isa<Constant>(Inc->getStep())) {
2304             assert(!Inc->getNextNode() || !isa<SelectInst>(Inc->getNextNode()));
2305             Inc->eraseFromParent();
2306           } else {
2307             assert(isa_and_nonnull<SelectInst>(Inc->getNextNode()));
2308             RewriteInstrIfNeeded(*Inc);
2309           }
2310         } else if (Inc != BBID) {
2311           // If we're here, it means that the BB had more than one ID, presumably
2312           // some coming from the callee. We "made up our mind" to keep the
2313           // first one (which may or may not have been originally the caller's).
2314           // All the others are superfluous and we delete them.
2315           Inc->eraseFromParent();
2316           Changed = true;
2317         }
2318       } else if (auto *CS = dyn_cast<InstrProfCallsite>(&I)) {
2319         Changed |= RewriteCallsiteInsIfNeeded(*CS);
2320       }
2321     }
2322     if (!BBID || Changed)
2323       for (auto *Succ : successors(BB))
2324         if (Seen.insert(Succ).second)
2325           Worklist.push_back(Succ);
2326   }
2327 
2328   assert(
2329       llvm::all_of(CalleeCounterMap, [&](const auto &V) { return V != 0; }) &&
2330       "Counter index mapping should be either to -1 or to non-zero index, "
2331       "because the 0 "
2332       "index corresponds to the entry BB of the caller");
2333   assert(
2334       llvm::all_of(CalleeCallsiteMap, [&](const auto &V) { return V != 0; }) &&
2335       "Callsite index mapping should be either to -1 or to non-zero index, "
2336       "because there should have been at least a callsite - the inlined one "
2337       "- which would have had a 0 index.");
2338 
2339   return {std::move(CalleeCounterMap), std::move(CalleeCallsiteMap)};
2340 }
2341 
2342 // Inline. If successful, update the contextual profile (if a valid one is
2343 // given).
2344 // The contextual profile data is organized in trees, as follows:
2345 //  - each node corresponds to a function
2346 //  - the root of each tree corresponds to an "entrypoint" - e.g.
2347 //    RPC handler for server side
2348 //  - the path from the root to a node is a particular call path
2349 //  - the counters stored in a node are counter values observed in that
2350 //    particular call path ("context")
2351 //  - the edges between nodes are annotated with callsite IDs.
2352 //
2353 // Updating the contextual profile after an inlining means, at a high level,
2354 // copying over the data of the callee, **intentionally without any value
2355 // scaling**, and copying over the callees of the inlined callee.
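     // For example (an illustrative sketch): if the inlined callsite had ID 2 in
     // the caller, then for each context node of the caller the callee's counter
     // values are copied into the caller's (now larger) counter vector at the
     // remapped indices, the callee's sub-callsites are re-parented under the
     // caller using the remapped callsite IDs, and the callsite-2 entry is
     // erased.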
2356 llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
2357                                         PGOContextualProfile &CtxProf,
2358                                         bool MergeAttributes,
2359                                         AAResults *CalleeAAR,
2360                                         bool InsertLifetime,
2361                                         Function *ForwardVarArgsTo) {
2362   if (!CtxProf)
2363     return InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime,
2364                           ForwardVarArgsTo);
2365 
2366   auto &Caller = *CB.getCaller();
2367   auto &Callee = *CB.getCalledFunction();
2368   auto *StartBB = CB.getParent();
2369 
2370   // Get some preliminary data about the callsite before it might get inlined.
2371   // Inlining shouldn't delete the callee, but it's cleaner (and low-cost) to
2372   // get this data upfront and rely less on InlineFunction's behavior.
2373   const auto CalleeGUID = AssignGUIDPass::getGUID(Callee);
2374   auto *CallsiteIDIns = CtxProfAnalysis::getCallsiteInstrumentation(CB);
2375   const auto CallsiteID =
2376       static_cast<uint32_t>(CallsiteIDIns->getIndex()->getZExtValue());
2377 
2378   const auto NumCalleeCounters = CtxProf.getNumCounters(Callee);
2379   const auto NumCalleeCallsites = CtxProf.getNumCallsites(Callee);
2380 
2381   auto Ret = InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime,
2382                             ForwardVarArgsTo);
2383   if (!Ret.isSuccess())
2384     return Ret;
2385 
2386   // Inlining succeeded, we don't need the instrumentation of the inlined
2387   // callsite.
2388   CallsiteIDIns->eraseFromParent();
2389   // Assigning to IndicesMaps and then capturing references into it in the
2390   // lambda, because captured structured bindings are a C++20 extension. We do
2391   // also need a capture here, though.
2392   // capture here, though.
2393   const auto IndicesMaps = remapIndices(Caller, StartBB, CtxProf,
2394                                         NumCalleeCounters, NumCalleeCallsites);
2395   const uint32_t NewCountersSize = CtxProf.getNumCounters(Caller);
2396 
2397   auto Updater = [&](PGOCtxProfContext &Ctx) {
2398     assert(Ctx.guid() == AssignGUIDPass::getGUID(Caller));
2399     const auto &[CalleeCounterMap, CalleeCallsiteMap] = IndicesMaps;
2400     assert(
2401         (Ctx.counters().size() +
2402              llvm::count_if(CalleeCounterMap, [](auto V) { return V != -1; }) ==
2403          NewCountersSize) &&
2404         "The caller's counters size should have grown by the number of new "
2405         "distinct counters inherited from the inlined callee.");
2406     Ctx.resizeCounters(NewCountersSize);
2407     // If the callsite wasn't exercised in this context, the counters coming
2408     // from it are 0 - which is what they are right now, after resizing them -
2409     // and so we're done.
2410     auto CSIt = Ctx.callsites().find(CallsiteID);
2411     if (CSIt == Ctx.callsites().end())
2412       return;
2413     auto CalleeCtxIt = CSIt->second.find(CalleeGUID);
2414     // The callsite was exercised, but not with this callee (so presumably this
2415     // is an indirect callsite). Again, we're done here.
2416     if (CalleeCtxIt == CSIt->second.end())
2417       return;
2418 
2419     // Let's pull in the counter values and the subcontexts coming from the
2420     // inlined callee.
2421     auto &CalleeCtx = CalleeCtxIt->second;
2422     assert(CalleeCtx.guid() == CalleeGUID);
2423 
2424     for (auto I = 0U; I < CalleeCtx.counters().size(); ++I) {
2425       const int64_t NewIndex = CalleeCounterMap[I];
2426       if (NewIndex >= 0) {
2427         assert(NewIndex != 0 && "counter index mapping shouldn't happen to a 0 "
2428                                 "index, that's the caller's entry BB");
2429         Ctx.counters()[NewIndex] = CalleeCtx.counters()[I];
2430       }
2431     }
2432     for (auto &[I, OtherSet] : CalleeCtx.callsites()) {
2433       const int64_t NewCSIdx = CalleeCallsiteMap[I];
2434       if (NewCSIdx >= 0) {
2435         assert(NewCSIdx != 0 &&
2436                "callsite index mapping shouldn't happen to a 0 index, the "
2437                "caller must've had at least one callsite (with such an index)");
2438         Ctx.ingestAllContexts(NewCSIdx, std::move(OtherSet));
2439       }
2440     }
2441     // We know the traversal is preorder, so it wouldn't have yet looked at the
2442     // sub-contexts of this context that it's currently visiting. Meaning, the
2443     // erase below invalidates no iterators.
2444     auto Deleted = Ctx.callsites().erase(CallsiteID);
2445     assert(Deleted);
2446     (void)Deleted;
2447   };
2448   CtxProf.update(Updater, Caller);
2449   return Ret;
2450 }
2451 
2452 /// This function inlines the called function into the basic block of the
2453 /// caller. This returns false if it is not possible to inline this call.
2454 /// The program is still in a well defined state if this occurs though.
2455 ///
2456 /// Note that this only does one level of inlining.  For example, if the
2457 /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
2458 /// exists in the instruction stream.  Similarly this will inline a recursive
2459 /// function by one level.
2460 llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
2461                                         bool MergeAttributes,
2462                                         AAResults *CalleeAAR,
2463                                         bool InsertLifetime,
2464                                         Function *ForwardVarArgsTo) {
2465   assert(CB.getParent() && CB.getFunction() && "Instruction not in function!");
2466 
2467   // FIXME: we don't inline callbr yet.
2468   if (isa<CallBrInst>(CB))
2469     return InlineResult::failure("We don't inline callbr yet.");
2470 
2471   // If IFI has any state in it, zap it before we fill it in.
2472   IFI.reset();
2473 
2474   Function *CalledFunc = CB.getCalledFunction();
2475   if (!CalledFunc ||               // Can't inline external function or indirect
2476       CalledFunc->isDeclaration()) // call!
2477     return InlineResult::failure("external or indirect");
2478 
2479   // The inliner does not know how to inline through calls with operand bundles
2480   // in general ...
2481   Value *ConvergenceControlToken = nullptr;
2482   if (CB.hasOperandBundles()) {
2483     for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) {
2484       auto OBUse = CB.getOperandBundleAt(i);
2485       uint32_t Tag = OBUse.getTagID();
2486       // ... but it knows how to inline through "deopt" operand bundles ...
2487       if (Tag == LLVMContext::OB_deopt)
2488         continue;
2489       // ... and "funclet" operand bundles.
2490       if (Tag == LLVMContext::OB_funclet)
2491         continue;
2492       if (Tag == LLVMContext::OB_clang_arc_attachedcall)
2493         continue;
2494       if (Tag == LLVMContext::OB_kcfi)
2495         continue;
2496       if (Tag == LLVMContext::OB_convergencectrl) {
2497         ConvergenceControlToken = OBUse.Inputs[0].get();
2498         continue;
2499       }
2500 
2501       return InlineResult::failure("unsupported operand bundle");
2502     }
2503   }
2504 
2505   // FIXME: The check below is redundant and incomplete. According to spec, if a
2506   // convergent call is missing a token, then the caller is using uncontrolled
2507   // convergence. If the callee has an entry intrinsic, then the callee is using
2508   // controlled convergence, and the call cannot be inlined. A proper
2509   // implementation of this check requires a whole new analysis that identifies
2510   // convergence in every function. For now, we skip that and just do this one
2511   // cursory check. The underlying assumption is that in a compiler flow that
2512   // fully implements convergence control tokens, there is no mixing of
2513   // controlled and uncontrolled convergent operations in the whole program.
2514   if (CB.isConvergent()) {
2515     if (!ConvergenceControlToken &&
2516         getConvergenceEntry(CalledFunc->getEntryBlock())) {
2517       return InlineResult::failure(
2518           "convergent call needs convergencectrl operand");
2519     }
2520   }
2521 
2522   // If the call to the callee cannot throw, set the 'nounwind' flag on any
2523   // calls that we inline.
2524   bool MarkNoUnwind = CB.doesNotThrow();
2525 
2526   BasicBlock *OrigBB = CB.getParent();
2527   Function *Caller = OrigBB->getParent();
2528 
2529   // GC poses two hazards to inlining, which only occur when the callee has GC:
2530   //  1. If the caller has no GC, then the callee's GC must be propagated to the
2531   //     caller.
2532   //  2. If the caller has a differing GC, it is invalid to inline.
2533   if (CalledFunc->hasGC()) {
2534     if (!Caller->hasGC())
2535       Caller->setGC(CalledFunc->getGC());
2536     else if (CalledFunc->getGC() != Caller->getGC())
2537       return InlineResult::failure("incompatible GC");
2538   }
2539 
2540   // Get the personality function from the callee if it contains a landing pad.
2541   Constant *CalledPersonality =
2542       CalledFunc->hasPersonalityFn()
2543           ? CalledFunc->getPersonalityFn()->stripPointerCasts()
2544           : nullptr;
2545 
2546   // Find the personality function used by the landing pads of the caller. If it
2547   // exists, then check to see that it matches the personality function used in
2548   // the callee.
2549   Constant *CallerPersonality =
2550       Caller->hasPersonalityFn()
2551           ? Caller->getPersonalityFn()->stripPointerCasts()
2552           : nullptr;
2553   if (CalledPersonality) {
2554     if (!CallerPersonality)
2555       Caller->setPersonalityFn(CalledPersonality);
2556     // If the personality functions match, then we can perform the
2557     // inlining. Otherwise, we can't inline.
2558     // TODO: This isn't 100% true. Some personality functions are proper
2559     //       supersets of others and can be used in place of the other.
2560     else if (CalledPersonality != CallerPersonality)
2561       return InlineResult::failure("incompatible personality");
2562   }
2563 
2564   // We need to figure out which funclet the callsite was in so that we may
2565   // properly nest the callee.
2566   Instruction *CallSiteEHPad = nullptr;
2567   if (CallerPersonality) {
2568     EHPersonality Personality = classifyEHPersonality(CallerPersonality);
2569     if (isScopedEHPersonality(Personality)) {
2570       std::optional<OperandBundleUse> ParentFunclet =
2571           CB.getOperandBundle(LLVMContext::OB_funclet);
2572       if (ParentFunclet)
2573         CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front());
2574 
2575       // OK, the inlining site is legal.  What about the target function?
2576 
2577       if (CallSiteEHPad) {
2578         if (Personality == EHPersonality::MSVC_CXX) {
2579           // The MSVC personality cannot tolerate catches getting inlined into
2580           // cleanup funclets.
2581           if (isa<CleanupPadInst>(CallSiteEHPad)) {
2582             // Ok, the call site is within a cleanuppad.  Let's check the callee
2583             // for catchpads.
2584             for (const BasicBlock &CalledBB : *CalledFunc) {
2585               if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHIIt()))
2586                 return InlineResult::failure("catch in cleanup funclet");
2587             }
2588           }
2589         } else if (isAsynchronousEHPersonality(Personality)) {
2590           // SEH is even less tolerant; there may not be any sort of exceptional
2591           // funclet in the callee.
2592           for (const BasicBlock &CalledBB : *CalledFunc) {
2593             if (CalledBB.isEHPad())
2594               return InlineResult::failure("SEH in cleanup funclet");
2595           }
2596         }
2597       }
2598     }
2599   }
2600 
2601   // Determine if we are dealing with a call in an EHPad which does not unwind
2602   // to caller.
2603   bool EHPadForCallUnwindsLocally = false;
2604   if (CallSiteEHPad && isa<CallInst>(CB)) {
2605     UnwindDestMemoTy FuncletUnwindMap;
2606     Value *CallSiteUnwindDestToken =
2607         getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap);
2608 
2609     EHPadForCallUnwindsLocally =
2610         CallSiteUnwindDestToken &&
2611         !isa<ConstantTokenNone>(CallSiteUnwindDestToken);
2612   }
2613 
2614   // Get an iterator to the last basic block in the function, which will have
2615   // the new function inlined after it.
2616   Function::iterator LastBlock = --Caller->end();
2617 
2618   // Make sure to capture all of the return instructions from the cloned
2619   // function.
2620   SmallVector<ReturnInst*, 8> Returns;
2621   ClonedCodeInfo InlinedFunctionInfo;
2622   Function::iterator FirstNewBlock;
2623 
2624   { // Scope to destroy VMap after cloning.
2625     ValueToValueMapTy VMap;
2626     struct ByValInit {
2627       Value *Dst;
2628       Value *Src;
2629       Type *Ty;
2630     };
2631     // Keep a list of pair (dst, src) to emit byval initializations.
2632     SmallVector<ByValInit, 4> ByValInits;
2633 
2634     // When inlining a function that contains noalias scope metadata,
2635     // this metadata needs to be cloned so that the inlined blocks
2636     // have different "unique scopes" at every call site.
2637     // Track the metadata that must be cloned. Do this before other changes to
2638     // the function, so that we do not get in trouble when inlining caller ==
2639     // callee.
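         // (Illustrative: a load or store in the callee carrying !alias.scope /
         // !noalias scope lists gets freshly cloned scope nodes for this
         // inlining, so two inlined copies of the same callee never share
         // "unique" scopes.)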
2640     ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction());
2641 
2642     auto &DL = Caller->getDataLayout();
2643 
2644     // Calculate the vector of arguments to pass into the function cloner, which
2645     // matches up the formal to the actual argument values.
2646     auto AI = CB.arg_begin();
2647     unsigned ArgNo = 0;
2648     for (Function::arg_iterator I = CalledFunc->arg_begin(),
2649          E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
2650       Value *ActualArg = *AI;
2651 
2652       // When byval arguments are actually inlined, we need to make the copy
2653       // implied by them explicit.  However, we don't do this if the callee is
2654       // readonly or readnone, because the copy would be unneeded: the callee
2655       // doesn't modify the struct.
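           // (A rough sketch of the usual outcome: the explicit copy becomes a
           // new static alloca in the caller plus a memcpy from the original
           // argument, emitted below via HandleByValArgumentInit.)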
2656       if (CB.isByValArgument(ArgNo)) {
2657         ActualArg = HandleByValArgument(CB.getParamByValType(ArgNo), ActualArg,
2658                                         &CB, CalledFunc, IFI,
2659                                         CalledFunc->getParamAlign(ArgNo));
2660         if (ActualArg != *AI)
2661           ByValInits.push_back(
2662               {ActualArg, (Value *)*AI, CB.getParamByValType(ArgNo)});
2663       }
2664 
2665       VMap[&*I] = ActualArg;
2666     }
2667 
2668     // TODO: Remove this when users have been updated to use assume bundles.
2669     // Add alignment assumptions if necessary. We do this before the inlined
2670     // instructions are actually cloned into the caller so that we can easily
2671     // check what will be known at the start of the inlined code.
2672     AddAlignmentAssumptions(CB, IFI);
2673 
2674     AssumptionCache *AC =
2675         IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
2676 
2677     // Preserve all attributes of the call and its parameters.
2678     salvageKnowledge(&CB, AC);
2679 
2680     // We want the inliner to prune the code as it copies.  We would LOVE to
2681     // have no dead or constant instructions leftover after inlining occurs
2682     // (which can happen, e.g., because an argument was constant), but we'll be
2683     // happy with whatever the cloner can do.
2684     CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
2685                               /*ModuleLevelChanges=*/false, Returns, ".i",
2686                               &InlinedFunctionInfo);
2687     // Remember the first block that is newly cloned over.
2688     FirstNewBlock = LastBlock; ++FirstNewBlock;
2689 
2690     // Insert retainRV/claimRV runtime calls.
2691     objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(&CB);
2692     if (RVCallKind != objcarc::ARCInstKind::None)
2693       inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns);
2694 
2695     // Update caller/callee profiles only when requested. For sample loader
2696     // inlining, the context-sensitive inlinee profile doesn't need to be
2697     // subtracted from callee profile, and the inlined clone also doesn't need
2698     // to be scaled based on call site count.
2699     if (IFI.UpdateProfile) {
2700       if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
2701         // Update the BFI of blocks cloned into the caller.
2702         updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
2703                         CalledFunc->front());
2704 
2705       if (auto Profile = CalledFunc->getEntryCount())
2706         updateCallProfile(CalledFunc, VMap, *Profile, CB, IFI.PSI,
2707                           IFI.CallerBFI);
2708     }
2709 
2710     // Inject byval arguments initialization.
2711     for (ByValInit &Init : ByValInits)
2712       HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Caller->getParent(),
2713                               &*FirstNewBlock, IFI, CalledFunc);
2714 
2715     std::optional<OperandBundleUse> ParentDeopt =
2716         CB.getOperandBundle(LLVMContext::OB_deopt);
2717     if (ParentDeopt) {
2718       SmallVector<OperandBundleDef, 2> OpDefs;
2719 
2720       for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
2721         CallBase *ICS = dyn_cast_or_null<CallBase>(VH);
2722         if (!ICS)
2723           continue; // instruction was DCE'd or RAUW'ed to undef
2724 
2725         OpDefs.clear();
2726 
2727         OpDefs.reserve(ICS->getNumOperandBundles());
2728 
2729         for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe;
2730              ++COBi) {
2731           auto ChildOB = ICS->getOperandBundleAt(COBi);
2732           if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
2733             // If the inlined call has other operand bundles, let them be
2734             OpDefs.emplace_back(ChildOB);
2735             continue;
2736           }
2737 
2738           // It may be useful to separate this logic (of handling operand
2739           // bundles) out to a separate "policy" component if this gets crowded.
2740           // Prepend the parent's deoptimization continuation to the newly
2741           // inlined call's deoptimization continuation.
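               // For example (illustrative operand names only): a parent bundle
               // "deopt"(i32 %p0, i32 %p1) merged with the inlined call's
               // "deopt"(i32 %c0) yields "deopt"(i32 %p0, i32 %p1, i32 %c0).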
2742           std::vector<Value *> MergedDeoptArgs;
2743           MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() +
2744                                   ChildOB.Inputs.size());
2745 
2746           llvm::append_range(MergedDeoptArgs, ParentDeopt->Inputs);
2747           llvm::append_range(MergedDeoptArgs, ChildOB.Inputs);
2748 
2749           OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs));
2750         }
2751 
2752         Instruction *NewI = CallBase::Create(ICS, OpDefs, ICS->getIterator());
2753 
2754         // Note: the RAUW does the appropriate fixup in VMap, so we need to do
2755         // this even if the call returns void.
2756         ICS->replaceAllUsesWith(NewI);
2757 
2758         VH = nullptr;
2759         ICS->eraseFromParent();
2760       }
2761     }
2762 
2763     // For 'nodebug' functions, the associated DISubprogram is always null.
2764     // Conservatively avoid propagating the callsite debug location to
2765     // instructions inlined from a function whose DISubprogram is not null.
2766     fixupLineNumbers(Caller, FirstNewBlock, &CB,
2767                      CalledFunc->getSubprogram() != nullptr);
2768 
2769     if (isAssignmentTrackingEnabled(*Caller->getParent())) {
2770       // Interpret inlined stores to caller-local variables as assignments.
2771       trackInlinedStores(FirstNewBlock, Caller->end(), CB);
2772 
2773       // Update DIAssignID metadata attachments and uses so that they are
2774       // unique to this inlined instance.
2775       fixupAssignments(FirstNewBlock, Caller->end());
2776     }
2777 
2778     // Now clone the inlined noalias scope metadata.
2779     SAMetadataCloner.clone();
2780     SAMetadataCloner.remap(FirstNewBlock, Caller->end());
2781 
2782     // Add noalias metadata if necessary.
2783     AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo);
2784 
2785     // Clone return attributes on the callsite into the calls within the inlined
2786     // function which feed into its return value.
2787     AddReturnAttributes(CB, VMap, InlinedFunctionInfo);
2788 
2789     // Clone attributes on the params of the callsite to calls within the
2790     // inlined function which use the same param.
2791     AddParamAndFnBasicAttributes(CB, VMap, InlinedFunctionInfo);
2792 
2793     propagateMemProfMetadata(CalledFunc, CB,
2794                              InlinedFunctionInfo.ContainsMemProfMetadata, VMap);
2795 
2796     // Propagate metadata on the callsite if necessary.
2797     PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end());
2798 
2799     // Register any cloned assumptions.
2800     if (IFI.GetAssumptionCache)
2801       for (BasicBlock &NewBlock :
2802            make_range(FirstNewBlock->getIterator(), Caller->end()))
2803         for (Instruction &I : NewBlock)
2804           if (auto *II = dyn_cast<AssumeInst>(&I))
2805             IFI.GetAssumptionCache(*Caller).registerAssumption(II);
2806   }
2807 
2808   if (ConvergenceControlToken) {
2809     IntrinsicInst *IntrinsicCall = getConvergenceEntry(*FirstNewBlock);
2810     if (IntrinsicCall) {
2811       IntrinsicCall->replaceAllUsesWith(ConvergenceControlToken);
2812       IntrinsicCall->eraseFromParent();
2813     }
2814   }
2815 
2816   // If there are any alloca instructions in the block that used to be the entry
2817   // block for the callee, move them to the entry block of the caller.  First
2818   // calculate which instruction they should be inserted before.  We insert the
2819   // instructions at the end of the current alloca list.
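       // (Only allocas that would be static in the entry block - i.e. with a
       // constant array size - are hoisted; dynamic allocas are instead
       // bracketed with stacksave/stackrestore further below.)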
2820   {
2821     BasicBlock::iterator InsertPoint = Caller->begin()->begin();
2822     for (BasicBlock::iterator I = FirstNewBlock->begin(),
2823          E = FirstNewBlock->end(); I != E; ) {
2824       AllocaInst *AI = dyn_cast<AllocaInst>(I++);
2825       if (!AI) continue;
2826 
2827       // If the alloca is now dead, remove it.  This often occurs due to code
2828       // specialization.
2829       if (AI->use_empty()) {
2830         AI->eraseFromParent();
2831         continue;
2832       }
2833 
2834       if (!allocaWouldBeStaticInEntry(AI))
2835         continue;
2836 
2837       // Keep track of the static allocas that we inline into the caller.
2838       IFI.StaticAllocas.push_back(AI);
2839 
2840       // Scan for the block of allocas that we can move over, and move them
2841       // all at once.
2842       while (isa<AllocaInst>(I) &&
2843              !cast<AllocaInst>(I)->use_empty() &&
2844              allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) {
2845         IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
2846         ++I;
2847       }
2848 
2849       // Transfer all of the allocas over in a block.  Using splice means
2850       // that the instructions aren't removed from the symbol table, then
2851       // reinserted.
2852       I.setTailBit(true);
2853       Caller->getEntryBlock().splice(InsertPoint, &*FirstNewBlock,
2854                                      AI->getIterator(), I);
2855     }
2856   }
2857 
2858   SmallVector<Value*,4> VarArgsToForward;
2859   SmallVector<AttributeSet, 4> VarArgsAttrs;
2860   for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
2861        i < CB.arg_size(); i++) {
2862     VarArgsToForward.push_back(CB.getArgOperand(i));
2863     VarArgsAttrs.push_back(CB.getAttributes().getParamAttrs(i));
2864   }
2865 
2866   bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
2867   if (InlinedFunctionInfo.ContainsCalls) {
2868     CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
2869     if (CallInst *CI = dyn_cast<CallInst>(&CB))
2870       CallSiteTailKind = CI->getTailCallKind();
2871 
2872     // For inlining purposes, the "notail" marker is the same as no marker.
2873     if (CallSiteTailKind == CallInst::TCK_NoTail)
2874       CallSiteTailKind = CallInst::TCK_None;
2875 
2876     for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
2877          ++BB) {
2878       for (Instruction &I : llvm::make_early_inc_range(*BB)) {
2879         CallInst *CI = dyn_cast<CallInst>(&I);
2880         if (!CI)
2881           continue;
2882 
2883         // Forward varargs from inlined call site to calls to the
2884         // ForwardVarArgsTo function, if requested, and to musttail calls.
2885         if (!VarArgsToForward.empty() &&
2886             ((ForwardVarArgsTo &&
2887               CI->getCalledFunction() == ForwardVarArgsTo) ||
2888              CI->isMustTailCall())) {
2889           // Collect attributes for non-vararg parameters.
2890           AttributeList Attrs = CI->getAttributes();
2891           SmallVector<AttributeSet, 8> ArgAttrs;
2892           if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) {
2893             for (unsigned ArgNo = 0;
2894                  ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo)
2895               ArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
2896           }
2897 
2898           // Add VarArg attributes.
2899           ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end());
2900           Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttrs(),
2901                                      Attrs.getRetAttrs(), ArgAttrs);
2902           // Add VarArgs to existing parameters.
2903           SmallVector<Value *, 6> Params(CI->args());
2904           Params.append(VarArgsToForward.begin(), VarArgsToForward.end());
2905           CallInst *NewCI = CallInst::Create(
2906               CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI->getIterator());
2907           NewCI->setDebugLoc(CI->getDebugLoc());
2908           NewCI->setAttributes(Attrs);
2909           NewCI->setCallingConv(CI->getCallingConv());
2910           CI->replaceAllUsesWith(NewCI);
2911           CI->eraseFromParent();
2912           CI = NewCI;
2913         }
2914 
2915         if (Function *F = CI->getCalledFunction())
2916           InlinedDeoptimizeCalls |=
2917               F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
2918 
2919         // We need to reduce the strength of any inlined tail calls.  For
2920         // musttail, we have to avoid introducing potential unbounded stack
2921         // growth.  For example, if functions 'f' and 'g' are mutually recursive
2922         // with musttail, we can inline 'g' into 'f' so long as we preserve
2923         // musttail on the cloned call to 'f'.  If either the inlined call site
2924         // or the cloned call site is *not* musttail, the program already has
2925         // one frame of stack growth, so it's safe to remove musttail.  Here is
2926         // a table of example transformations:
2927         //
2928         //    f -> musttail g -> musttail f  ==>  f -> musttail f
2929         //    f -> musttail g ->     tail f  ==>  f ->     tail f
2930         //    f ->          g -> musttail f  ==>  f ->          f
2931         //    f ->          g ->     tail f  ==>  f ->          f
2932         //
2933         // Inlined notail calls should remain notail calls.
2934         CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
2935         if (ChildTCK != CallInst::TCK_NoTail)
2936           ChildTCK = std::min(CallSiteTailKind, ChildTCK);
2937         CI->setTailCallKind(ChildTCK);
2938         InlinedMustTailCalls |= CI->isMustTailCall();
2939 
2940         // Call sites inlined through a 'nounwind' call site should be
2941         // 'nounwind' as well. However, avoid marking call sites explicitly
2942         // where possible. This helps expose more opportunities for CSE after
2943         // inlining, commonly when the callee is an intrinsic.
2944         if (MarkNoUnwind && !CI->doesNotThrow())
2945           CI->setDoesNotThrow();
2946       }
2947     }
2948   }
2949 
2950   // Leave lifetime markers for the static allocas, scoping them to the
2951   // function we just inlined.
2952   // We need to insert lifetime intrinsics even at O0 to avoid invalid
2953   // access caused by multithreaded coroutines. The check
2954   // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only.
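       // A rough sketch of what gets emitted for a static alloca %a whose size N
       // is known (illustrative; exact intrinsic signatures depend on the LLVM
       // version):
       //   call void @llvm.lifetime.start.p0(i64 N, ptr %a)  ; at the inlined entry
       //   call void @llvm.lifetime.end.p0(i64 N, ptr %a)    ; before each return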
2955   if ((InsertLifetime || Caller->isPresplitCoroutine()) &&
2956       !IFI.StaticAllocas.empty()) {
2957     IRBuilder<> builder(&*FirstNewBlock, FirstNewBlock->begin());
2958     for (AllocaInst *AI : IFI.StaticAllocas) {
2959       // Don't mark swifterror allocas. They can't have bitcast uses.
2960       if (AI->isSwiftError())
2961         continue;
2962 
2963       // If the alloca is already scoped to something smaller than the whole
2964       // function then there's no need to add redundant, less accurate markers.
2965       if (hasLifetimeMarkers(AI))
2966         continue;
2967 
2968       // Try to determine the size of the allocation.
2969       ConstantInt *AllocaSize = nullptr;
2970       if (ConstantInt *AIArraySize =
2971           dyn_cast<ConstantInt>(AI->getArraySize())) {
2972         auto &DL = Caller->getDataLayout();
2973         Type *AllocaType = AI->getAllocatedType();
2974         TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
2975         uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
2976 
2977         // Don't add markers for zero-sized allocas.
2978         if (AllocaArraySize == 0)
2979           continue;
2980 
2981         // Check that array size doesn't saturate uint64_t and doesn't
2982         // overflow when it's multiplied by type size.
2983         if (!AllocaTypeSize.isScalable() &&
2984             AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
2985             std::numeric_limits<uint64_t>::max() / AllocaArraySize >=
2986                 AllocaTypeSize.getFixedValue()) {
2987           AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
2988                                         AllocaArraySize * AllocaTypeSize);
2989         }
2990       }
2991 
2992       builder.CreateLifetimeStart(AI, AllocaSize);
2993       for (ReturnInst *RI : Returns) {
2994         // Don't insert llvm.lifetime.end calls between a musttail or deoptimize
2995         // call and a return.  The return kills all local allocas.
2996         if (InlinedMustTailCalls &&
2997             RI->getParent()->getTerminatingMustTailCall())
2998           continue;
2999         if (InlinedDeoptimizeCalls &&
3000             RI->getParent()->getTerminatingDeoptimizeCall())
3001           continue;
3002         IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
3003       }
3004     }
3005   }
3006 
3007   // If the inlined code contained dynamic alloca instructions, wrap the inlined
3008   // code with llvm.stacksave/llvm.stackrestore intrinsics.
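       // Roughly (illustrative IR, modulo the exact intrinsic overload names):
       //   %savedstack = call ptr @llvm.stacksave.p0()       ; at the inlined entry
       //   ...
       //   call void @llvm.stackrestore.p0(ptr %savedstack)  ; before each return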
3009   if (InlinedFunctionInfo.ContainsDynamicAllocas) {
3010     // Insert the llvm.stacksave.
3011     CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
3012                              .CreateStackSave("savedstack");
3013 
3014     // Insert a call to llvm.stackrestore before any return instructions in the
3015     // inlined function.
3016     for (ReturnInst *RI : Returns) {
3017       // Don't insert llvm.stackrestore calls between a musttail or deoptimize
3018       // call and a return.  The return will restore the stack pointer.
3019       if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())
3020         continue;
3021       if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall())
3022         continue;
3023       IRBuilder<>(RI).CreateStackRestore(SavedPtr);
3024     }
3025   }
3026 
3027   // If we are inlining for an invoke instruction, we must make sure to rewrite
3028   // any call instructions into invoke instructions.  This is sensitive to which
3029   // funclet pads were top-level in the inlinee, so must be done before
3030   // rewriting the "parent pad" links.
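       // (E.g., a potentially-throwing 'call' cloned from the callee becomes an
       // 'invoke' whose unwind edge targets the unwind destination of the
       // original invoke being inlined.)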
3031   if (auto *II = dyn_cast<InvokeInst>(&CB)) {
3032     BasicBlock *UnwindDest = II->getUnwindDest();
3033     BasicBlock::iterator FirstNonPHI = UnwindDest->getFirstNonPHIIt();
3034     if (isa<LandingPadInst>(FirstNonPHI)) {
3035       HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo);
3036     } else {
3037       HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo);
3038     }
3039   }
3040 
3041   // Update the lexical scopes of the new funclets and callsites.
3042   // Anything that had 'none' as its parent is now nested inside the callsite's
3043   // EHPad.
3044   if (CallSiteEHPad) {
3045     for (Function::iterator BB = FirstNewBlock->getIterator(),
3046                             E = Caller->end();
3047          BB != E; ++BB) {
3048       // Add bundle operands to inlined call sites.
3049       PropagateOperandBundles(BB, CallSiteEHPad);
3050 
3051       // It is problematic if the inlinee has a cleanupret which unwinds to
3052       // caller and we inline it at a call site which doesn't unwind, but
3053       // which is inside an EH pad that does.  Such an edge must be dynamically
3054       // unreachable.  As such, we replace the cleanupret with unreachable.
3055       if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator()))
3056         if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
3057           changeToUnreachable(CleanupRet);
3058 
3059       BasicBlock::iterator I = BB->getFirstNonPHIIt();
3060       if (!I->isEHPad())
3061         continue;
3062 
3063       if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
3064         if (isa<ConstantTokenNone>(CatchSwitch->getParentPad()))
3065           CatchSwitch->setParentPad(CallSiteEHPad);
3066       } else {
3067         auto *FPI = cast<FuncletPadInst>(I);
3068         if (isa<ConstantTokenNone>(FPI->getParentPad()))
3069           FPI->setParentPad(CallSiteEHPad);
3070       }
3071     }
3072   }
3073 
3074   if (InlinedDeoptimizeCalls) {
3075     // We need to at least remove the deoptimizing returns from the Return set,
3076     // so that the control flow from those returns does not get merged into the
3077     // caller (but terminate it instead).  If the caller's return type does not
3078     // match the callee's return type, we also need to change the return type of
3079     // the intrinsic.
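         // (Illustrative: if the caller returns i64 but the inlined code ends in
         //    %d = call i32 @llvm.experimental.deoptimize.i32(...) ; ret i32 %d
         //  the call is re-emitted against @llvm.experimental.deoptimize.i64 and
         //  its result is returned directly.)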
3080     if (Caller->getReturnType() == CB.getType()) {
3081       llvm::erase_if(Returns, [](ReturnInst *RI) {
3082         return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
3083       });
3084     } else {
3085       SmallVector<ReturnInst *, 8> NormalReturns;
3086       Function *NewDeoptIntrinsic = Intrinsic::getOrInsertDeclaration(
3087           Caller->getParent(), Intrinsic::experimental_deoptimize,
3088           {Caller->getReturnType()});
3089 
3090       for (ReturnInst *RI : Returns) {
3091         CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
3092         if (!DeoptCall) {
3093           NormalReturns.push_back(RI);
3094           continue;
3095         }
3096 
3097         // The calling convention on the deoptimize call itself may be bogus,
3098         // since the code we're inlining may have undefined behavior (and may
3099         // never actually execute at runtime); but all
3100         // @llvm.experimental.deoptimize declarations have to have the same
3101         // calling convention in a well-formed module.
3102         auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv();
3103         NewDeoptIntrinsic->setCallingConv(CallingConv);
3104         auto *CurBB = RI->getParent();
3105         RI->eraseFromParent();
3106 
3107         SmallVector<Value *, 4> CallArgs(DeoptCall->args());
3108 
3109         SmallVector<OperandBundleDef, 1> OpBundles;
3110         DeoptCall->getOperandBundlesAsDefs(OpBundles);
3111         auto DeoptAttributes = DeoptCall->getAttributes();
3112         DeoptCall->eraseFromParent();
3113         assert(!OpBundles.empty() &&
3114                "Expected at least the deopt operand bundle");
3115 
3116         IRBuilder<> Builder(CurBB);
3117         CallInst *NewDeoptCall =
3118             Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles);
3119         NewDeoptCall->setCallingConv(CallingConv);
3120         NewDeoptCall->setAttributes(DeoptAttributes);
3121         if (NewDeoptCall->getType()->isVoidTy())
3122           Builder.CreateRetVoid();
3123         else
3124           Builder.CreateRet(NewDeoptCall);
3125         // Since the ret type is changed, remove the incompatible attributes.
3126         NewDeoptCall->removeRetAttrs(AttributeFuncs::typeIncompatible(
3127             NewDeoptCall->getType(), NewDeoptCall->getRetAttributes()));
3128       }
3129 
3130       // Leave behind the normal returns so we can merge control flow.
3131       std::swap(Returns, NormalReturns);
3132     }
3133   }
3134 
3135   // Handle any inlined musttail call sites.  In order for a new call site to be
3136   // musttail, the source of the clone and the inlined call site must have been
3137   // musttail.  Therefore it's safe to return without merging control into the
3138   // phi below.
3139   if (InlinedMustTailCalls) {
3140     // Check if we need to bitcast the result of any musttail calls.
3141     Type *NewRetTy = Caller->getReturnType();
3142     bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy;
3143 
3144     // Handle the returns preceded by musttail calls separately.
3145     SmallVector<ReturnInst *, 8> NormalReturns;
3146     for (ReturnInst *RI : Returns) {
3147       CallInst *ReturnedMustTail =
3148           RI->getParent()->getTerminatingMustTailCall();
3149       if (!ReturnedMustTail) {
3150         NormalReturns.push_back(RI);
3151         continue;
3152       }
3153       if (!NeedBitCast)
3154         continue;
3155 
3156       // Delete the old return and any preceding bitcast.
3157       BasicBlock *CurBB = RI->getParent();
3158       auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue());
3159       RI->eraseFromParent();
3160       if (OldCast)
3161         OldCast->eraseFromParent();
3162 
3163       // Insert a new bitcast and return with the right type.
3164       IRBuilder<> Builder(CurBB);
3165       Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy));
3166     }
3167 
3168     // Leave behind the normal returns so we can merge control flow.
3169     std::swap(Returns, NormalReturns);
3170   }
3171 
3172   // Now that all of the transforms on the inlined code have taken place but
3173   // before we splice the inlined code into the CFG and lose track of which
3174   // blocks were actually inlined, collect the call sites. We only do this if
3175   // call graph updates weren't requested, as those provide value handle based
3176   // tracking of inlined call sites instead. Calls to intrinsics are not
3177   // collected because they are not inlineable.
3178   if (InlinedFunctionInfo.ContainsCalls) {
3179     // Otherwise just collect the raw call sites that were inlined.
3180     for (BasicBlock &NewBB :
3181          make_range(FirstNewBlock->getIterator(), Caller->end()))
3182       for (Instruction &I : NewBB)
3183         if (auto *CB = dyn_cast<CallBase>(&I))
3184           if (!(CB->getCalledFunction() &&
3185                 CB->getCalledFunction()->isIntrinsic()))
3186             IFI.InlinedCallSites.push_back(CB);
3187   }
3188 
3189   // If we cloned in _exactly one_ basic block, and if that block ends in a
3190   // return instruction, we splice the body of the inlined callee directly into
3191   // the calling basic block.
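       // (That is, for a single-block callee the call site's block simply
       // absorbs the callee's instructions, and uses of the call are replaced
       // by the returned value below.)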
3192   if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
3193     // Move all of the instructions right before the call.
3194     OrigBB->splice(CB.getIterator(), &*FirstNewBlock, FirstNewBlock->begin(),
3195                    FirstNewBlock->end());
3196     // Remove the cloned basic block.
3197     Caller->back().eraseFromParent();
3198 
3199     // If the call site was an invoke instruction, add a branch to the normal
3200     // destination.
3201     if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
3202       BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), CB.getIterator());
3203       NewBr->setDebugLoc(Returns[0]->getDebugLoc());
3204     }
3205 
3206     // If the return instruction returned a value, replace uses of the call with
3207     // uses of the returned value.
3208     if (!CB.use_empty()) {
3209       ReturnInst *R = Returns[0];
3210       if (&CB == R->getReturnValue())
3211         CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
3212       else
3213         CB.replaceAllUsesWith(R->getReturnValue());
3214     }
3215     // Since we are now done with the Call/Invoke, we can delete it.
3216     CB.eraseFromParent();
3217 
3218     // Since we are now done with the return instruction, delete it also.
3219     Returns[0]->eraseFromParent();
3220 
3221     if (MergeAttributes)
3222       AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc);
3223 
3224     // We are now done with the inlining.
3225     return InlineResult::success();
3226   }
3227 
3228   // Otherwise, we have the normal case, of more than one block to inline or
3229   // multiple return sites.
3230 
3231   // We want to clone the entire callee function into the hole between the
3232   // "starter" and "ender" blocks.  How we accomplish this depends on whether
3233   // this is an invoke instruction or a call instruction.
3234   BasicBlock *AfterCallBB;
3235   BranchInst *CreatedBranchToNormalDest = nullptr;
3236   if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
3237 
3238     // Add an unconditional branch to make this look like the CallInst case...
3239     CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), CB.getIterator());
3240 
3241     // Split the basic block.  This guarantees that no PHI nodes will have to be
3242     // updated due to new incoming edges, and makes the invoke case more
3243     // symmetric to the call case.
3244     AfterCallBB =
3245         OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(),
3246                                 CalledFunc->getName() + ".exit");
3247 
3248   } else { // It's a call
3249     // If this is a call instruction, we need to split the basic block that
3250     // the call lives in.
3251     //
3252     AfterCallBB = OrigBB->splitBasicBlock(CB.getIterator(),
3253                                           CalledFunc->getName() + ".exit");
3254   }
3255 
3256   if (IFI.CallerBFI) {
3257     // Copy original BB's block frequency to AfterCallBB
3258     IFI.CallerBFI->setBlockFreq(AfterCallBB,
3259                                 IFI.CallerBFI->getBlockFreq(OrigBB));
3260   }
3261 
3262   // Change the branch that used to go to AfterCallBB to branch to the first
3263   // basic block of the inlined function.
3264   //
3265   Instruction *Br = OrigBB->getTerminator();
3266   assert(Br && Br->getOpcode() == Instruction::Br &&
3267          "splitBasicBlock broken!");
3268   Br->setOperand(0, &*FirstNewBlock);
3269 
3270   // Now that the function is correct, make it a little bit nicer.  In
3271   // particular, move the basic blocks inserted from the end of the function
3272   // into the space made by splitting the source basic block.
3273   Caller->splice(AfterCallBB->getIterator(), Caller, FirstNewBlock,
3274                  Caller->end());
3275 
3276   // Handle all of the return instructions that we just cloned in, and eliminate
3277   // any users of the original call/invoke instruction.
3278   Type *RTy = CalledFunc->getReturnType();
3279 
3280   PHINode *PHI = nullptr;
3281   if (Returns.size() > 1) {
3282     // The PHI node should go at the front of the new basic block to merge all
3283     // possible incoming values.
3284     if (!CB.use_empty()) {
3285       PHI = PHINode::Create(RTy, Returns.size(), CB.getName());
3286       PHI->insertBefore(AfterCallBB->begin());
3287       // Anything that used the result of the function call should now use the
3288       // PHI node as their operand.
3289       CB.replaceAllUsesWith(PHI);
3290     }
3291 
3292     // Loop over all of the return instructions adding entries to the PHI node
3293     // as appropriate.
3294     if (PHI) {
3295       for (ReturnInst *RI : Returns) {
3296         assert(RI->getReturnValue()->getType() == PHI->getType() &&
3297                "Ret value not consistent in function!");
3298         PHI->addIncoming(RI->getReturnValue(), RI->getParent());
3299       }
3300     }
3301 
3302     // Add a branch to the merge points and remove return instructions.
3303     DebugLoc Loc;
3304     for (ReturnInst *RI : Returns) {
3305       BranchInst *BI = BranchInst::Create(AfterCallBB, RI->getIterator());
3306       Loc = RI->getDebugLoc();
3307       BI->setDebugLoc(Loc);
3308       RI->eraseFromParent();
3309     }
3310     // We need to set the debug location to *somewhere* inside the
3311     // inlined function. The line number may be nonsensical, but the
3312     // instruction will at least be associated with the right
3313     // function.
3314     if (CreatedBranchToNormalDest)
3315       CreatedBranchToNormalDest->setDebugLoc(Loc);
3316   } else if (!Returns.empty()) {
3317     // Otherwise, if there is exactly one return value, just replace anything
3318     // using the return value of the call with the computed value.
3319     if (!CB.use_empty()) {
3320       if (&CB == Returns[0]->getReturnValue())
3321         CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
3322       else
3323         CB.replaceAllUsesWith(Returns[0]->getReturnValue());
3324     }
3325 
3326     // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
3327     BasicBlock *ReturnBB = Returns[0]->getParent();
3328     ReturnBB->replaceAllUsesWith(AfterCallBB);
3329 
3330     // Splice the code from the return block into the block that it will return
3331     // to, which contains the code that was after the call.
3332     AfterCallBB->splice(AfterCallBB->begin(), ReturnBB);
3333 
3334     if (CreatedBranchToNormalDest)
3335       CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());
3336 
3337     // Delete the return instruction and the now-empty ReturnBB.
3338     Returns[0]->eraseFromParent();
3339     ReturnBB->eraseFromParent();
3340   } else if (!CB.use_empty()) {
3341     // No returns, but something is using the return value of the call.  Just
3342     // nuke the result.
3343     CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
3344   }
3345 
3346   // Since we are now done with the Call/Invoke, we can delete it.
3347   CB.eraseFromParent();
3348 
3349   // If we inlined any musttail calls and the original return is now
3350   // unreachable, delete it.  It can only contain a bitcast and ret.
3351   if (InlinedMustTailCalls && pred_empty(AfterCallBB))
3352     AfterCallBB->eraseFromParent();
3353 
3354   // We should always be able to fold the entry block of the function into the
3355   // single predecessor of the block...
3356   assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
3357   BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);
3358 
3359   // Splice the code entry block into calling block, right before the
3360   // unconditional branch.
3361   CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes
3362   OrigBB->splice(Br->getIterator(), CalleeEntry);
3363 
3364   // Remove the unconditional branch.
3365   Br->eraseFromParent();
3366 
3367   // Now we can remove the CalleeEntry block, which is now empty.
3368   CalleeEntry->eraseFromParent();
3369 
3370   // If we inserted a phi node, check to see if it has a single value (e.g. all
3371   // the entries are the same or undef).  If so, remove the PHI so it doesn't
3372   // block other optimizations.
3373   if (PHI) {
3374     AssumptionCache *AC =
3375         IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
3376     auto &DL = Caller->getDataLayout();
3377     if (Value *V = simplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) {
3378       PHI->replaceAllUsesWith(V);
3379       PHI->eraseFromParent();
3380     }
3381   }
3382 
3383   if (MergeAttributes)
3384     AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc);
3385 
3386   return InlineResult::success();
3387 }
3388