//===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass promotes "by reference" arguments to be "by value" arguments.  In
// practice, this means looking for internal functions that have pointer
// arguments.  If it can prove, through the use of alias analysis, that an
// argument is *only* loaded, then it can pass the value into the function
// instead of the address of the value.  This can cause recursive simplification
// of code and lead to the elimination of allocas (especially in C++ template
// code like the STL).
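//
// For example (an illustrative sketch, not output of this pass verbatim), a
// callee such as
//
//   define internal i32 @callee(i32* %p) {
//     %v = load i32, i32* %p
//     ret i32 %v
//   }
//
// can be rewritten so that each caller performs the load and passes the value:
//
//   define internal i32 @callee(i32 %p.val) {
//     ret i32 %p.val
//   }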
//
// This pass also handles aggregate arguments that are passed into a function,
// scalarizing them if the elements of the aggregate are only loaded.  Note that
// by default it refuses to scalarize aggregates which would require passing in
// more than three operands to the function, because passing thousands of
// operands for a large array or structure is unprofitable! This limit can be
// configured or disabled, however.
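//
// Continuing the sketch above, an argument of type {i32, i32}* whose only
// uses are
//
//   %a.ptr = getelementptr {i32, i32}, {i32, i32}* %p, i32 0, i32 0
//   %a = load i32, i32* %a.ptr
//   %b.ptr = getelementptr {i32, i32}, {i32, i32}* %p, i32 0, i32 1
//   %b = load i32, i32* %b.ptr
//
// would be scalarized into two separate i32 parameters, one per accessed
// element.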
//
// Note that this transformation could also be done for arguments that are only
// stored to (returning the value instead), but it does not currently do so.
// This case would be best handled when and if LLVM begins supporting multiple
// return values from functions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/ArgumentPromotion.h"

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "argpromotion"

STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");

namespace {

struct ArgPart {
  Type *Ty;
  Align Alignment;
  /// A representative guaranteed-executed load or store instruction for use by
  /// metadata transfer.
  Instruction *MustExecInstr;
};

using OffsetAndArgPart = std::pair<int64_t, ArgPart>;

} // end anonymous namespace

static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,
                            Value *Ptr, Type *ResElemTy, int64_t Offset) {
  // For non-opaque pointers, try to create a "nice" GEP if possible, otherwise
  // fall back to an i8 GEP to a specific offset.
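  // For instance (illustrative, assuming a DataLayout where i32 occupies four
  // bytes): for a Ptr of type {i32, i32}* and an Offset of 4, the "nice" form
  // is "getelementptr {i32, i32}, {i32, i32}* %p, i32 0, i32 1", while the
  // fallback is a bitcast to i8* followed by "getelementptr i8, i8* %p, i64 4"
  // and a bitcast to the result pointer type.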
  unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
  APInt OrigOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
  if (!Ptr->getType()->isOpaquePointerTy()) {
    Type *OrigElemTy = Ptr->getType()->getNonOpaquePointerElementType();
    if (OrigOffset == 0 && OrigElemTy == ResElemTy)
      return Ptr;

    if (OrigElemTy->isSized()) {
      APInt TmpOffset = OrigOffset;
      Type *TmpTy = OrigElemTy;
      SmallVector<APInt> IntIndices =
          DL.getGEPIndicesForOffset(TmpTy, TmpOffset);
      if (TmpOffset == 0) {
        // Try to add trailing zero indices to reach the right type.
        while (TmpTy != ResElemTy) {
          Type *NextTy = GetElementPtrInst::getTypeAtIndex(TmpTy, (uint64_t)0);
          if (!NextTy)
            break;

          IntIndices.push_back(APInt::getZero(
              isa<StructType>(TmpTy) ? 32 : OrigOffset.getBitWidth()));
          TmpTy = NextTy;
        }

        SmallVector<Value *> Indices;
        for (const APInt &Index : IntIndices)
          Indices.push_back(IRB.getInt(Index));

        if (OrigOffset != 0 || TmpTy == ResElemTy) {
          Ptr = IRB.CreateGEP(OrigElemTy, Ptr, Indices);
          return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace));
        }
      }
    }
  }

  if (OrigOffset != 0) {
    Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(AddrSpace));
    Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(OrigOffset));
  }
  return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace));
}

/// doPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function.  At this point, we know that it's
/// safe to do so.
static Function *
doPromotion(Function *F, FunctionAnalysisManager &FAM,
            const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>>
                &ArgsToPromote) {
  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  FunctionType *FTy = F->getFunctionType();
  std::vector<Type *> Params;

  // Attribute - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the
  // parameter attributes are lost.
  SmallVector<AttributeSet, 8> ArgAttrVec;
  AttributeList PAL = F->getAttributes();

  // First, determine the new argument list.
  unsigned ArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgNo) {
    if (!ArgsToPromote.count(&*I)) {
      // Unchanged argument
      Params.push_back(I->getType());
      ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
    } else if (I->use_empty()) {
      // Dead argument (which is always marked as promotable)
      ++NumArgumentsDead;
    } else {
      const auto &ArgParts = ArgsToPromote.find(&*I)->second;
      for (const auto &Pair : ArgParts) {
        Params.push_back(Pair.second.Ty);
        ArgAttrVec.push_back(AttributeSet());
      }
      ++NumArgumentsPromoted;
    }
  }

  Type *RetTy = FTy->getReturnType();

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
                                  F->getName());
  NF->copyAttributesFrom(F);
  NF->copyMetadata(F, 0);

  // The new function will have the !dbg metadata copied from the original
  // function. The original function may not be deleted, and dbg metadata need
  // to be unique, so we need to drop it.
  F->setSubprogram(nullptr);

  LLVM_DEBUG(dbgs() << "ARG PROMOTION:  Promoting to:" << *NF << "\n"
                    << "From: " << *F);

  uint64_t LargestVectorWidth = 0;
  for (auto *I : Params)
    if (auto *VT = dyn_cast<llvm::VectorType>(I))
      LargestVectorWidth = std::max(
          LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize());

  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
                                       PAL.getRetAttrs(), ArgAttrVec));
  AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth);
  ArgAttrVec.clear();

  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
  NF->takeName(F);

  // Loop over all the callers of the function, transforming the call sites to
  // pass in the loaded pointers.
  SmallVector<Value *, 16> Args;
  const DataLayout &DL = F->getParent()->getDataLayout();
  while (!F->use_empty()) {
    CallBase &CB = cast<CallBase>(*F->user_back());
    assert(CB.getCalledFunction() == F);
    const AttributeList &CallPAL = CB.getAttributes();
    IRBuilder<NoFolder> IRB(&CB);

    // Loop over the operands, inserting GEPs and loads in the caller as
    // appropriate.
    auto *AI = CB.arg_begin();
    ArgNo = 0;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
         ++I, ++AI, ++ArgNo) {
      if (!ArgsToPromote.count(&*I)) {
        Args.push_back(*AI); // Unmodified argument
        ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
      } else if (!I->use_empty()) {
        Value *V = *AI;
        const auto &ArgParts = ArgsToPromote.find(&*I)->second;
        for (const auto &Pair : ArgParts) {
          LoadInst *LI = IRB.CreateAlignedLoad(
              Pair.second.Ty,
              createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first),
              Pair.second.Alignment, V->getName() + ".val");
          if (Pair.second.MustExecInstr) {
            LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
            LI->copyMetadata(*Pair.second.MustExecInstr,
                             {LLVMContext::MD_range, LLVMContext::MD_nonnull,
                              LLVMContext::MD_dereferenceable,
                              LLVMContext::MD_dereferenceable_or_null,
                              LLVMContext::MD_align, LLVMContext::MD_noundef});
          }
          Args.push_back(LI);
          ArgAttrVec.push_back(AttributeSet());
        }
      }
    }

    // Push any varargs arguments on the list.
    for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
      Args.push_back(*AI);
      ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
    }

    SmallVector<OperandBundleDef, 1> OpBundles;
    CB.getOperandBundlesAsDefs(OpBundles);

    CallBase *NewCS = nullptr;
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                 Args, OpBundles, "", &CB);
    } else {
      auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", &CB);
      NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind());
      NewCS = NewCall;
    }
    NewCS->setCallingConv(CB.getCallingConv());
    NewCS->setAttributes(AttributeList::get(F->getContext(),
                                            CallPAL.getFnAttrs(),
                                            CallPAL.getRetAttrs(), ArgAttrVec));
    NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
    Args.clear();
    ArgAttrVec.clear();

    AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(),
                                                  LargestVectorWidth);

    if (!CB.use_empty()) {
      CB.replaceAllUsesWith(NewCS);
      NewCS->takeName(&CB);
    }

    // Finally, remove the old call from the program, reducing the use-count of
    // F.
    CB.eraseFromParent();
  }

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());

  // We will collect all the newly created allocas to promote them into
  // registers after the following loop.
  SmallVector<AllocaInst *, 4> Allocas;

  // Loop over the argument list, transferring uses of the old arguments over
  // to the new arguments, transferring the names as well.
  Function::arg_iterator I2 = NF->arg_begin();
  for (Argument &Arg : F->args()) {
    if (!ArgsToPromote.count(&Arg)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      Arg.replaceAllUsesWith(&*I2);
      I2->takeName(&Arg);
      ++I2;
      continue;
    }

    // There potentially are metadata uses for things like llvm.dbg.value.
    // Replace them with undef, after handling the other regular uses.
    auto RauwUndefMetadata = make_scope_exit(
        [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });

    if (Arg.use_empty())
      continue;

    // Otherwise, if we promoted this argument, we have to create an alloca in
    // the callee for every promotable part and store each of the new incoming
    // arguments into the corresponding alloca, which gives the old code (in
    // particular the store instructions, if they are allowed) a chance to
    // work as before.
    assert(Arg.getType()->isPointerTy() &&
           "Only arguments with a pointer type are promotable");

    IRBuilder<NoFolder> IRB(&NF->begin()->front());

    // Add only the promoted elements, i.e. the parts from ArgsToPromote.
    SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca;
    for (const auto &Pair : ArgsToPromote.find(&Arg)->second) {
      int64_t Offset = Pair.first;
      const ArgPart &Part = Pair.second;

      Argument *NewArg = I2++;
      NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val");

      AllocaInst *NewAlloca = IRB.CreateAlloca(
          Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc");
      NewAlloca->setAlignment(Pair.second.Alignment);
      IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment);

      // Collect the alloca to retarget the users to.
      OffsetToAlloca.insert({Offset, NewAlloca});
    }

    auto GetAlloca = [&](Value *Ptr) {
      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                   /* AllowNonInbounds */ true);
      assert(Ptr == &Arg && "Not constant offset from arg?");
      return OffsetToAlloca.lookup(Offset.getSExtValue());
    };

    // Clean up the dead instructions (GEPs and BitCasts) between the original
    // argument and its end users (loads and stores), and retarget every user
    // to the newly created alloca.
    SmallVector<Value *, 16> Worklist;
    SmallVector<Instruction *, 16> DeadInsts;
    append_range(Worklist, Arg.users());
    while (!Worklist.empty()) {
      Value *V = Worklist.pop_back_val();
      if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) {
        DeadInsts.push_back(cast<Instruction>(V));
        append_range(Worklist, V->users());
        continue;
      }

      if (auto *LI = dyn_cast<LoadInst>(V)) {
        Value *Ptr = LI->getPointerOperand();
        LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr));
        continue;
      }

      if (auto *SI = dyn_cast<StoreInst>(V)) {
        assert(!SI->isVolatile() && "Volatile operations can't be promoted.");
        Value *Ptr = SI->getPointerOperand();
        SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr));
        continue;
      }

      llvm_unreachable("Unexpected user");
    }

    for (Instruction *I : DeadInsts) {
      I->replaceAllUsesWith(PoisonValue::get(I->getType()));
      I->eraseFromParent();
    }

    // Collect the allocas for promotion.
    for (const auto &Pair : OffsetToAlloca) {
      assert(isAllocaPromotable(Pair.second) &&
             "By design, only promotable allocas should be produced.");
      Allocas.push_back(Pair.second);
    }
  }

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size()
                    << " alloca(s) are promotable by Mem2Reg\n");

  if (!Allocas.empty()) {
    // We are now able to promote the allocas into registers. Our earlier
    // checks have ensured that PromoteMemToReg() will succeed.
    auto &DT = FAM.getResult<DominatorTreeAnalysis>(*NF);
    auto &AC = FAM.getResult<AssumptionAnalysis>(*NF);
    PromoteMemToReg(Allocas, DT, &AC);
  }

  return NF;
}

/// Return true if we can prove that all callers pass in a valid pointer for
/// the specified function argument.
static bool allCallersPassValidPointerForArgument(Argument *Arg,
                                                  Align NeededAlign,
                                                  uint64_t NeededDerefBytes) {
  Function *Callee = Arg->getParent();
  const DataLayout &DL = Callee->getParent()->getDataLayout();
  APInt Bytes(64, NeededDerefBytes);

  // Check if the argument itself is marked dereferenceable and aligned.
  if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))
    return true;

  // Look at all call sites of the function.  At this point we know we only
  // have direct call sites.
  return all_of(Callee->users(), [&](User *U) {
    CallBase &CB = cast<CallBase>(*U);
    return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()),
                                              NeededAlign, Bytes, DL);
  });
}

/// Determine whether this argument is safe to promote, and find the argument
/// parts it can be promoted into.
static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
                         unsigned MaxElements, bool IsRecursive,
                         SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
  // Quick exit for unused arguments.
  if (Arg->use_empty())
    return true;

  // We can only promote this argument if all the uses are loads at known
  // offsets.
  //
  // Promoting the argument causes it to be loaded in the caller
  // unconditionally. This is only safe if we can prove that either the load
  // would have happened in the callee anyway (i.e., there is a load in the
  // entry block) or the pointer passed in at every call site is guaranteed to
  // be valid.
  // In the former case, invalid loads can happen, but would have happened
  // anyway; in the latter case, invalid loads won't happen. This prevents us
  // from introducing an invalid load that wouldn't have happened in the
  // original code.
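  //
  // For instance (an illustrative sketch): promoting %p in
  //
  //   define internal i32 @f(i1 %c, i32* %p) {
  //   entry:
  //     br i1 %c, label %use, label %skip
  //   use:
  //     %v = load i32, i32* %p
  //     ret i32 %v
  //   skip:
  //     ret i32 0
  //   }
  //
  // would make every caller load %p unconditionally, even when %c is false,
  // so it is only legal if every call site passes a dereferenceable pointer.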

  SmallDenseMap<int64_t, ArgPart, 4> ArgParts;
  Align NeededAlign(1);
  uint64_t NeededDerefBytes = 0;

  // If this is a byval argument, we also allow store instructions. We only
  // handle arguments with a specified alignment this way; if the alignment is
  // unspecified, the actual alignment of the argument is target-specific.
  bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign();
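  // E.g. (an illustrative sketch): a byval struct argument with a specified
  // alignment that is both loaded from and stored to can still be promoted;
  // doPromotion later redirects its stores to a private alloca in the callee,
  // which is sound because a byval argument is already the callee's own copy
  // of the caller's memory.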

  // An end user of a pointer argument is a load or store instruction.
  // Returns None if this load or store is not based on the argument, true if
  // we can promote the instruction, and false otherwise.
  auto HandleEndUser = [&](auto *I, Type *Ty,
                           bool GuaranteedToExecute) -> Optional<bool> {
    // Don't promote volatile or atomic instructions.
    if (!I->isSimple())
      return false;

    Value *Ptr = I->getPointerOperand();
    APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
    Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                 /* AllowNonInbounds */ true);
    if (Ptr != Arg)
      return None;

    if (Offset.getSignificantBits() >= 64)
      return false;

    TypeSize Size = DL.getTypeStoreSize(Ty);
    // Don't try to promote scalable types.
    if (Size.isScalable())
      return false;

    // If this is a recursive function and one of the types is a pointer,
    // then promoting it might lead to recursive promotion.
    if (IsRecursive && Ty->isPointerTy())
      return false;

    int64_t Off = Offset.getSExtValue();
    auto Pair = ArgParts.try_emplace(
        Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr});
    ArgPart &Part = Pair.first->second;
    bool OffsetNotSeenBefore = Pair.second;

    // We limit promotion to at most a fixed number of elements of the
    // aggregate.
    if (MaxElements > 0 && ArgParts.size() > MaxElements) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "more than " << MaxElements << " parts\n");
      return false;
    }

    // For now, we only support loading/storing one specific type at a given
    // offset.
    if (Part.Ty != Ty) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "accessed as both " << *Part.Ty << " and " << *Ty
                        << " at offset " << Off << "\n");
      return false;
    }

    // If this instruction is not guaranteed to execute, and we haven't seen a
    // load or store at this offset before (or it had lower alignment), then we
    // need to remember that requirement.
    // Note that skipping instructions of previously seen offsets is only
    // correct because we only allow a single type for a given offset, which
    // also means that the number of accessed bytes will be the same.
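    // For example (illustrative): a non-guaranteed i32 load at offset 8 with
    // alignment 4 requires every call site to pass a pointer that is
    // dereferenceable for at least 12 bytes (8 + 4) and aligned to 4 bytes.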
    if (!GuaranteedToExecute &&
        (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) {
      // We won't be able to prove dereferenceability for negative offsets.
      if (Off < 0)
        return false;

      // If the offset is not aligned, an aligned base pointer won't help.
      if (!isAligned(I->getAlign(), Off))
        return false;

      NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());
      NeededAlign = std::max(NeededAlign, I->getAlign());
    }

    Part.Alignment = std::max(Part.Alignment, I->getAlign());
    return true;
  };

  // Look for loads and stores that are guaranteed to execute on entry.
  for (Instruction &I : Arg->getParent()->getEntryBlock()) {
    Optional<bool> Res{};
    if (LoadInst *LI = dyn_cast<LoadInst>(&I))
      Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true);
    else if (StoreInst *SI = dyn_cast<StoreInst>(&I))
      Res = HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ true);
    if (Res && !*Res)
      return false;

    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  // Now look at all loads of the argument. Remember the load instructions
  // for the aliasing check below.
  SmallVector<const Use *, 16> Worklist;
  SmallPtrSet<const Use *, 16> Visited;
  SmallVector<LoadInst *, 16> Loads;
  auto AppendUses = [&](const Value *V) {
    for (const Use &U : V->uses())
      if (Visited.insert(&U).second)
        Worklist.push_back(&U);
  };
  AppendUses(Arg);
  while (!Worklist.empty()) {
    const Use *U = Worklist.pop_back_val();
    Value *V = U->getUser();
    if (isa<BitCastInst>(V)) {
      AppendUses(V);
      continue;
    }

    if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
      if (!GEP->hasAllConstantIndices())
        return false;
      AppendUses(V);
      continue;
    }

    if (auto *LI = dyn_cast<LoadInst>(V)) {
      if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false))
        return false;
      Loads.push_back(LI);
      continue;
    }

    // Stores are allowed for byval arguments.
    auto *SI = dyn_cast<StoreInst>(V);
    if (AreStoresAllowed && SI &&
        U->getOperandNo() == StoreInst::getPointerOperandIndex()) {
      if (!*HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ false))
        return false;
      continue;
      // Only stores TO the argument are allowed; all other stores are
      // unknown users.
    }

    // Unknown user.
    LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                      << "unknown user " << *V << "\n");
    return false;
  }

  if (NeededDerefBytes || NeededAlign > 1) {
    // Try to prove the needed dereferenceability and alignment.
    if (!allCallersPassValidPointerForArgument(Arg, NeededAlign,
                                               NeededDerefBytes)) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "not dereferenceable or aligned\n");
      return false;
    }
  }

  if (ArgParts.empty())
    return true; // No users, this is a dead argument.

  // Sort parts by offset.
  append_range(ArgPartsVec, ArgParts);
  sort(ArgPartsVec,
       [](const auto &A, const auto &B) { return A.first < B.first; });

  // Make sure the parts are non-overlapping.
  int64_t Offset = ArgPartsVec[0].first;
  for (const auto &Pair : ArgPartsVec) {
    if (Pair.first < Offset)
      return false; // Overlap with previous part.

    Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty);
  }

  // If store instructions are allowed, the path from the entry of the function
  // to each load may not be free of instructions that potentially invalidate
  // the load, and this is an admissible situation.
  if (AreStoresAllowed)
    return true;

  // Okay, now we know that the argument is only used by load instructions, and
  // it is safe to unconditionally perform all of them. Use alias analysis to
  // check to see if the pointer is guaranteed to not be modified from entry of
  // the function to each of the load instructions.
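  //
  // E.g. (an illustrative sketch): for a load of %p in a non-entry block, an
  // intervening "store i32 0, i32* %q" is only acceptable if alias analysis
  // proves that %q cannot alias %p; otherwise the callers, which now perform
  // the load before the call, could pass a stale value.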

  // Because there could be several/many load instructions, remember which
  // blocks we know to be transparent to the load.
  df_iterator_default_set<BasicBlock *, 16> TranspBlocks;

  for (LoadInst *Load : Loads) {
    // Check to see if the load is invalidated from the start of the block to
    // the load itself.
    BasicBlock *BB = Load->getParent();

    MemoryLocation Loc = MemoryLocation::get(Load);
    if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, ModRefInfo::Mod))
      return false; // Pointer is invalidated!

    // Now check every path from the entry block to the load for transparency.
    // To do this, we perform a depth first search on the inverse CFG from the
    // loading block.
    for (BasicBlock *P : predecessors(BB)) {
      for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
        if (AAR.canBasicBlockModify(*TranspBB, Loc))
          return false;
    }
  }

  // If the path from the entry of the function to each load is free of
  // instructions that potentially invalidate the load, we can make the
  // transformation!
  return true;
}

/// Check if callers and callee agree on how promoted arguments would be
/// passed.
static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
                                  const TargetTransformInfo &TTI) {
  return all_of(F.uses(), [&](const Use &U) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    if (!CB)
      return false;

    const Function *Caller = CB->getCaller();
    const Function *Callee = CB->getCalledFunction();
    return TTI.areTypesABICompatible(Caller, Callee, Types);
  });
}

/// promoteArguments - This method checks the specified function to see if
/// there are any promotable arguments and if it is safe to promote the
/// function (for example, all callers are direct).  If it is safe to promote
/// some arguments, it calls doPromotion.
static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
                                  unsigned MaxElements, bool IsRecursive) {
  // Don't perform argument promotion for naked functions; otherwise we can end
  // up removing parameters that are seemingly 'not used' as they are referred
  // to in the assembly.
  if (F->hasFnAttribute(Attribute::Naked))
    return nullptr;

  // Make sure that it is local to this module.
  if (!F->hasLocalLinkage())
    return nullptr;

  // Don't promote arguments for variadic functions. Adding, removing, or
  // changing non-pack parameters can change the classification of pack
  // parameters. Frontends encode that classification at the call site in the
  // IR, while in the callee the classification is determined dynamically based
  // on the number of registers consumed so far.
  if (F->isVarArg())
    return nullptr;

  // Don't transform functions that receive inallocas, as the transformation may
  // not be safe depending on calling convention.
  if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
    return nullptr;

  // First check: see if there are any pointer arguments!  If not, quick exit.
  SmallVector<Argument *, 16> PointerArgs;
  for (Argument &I : F->args())
    if (I.getType()->isPointerTy())
      PointerArgs.push_back(&I);
  if (PointerArgs.empty())
    return nullptr;

  // Second check: make sure that all callers are direct callers.  We can't
  // transform functions that have indirect callers.  Also see if the function
  // is self-recursive.
  for (Use &U : F->uses()) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    // Must be a direct call.
    if (CB == nullptr || !CB->isCallee(&U) ||
        CB->getFunctionType() != F->getFunctionType())
      return nullptr;

    // Can't change signature of musttail callee.
    if (CB->isMustTailCall())
      return nullptr;

    if (CB->getFunction() == F)
      IsRecursive = true;
  }

  // Can't change signature of musttail caller.
  // FIXME: Support promoting whole chain of musttail functions
  for (BasicBlock &BB : *F)
    if (BB.getTerminatingMustTailCall())
      return nullptr;

  const DataLayout &DL = F->getParent()->getDataLayout();
  auto &AAR = FAM.getResult<AAManager>(*F);
  const auto &TTI = FAM.getResult<TargetIRAnalysis>(*F);

  // Check to see which arguments are promotable.  If an argument is promotable,
  // add it to ArgsToPromote.
  DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
  for (Argument *PtrArg : PointerArgs) {
    // Replace sret attribute with noalias. This reduces register pressure by
    // avoiding a register copy.
    if (PtrArg->hasStructRetAttr()) {
      unsigned ArgNo = PtrArg->getArgNo();
      F->removeParamAttr(ArgNo, Attribute::StructRet);
      F->addParamAttr(ArgNo, Attribute::NoAlias);
      for (Use &U : F->uses()) {
        CallBase &CB = cast<CallBase>(*U.getUser());
        CB.removeParamAttr(ArgNo, Attribute::StructRet);
        CB.addParamAttr(ArgNo, Attribute::NoAlias);
      }
    }

    // See if we can promote the pointer to its value.
    SmallVector<OffsetAndArgPart, 4> ArgParts;

    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
      SmallVector<Type *, 4> Types;
      for (const auto &Pair : ArgParts)
        Types.push_back(Pair.second.Ty);

      if (areTypesABICompatible(Types, *F, TTI)) {
        ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
      }
    }
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty())
    return nullptr;

  return doPromotion(F, FAM, ArgsToPromote);
}

PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
                                             CGSCCAnalysisManager &AM,
                                             LazyCallGraph &CG,
                                             CGSCCUpdateResult &UR) {
  bool Changed = false, LocalChange;

  // Iterate until we stop promoting from this SCC.
  do {
    LocalChange = false;

    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

    bool IsRecursive = C.size() > 1;
    for (LazyCallGraph::Node &N : C) {
      Function &OldF = N.getFunction();
      Function *NewF = promoteArguments(&OldF, FAM, MaxElements, IsRecursive);
      if (!NewF)
        continue;
      LocalChange = true;

      // Directly substitute the functions in the call graph. Note that this
      // requires the old function to be completely dead and completely
      // replaced by the new function. It does no call graph updates; it
      // merely swaps out the particular function mapped to a particular node
      // in the graph.
      C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
      FAM.clear(OldF, OldF.getName());
      OldF.eraseFromParent();

      PreservedAnalyses FuncPA;
      FuncPA.preserveSet<CFGAnalyses>();
      for (auto *U : NewF->users()) {
        auto *UserF = cast<CallBase>(U)->getFunction();
        FAM.invalidate(*UserF, FuncPA);
      }
    }

    Changed |= LocalChange;
  } while (LocalChange);

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  // We've cleared out analyses for deleted functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We've manually invalidated analyses for functions we've modified.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}