10b57cec5SDimitry Andric //===- AMDGPURewriteOutArgumentsPass.cpp - Create struct returns ----------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file This pass attempts to replace out argument usage with a return of a
100b57cec5SDimitry Andric /// struct.
110b57cec5SDimitry Andric ///
120b57cec5SDimitry Andric /// We can support returning a lot of values directly in registers, but
130b57cec5SDimitry Andric /// idiomatic C code frequently uses a pointer argument to return a second value
140b57cec5SDimitry Andric /// rather than returning a struct by value. GPU stack access is also quite
150b57cec5SDimitry Andric /// painful, so we want to avoid that if possible. Passing a stack object
160b57cec5SDimitry Andric /// pointer to a function also requires an additional address expansion code
170b57cec5SDimitry Andric /// sequence to convert the pointer to be relative to the kernel's scratch wave
180b57cec5SDimitry Andric /// offset register since the callee doesn't know what stack frame the incoming
190b57cec5SDimitry Andric /// pointer is relative to.
200b57cec5SDimitry Andric ///
210b57cec5SDimitry Andric /// The goal is to try rewriting code that looks like this:
220b57cec5SDimitry Andric ///
230b57cec5SDimitry Andric /// int foo(int a, int b, int* out) {
240b57cec5SDimitry Andric /// *out = bar();
250b57cec5SDimitry Andric /// return a + b;
260b57cec5SDimitry Andric /// }
270b57cec5SDimitry Andric ///
280b57cec5SDimitry Andric /// into something like this:
290b57cec5SDimitry Andric ///
300b57cec5SDimitry Andric /// std::pair<int, int> foo(int a, int b) {
31bdd1243dSDimitry Andric /// return std::pair(a + b, bar());
320b57cec5SDimitry Andric /// }
330b57cec5SDimitry Andric ///
/// Typically the incoming pointer is a simple alloca for a temporary variable
/// to use the API, which if replaced with a struct return will be easily SROA'd
/// out when the stub function we create is inlined.
370b57cec5SDimitry Andric ///
380b57cec5SDimitry Andric /// This pass introduces the struct return, but leaves the unused pointer
390b57cec5SDimitry Andric /// arguments and introduces a new stub function calling the struct returning
400b57cec5SDimitry Andric /// body. DeadArgumentElimination should be run after this to clean these up.
410b57cec5SDimitry Andric //
420b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
430b57cec5SDimitry Andric
440b57cec5SDimitry Andric #include "AMDGPU.h"
450b57cec5SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
460b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
47480093f4SDimitry Andric #include "llvm/Analysis/MemoryDependenceAnalysis.h"
4806c3fb27SDimitry Andric #include "llvm/IR/AttributeMask.h"
490b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
500b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
51480093f4SDimitry Andric #include "llvm/InitializePasses.h"
520b57cec5SDimitry Andric #include "llvm/Pass.h"
530b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
540b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
550b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
560b57cec5SDimitry Andric
570b57cec5SDimitry Andric #define DEBUG_TYPE "amdgpu-rewrite-out-arguments"
580b57cec5SDimitry Andric
590b57cec5SDimitry Andric using namespace llvm;
600b57cec5SDimitry Andric
610b57cec5SDimitry Andric static cl::opt<bool> AnyAddressSpace(
620b57cec5SDimitry Andric "amdgpu-any-address-space-out-arguments",
630b57cec5SDimitry Andric cl::desc("Replace pointer out arguments with "
640b57cec5SDimitry Andric "struct returns for non-private address space"),
650b57cec5SDimitry Andric cl::Hidden,
660b57cec5SDimitry Andric cl::init(false));
670b57cec5SDimitry Andric
680b57cec5SDimitry Andric static cl::opt<unsigned> MaxNumRetRegs(
690b57cec5SDimitry Andric "amdgpu-max-return-arg-num-regs",
700b57cec5SDimitry Andric cl::desc("Approximately limit number of return registers for replacing out arguments"),
710b57cec5SDimitry Andric cl::Hidden,
720b57cec5SDimitry Andric cl::init(16));
730b57cec5SDimitry Andric
740b57cec5SDimitry Andric STATISTIC(NumOutArgumentsReplaced,
750b57cec5SDimitry Andric "Number out arguments moved to struct return values");
760b57cec5SDimitry Andric STATISTIC(NumOutArgumentFunctionsReplaced,
770b57cec5SDimitry Andric "Number of functions with out arguments moved to struct return values");
780b57cec5SDimitry Andric
790b57cec5SDimitry Andric namespace {
800b57cec5SDimitry Andric
810b57cec5SDimitry Andric class AMDGPURewriteOutArguments : public FunctionPass {
820b57cec5SDimitry Andric private:
830b57cec5SDimitry Andric const DataLayout *DL = nullptr;
840b57cec5SDimitry Andric MemoryDependenceResults *MDA = nullptr;
850b57cec5SDimitry Andric
8681ad6265SDimitry Andric Type *getStoredType(Value &Arg) const;
8781ad6265SDimitry Andric Type *getOutArgumentType(Argument &Arg) const;
880b57cec5SDimitry Andric
890b57cec5SDimitry Andric public:
900b57cec5SDimitry Andric static char ID;
910b57cec5SDimitry Andric
AMDGPURewriteOutArguments()920b57cec5SDimitry Andric AMDGPURewriteOutArguments() : FunctionPass(ID) {}
930b57cec5SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const940b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
950b57cec5SDimitry Andric AU.addRequired<MemoryDependenceWrapperPass>();
960b57cec5SDimitry Andric FunctionPass::getAnalysisUsage(AU);
970b57cec5SDimitry Andric }
980b57cec5SDimitry Andric
990b57cec5SDimitry Andric bool doInitialization(Module &M) override;
1000b57cec5SDimitry Andric bool runOnFunction(Function &F) override;
1010b57cec5SDimitry Andric };
1020b57cec5SDimitry Andric
1030b57cec5SDimitry Andric } // end anonymous namespace
1040b57cec5SDimitry Andric
1050b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPURewriteOutArguments, DEBUG_TYPE,
1060b57cec5SDimitry Andric "AMDGPU Rewrite Out Arguments", false, false)
1070b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
1080b57cec5SDimitry Andric INITIALIZE_PASS_END(AMDGPURewriteOutArguments, DEBUG_TYPE,
1090b57cec5SDimitry Andric "AMDGPU Rewrite Out Arguments", false, false)
1100b57cec5SDimitry Andric
1110b57cec5SDimitry Andric char AMDGPURewriteOutArguments::ID = 0;
1120b57cec5SDimitry Andric
getStoredType(Value & Arg) const11381ad6265SDimitry Andric Type *AMDGPURewriteOutArguments::getStoredType(Value &Arg) const {
1140b57cec5SDimitry Andric const int MaxUses = 10;
1150b57cec5SDimitry Andric int UseCount = 0;
1160b57cec5SDimitry Andric
11781ad6265SDimitry Andric SmallVector<Use *> Worklist;
11881ad6265SDimitry Andric for (Use &U : Arg.uses())
11981ad6265SDimitry Andric Worklist.push_back(&U);
1200b57cec5SDimitry Andric
12181ad6265SDimitry Andric Type *StoredType = nullptr;
12281ad6265SDimitry Andric while (!Worklist.empty()) {
12381ad6265SDimitry Andric Use *U = Worklist.pop_back_val();
1240b57cec5SDimitry Andric
12581ad6265SDimitry Andric if (auto *BCI = dyn_cast<BitCastInst>(U->getUser())) {
12681ad6265SDimitry Andric for (Use &U : BCI->uses())
12781ad6265SDimitry Andric Worklist.push_back(&U);
12881ad6265SDimitry Andric continue;
1290b57cec5SDimitry Andric }
1300b57cec5SDimitry Andric
13181ad6265SDimitry Andric if (auto *SI = dyn_cast<StoreInst>(U->getUser())) {
13281ad6265SDimitry Andric if (UseCount++ > MaxUses)
13381ad6265SDimitry Andric return nullptr;
13481ad6265SDimitry Andric
1350b57cec5SDimitry Andric if (!SI->isSimple() ||
13681ad6265SDimitry Andric U->getOperandNo() != StoreInst::getPointerOperandIndex())
13781ad6265SDimitry Andric return nullptr;
1380b57cec5SDimitry Andric
13981ad6265SDimitry Andric if (StoredType && StoredType != SI->getValueOperand()->getType())
14081ad6265SDimitry Andric return nullptr; // More than one type.
14181ad6265SDimitry Andric StoredType = SI->getValueOperand()->getType();
14281ad6265SDimitry Andric continue;
1430b57cec5SDimitry Andric }
1440b57cec5SDimitry Andric
14581ad6265SDimitry Andric // Unsupported user.
14681ad6265SDimitry Andric return nullptr;
1470b57cec5SDimitry Andric }
1480b57cec5SDimitry Andric
14981ad6265SDimitry Andric return StoredType;
15081ad6265SDimitry Andric }
15181ad6265SDimitry Andric
getOutArgumentType(Argument & Arg) const15281ad6265SDimitry Andric Type *AMDGPURewriteOutArguments::getOutArgumentType(Argument &Arg) const {
1530b57cec5SDimitry Andric const unsigned MaxOutArgSizeBytes = 4 * MaxNumRetRegs;
1540b57cec5SDimitry Andric PointerType *ArgTy = dyn_cast<PointerType>(Arg.getType());
1550b57cec5SDimitry Andric
1560b57cec5SDimitry Andric // TODO: It might be useful for any out arguments, not just privates.
1570b57cec5SDimitry Andric if (!ArgTy || (ArgTy->getAddressSpace() != DL->getAllocaAddrSpace() &&
1580b57cec5SDimitry Andric !AnyAddressSpace) ||
15981ad6265SDimitry Andric Arg.hasByValAttr() || Arg.hasStructRetAttr()) {
16081ad6265SDimitry Andric return nullptr;
1610b57cec5SDimitry Andric }
1620b57cec5SDimitry Andric
16381ad6265SDimitry Andric Type *StoredType = getStoredType(Arg);
16481ad6265SDimitry Andric if (!StoredType || DL->getTypeStoreSize(StoredType) > MaxOutArgSizeBytes)
16581ad6265SDimitry Andric return nullptr;
16681ad6265SDimitry Andric
16781ad6265SDimitry Andric return StoredType;
1680b57cec5SDimitry Andric }
1690b57cec5SDimitry Andric
doInitialization(Module & M)1700b57cec5SDimitry Andric bool AMDGPURewriteOutArguments::doInitialization(Module &M) {
1710b57cec5SDimitry Andric DL = &M.getDataLayout();
1720b57cec5SDimitry Andric return false;
1730b57cec5SDimitry Andric }
1740b57cec5SDimitry Andric
runOnFunction(Function & F)1750b57cec5SDimitry Andric bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
1760b57cec5SDimitry Andric if (skipFunction(F))
1770b57cec5SDimitry Andric return false;
1780b57cec5SDimitry Andric
1790b57cec5SDimitry Andric // TODO: Could probably handle variadic functions.
1800b57cec5SDimitry Andric if (F.isVarArg() || F.hasStructRetAttr() ||
1810b57cec5SDimitry Andric AMDGPU::isEntryFunctionCC(F.getCallingConv()))
1820b57cec5SDimitry Andric return false;
1830b57cec5SDimitry Andric
1840b57cec5SDimitry Andric MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
1850b57cec5SDimitry Andric
1860b57cec5SDimitry Andric unsigned ReturnNumRegs = 0;
18781ad6265SDimitry Andric SmallDenseMap<int, Type *, 4> OutArgIndexes;
1880b57cec5SDimitry Andric SmallVector<Type *, 4> ReturnTypes;
1890b57cec5SDimitry Andric Type *RetTy = F.getReturnType();
1900b57cec5SDimitry Andric if (!RetTy->isVoidTy()) {
1910b57cec5SDimitry Andric ReturnNumRegs = DL->getTypeStoreSize(RetTy) / 4;
1920b57cec5SDimitry Andric
1930b57cec5SDimitry Andric if (ReturnNumRegs >= MaxNumRetRegs)
1940b57cec5SDimitry Andric return false;
1950b57cec5SDimitry Andric
1960b57cec5SDimitry Andric ReturnTypes.push_back(RetTy);
1970b57cec5SDimitry Andric }
1980b57cec5SDimitry Andric
19981ad6265SDimitry Andric SmallVector<std::pair<Argument *, Type *>, 4> OutArgs;
2000b57cec5SDimitry Andric for (Argument &Arg : F.args()) {
20181ad6265SDimitry Andric if (Type *Ty = getOutArgumentType(Arg)) {
2020b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found possible out argument " << Arg
2030b57cec5SDimitry Andric << " in function " << F.getName() << '\n');
20481ad6265SDimitry Andric OutArgs.push_back({&Arg, Ty});
2050b57cec5SDimitry Andric }
2060b57cec5SDimitry Andric }
2070b57cec5SDimitry Andric
2080b57cec5SDimitry Andric if (OutArgs.empty())
2090b57cec5SDimitry Andric return false;
2100b57cec5SDimitry Andric
2110b57cec5SDimitry Andric using ReplacementVec = SmallVector<std::pair<Argument *, Value *>, 4>;
2120b57cec5SDimitry Andric
2130b57cec5SDimitry Andric DenseMap<ReturnInst *, ReplacementVec> Replacements;
2140b57cec5SDimitry Andric
2150b57cec5SDimitry Andric SmallVector<ReturnInst *, 4> Returns;
2160b57cec5SDimitry Andric for (BasicBlock &BB : F) {
2170b57cec5SDimitry Andric if (ReturnInst *RI = dyn_cast<ReturnInst>(&BB.back()))
2180b57cec5SDimitry Andric Returns.push_back(RI);
2190b57cec5SDimitry Andric }
2200b57cec5SDimitry Andric
2210b57cec5SDimitry Andric if (Returns.empty())
2220b57cec5SDimitry Andric return false;
2230b57cec5SDimitry Andric
2240b57cec5SDimitry Andric bool Changing;
2250b57cec5SDimitry Andric
2260b57cec5SDimitry Andric do {
2270b57cec5SDimitry Andric Changing = false;
2280b57cec5SDimitry Andric
2290b57cec5SDimitry Andric // Keep retrying if we are able to successfully eliminate an argument. This
2300b57cec5SDimitry Andric // helps with cases with multiple arguments which may alias, such as in a
231349cc55cSDimitry Andric // sincos implementation. If we have 2 stores to arguments, on the first
2320b57cec5SDimitry Andric // attempt the MDA query will succeed for the second store but not the
2330b57cec5SDimitry Andric // first. On the second iteration we've removed that out clobbering argument
2340b57cec5SDimitry Andric // (by effectively moving it into another function) and will find the second
2350b57cec5SDimitry Andric // argument is OK to move.
23681ad6265SDimitry Andric for (const auto &Pair : OutArgs) {
2370b57cec5SDimitry Andric bool ThisReplaceable = true;
2380b57cec5SDimitry Andric SmallVector<std::pair<ReturnInst *, StoreInst *>, 4> ReplaceableStores;
2390b57cec5SDimitry Andric
24081ad6265SDimitry Andric Argument *OutArg = Pair.first;
24181ad6265SDimitry Andric Type *ArgTy = Pair.second;
2420b57cec5SDimitry Andric
2430b57cec5SDimitry Andric // Skip this argument if converting it will push us over the register
2440b57cec5SDimitry Andric // count to return limit.
2450b57cec5SDimitry Andric
2460b57cec5SDimitry Andric // TODO: This is an approximation. When legalized this could be more. We
2470b57cec5SDimitry Andric // can ask TLI for exactly how many.
2480b57cec5SDimitry Andric unsigned ArgNumRegs = DL->getTypeStoreSize(ArgTy) / 4;
2490b57cec5SDimitry Andric if (ArgNumRegs + ReturnNumRegs > MaxNumRetRegs)
2500b57cec5SDimitry Andric continue;
2510b57cec5SDimitry Andric
2520b57cec5SDimitry Andric // An argument is convertible only if all exit blocks are able to replace
2530b57cec5SDimitry Andric // it.
2540b57cec5SDimitry Andric for (ReturnInst *RI : Returns) {
2550b57cec5SDimitry Andric BasicBlock *BB = RI->getParent();
2560b57cec5SDimitry Andric
257e8d8bef9SDimitry Andric MemDepResult Q = MDA->getPointerDependencyFrom(
258e8d8bef9SDimitry Andric MemoryLocation::getBeforeOrAfter(OutArg), true, BB->end(), BB, RI);
2590b57cec5SDimitry Andric StoreInst *SI = nullptr;
2600b57cec5SDimitry Andric if (Q.isDef())
2610b57cec5SDimitry Andric SI = dyn_cast<StoreInst>(Q.getInst());
2620b57cec5SDimitry Andric
2630b57cec5SDimitry Andric if (SI) {
2640b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found out argument store: " << *SI << '\n');
2650b57cec5SDimitry Andric ReplaceableStores.emplace_back(RI, SI);
2660b57cec5SDimitry Andric } else {
2670b57cec5SDimitry Andric ThisReplaceable = false;
2680b57cec5SDimitry Andric break;
2690b57cec5SDimitry Andric }
2700b57cec5SDimitry Andric }
2710b57cec5SDimitry Andric
2720b57cec5SDimitry Andric if (!ThisReplaceable)
2730b57cec5SDimitry Andric continue; // Try the next argument candidate.
2740b57cec5SDimitry Andric
2750b57cec5SDimitry Andric for (std::pair<ReturnInst *, StoreInst *> Store : ReplaceableStores) {
2760b57cec5SDimitry Andric Value *ReplVal = Store.second->getValueOperand();
2770b57cec5SDimitry Andric
2780b57cec5SDimitry Andric auto &ValVec = Replacements[Store.first];
279e8d8bef9SDimitry Andric if (llvm::any_of(ValVec,
2800b57cec5SDimitry Andric [OutArg](const std::pair<Argument *, Value *> &Entry) {
281e8d8bef9SDimitry Andric return Entry.first == OutArg;
282e8d8bef9SDimitry Andric })) {
2830b57cec5SDimitry Andric LLVM_DEBUG(dbgs()
2840b57cec5SDimitry Andric << "Saw multiple out arg stores" << *OutArg << '\n');
2850b57cec5SDimitry Andric // It is possible to see stores to the same argument multiple times,
2860b57cec5SDimitry Andric // but we expect these would have been optimized out already.
2870b57cec5SDimitry Andric ThisReplaceable = false;
2880b57cec5SDimitry Andric break;
2890b57cec5SDimitry Andric }
2900b57cec5SDimitry Andric
2910b57cec5SDimitry Andric ValVec.emplace_back(OutArg, ReplVal);
2920b57cec5SDimitry Andric Store.second->eraseFromParent();
2930b57cec5SDimitry Andric }
2940b57cec5SDimitry Andric
2950b57cec5SDimitry Andric if (ThisReplaceable) {
2960b57cec5SDimitry Andric ReturnTypes.push_back(ArgTy);
29781ad6265SDimitry Andric OutArgIndexes.insert({OutArg->getArgNo(), ArgTy});
2980b57cec5SDimitry Andric ++NumOutArgumentsReplaced;
2990b57cec5SDimitry Andric Changing = true;
3000b57cec5SDimitry Andric }
3010b57cec5SDimitry Andric }
3020b57cec5SDimitry Andric } while (Changing);
3030b57cec5SDimitry Andric
3040b57cec5SDimitry Andric if (Replacements.empty())
3050b57cec5SDimitry Andric return false;
3060b57cec5SDimitry Andric
3070b57cec5SDimitry Andric LLVMContext &Ctx = F.getParent()->getContext();
3080b57cec5SDimitry Andric StructType *NewRetTy = StructType::create(Ctx, ReturnTypes, F.getName());
3090b57cec5SDimitry Andric
3100b57cec5SDimitry Andric FunctionType *NewFuncTy = FunctionType::get(NewRetTy,
3110b57cec5SDimitry Andric F.getFunctionType()->params(),
3120b57cec5SDimitry Andric F.isVarArg());
3130b57cec5SDimitry Andric
3140b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Computed new return type: " << *NewRetTy << '\n');
3150b57cec5SDimitry Andric
3160b57cec5SDimitry Andric Function *NewFunc = Function::Create(NewFuncTy, Function::PrivateLinkage,
3170b57cec5SDimitry Andric F.getName() + ".body");
3180b57cec5SDimitry Andric F.getParent()->getFunctionList().insert(F.getIterator(), NewFunc);
3190b57cec5SDimitry Andric NewFunc->copyAttributesFrom(&F);
3200b57cec5SDimitry Andric NewFunc->setComdat(F.getComdat());
3210b57cec5SDimitry Andric
3220b57cec5SDimitry Andric // We want to preserve the function and param attributes, but need to strip
3230b57cec5SDimitry Andric // off any return attributes, e.g. zeroext doesn't make sense with a struct.
3240b57cec5SDimitry Andric NewFunc->stealArgumentListFrom(F);
3250b57cec5SDimitry Andric
32604eeddc0SDimitry Andric AttributeMask RetAttrs;
3270b57cec5SDimitry Andric RetAttrs.addAttribute(Attribute::SExt);
3280b57cec5SDimitry Andric RetAttrs.addAttribute(Attribute::ZExt);
3290b57cec5SDimitry Andric RetAttrs.addAttribute(Attribute::NoAlias);
330349cc55cSDimitry Andric NewFunc->removeRetAttrs(RetAttrs);
3310b57cec5SDimitry Andric // TODO: How to preserve metadata?
3320b57cec5SDimitry Andric
333*5f757f3fSDimitry Andric NewFunc->setIsNewDbgInfoFormat(F.IsNewDbgInfoFormat);
334*5f757f3fSDimitry Andric
3350b57cec5SDimitry Andric // Move the body of the function into the new rewritten function, and replace
3360b57cec5SDimitry Andric // this function with a stub.
337bdd1243dSDimitry Andric NewFunc->splice(NewFunc->begin(), &F);
3380b57cec5SDimitry Andric
3390b57cec5SDimitry Andric for (std::pair<ReturnInst *, ReplacementVec> &Replacement : Replacements) {
3400b57cec5SDimitry Andric ReturnInst *RI = Replacement.first;
3410b57cec5SDimitry Andric IRBuilder<> B(RI);
3420b57cec5SDimitry Andric B.SetCurrentDebugLocation(RI->getDebugLoc());
3430b57cec5SDimitry Andric
3440b57cec5SDimitry Andric int RetIdx = 0;
345bdd1243dSDimitry Andric Value *NewRetVal = PoisonValue::get(NewRetTy);
3460b57cec5SDimitry Andric
3470b57cec5SDimitry Andric Value *RetVal = RI->getReturnValue();
3480b57cec5SDimitry Andric if (RetVal)
3490b57cec5SDimitry Andric NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++);
3500b57cec5SDimitry Andric
35181ad6265SDimitry Andric for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second)
35281ad6265SDimitry Andric NewRetVal = B.CreateInsertValue(NewRetVal, ReturnPoint.second, RetIdx++);
3530b57cec5SDimitry Andric
3540b57cec5SDimitry Andric if (RetVal)
3550b57cec5SDimitry Andric RI->setOperand(0, NewRetVal);
3560b57cec5SDimitry Andric else {
3570b57cec5SDimitry Andric B.CreateRet(NewRetVal);
3580b57cec5SDimitry Andric RI->eraseFromParent();
3590b57cec5SDimitry Andric }
3600b57cec5SDimitry Andric }
3610b57cec5SDimitry Andric
3620b57cec5SDimitry Andric SmallVector<Value *, 16> StubCallArgs;
3630b57cec5SDimitry Andric for (Argument &Arg : F.args()) {
3640b57cec5SDimitry Andric if (OutArgIndexes.count(Arg.getArgNo())) {
3650b57cec5SDimitry Andric // It's easier to preserve the type of the argument list. We rely on
3660b57cec5SDimitry Andric // DeadArgumentElimination to take care of these.
367bdd1243dSDimitry Andric StubCallArgs.push_back(PoisonValue::get(Arg.getType()));
3680b57cec5SDimitry Andric } else {
3690b57cec5SDimitry Andric StubCallArgs.push_back(&Arg);
3700b57cec5SDimitry Andric }
3710b57cec5SDimitry Andric }
3720b57cec5SDimitry Andric
3730b57cec5SDimitry Andric BasicBlock *StubBB = BasicBlock::Create(Ctx, "", &F);
3740b57cec5SDimitry Andric IRBuilder<> B(StubBB);
3750b57cec5SDimitry Andric CallInst *StubCall = B.CreateCall(NewFunc, StubCallArgs);
3760b57cec5SDimitry Andric
3770b57cec5SDimitry Andric int RetIdx = RetTy->isVoidTy() ? 0 : 1;
3780b57cec5SDimitry Andric for (Argument &Arg : F.args()) {
3790b57cec5SDimitry Andric if (!OutArgIndexes.count(Arg.getArgNo()))
3800b57cec5SDimitry Andric continue;
3810b57cec5SDimitry Andric
38281ad6265SDimitry Andric Type *EltTy = OutArgIndexes[Arg.getArgNo()];
3835ffd83dbSDimitry Andric const auto Align =
3845ffd83dbSDimitry Andric DL->getValueOrABITypeAlignment(Arg.getParamAlign(), EltTy);
3850b57cec5SDimitry Andric
3860b57cec5SDimitry Andric Value *Val = B.CreateExtractValue(StubCall, RetIdx++);
38706c3fb27SDimitry Andric B.CreateAlignedStore(Val, &Arg, Align);
3880b57cec5SDimitry Andric }
3890b57cec5SDimitry Andric
3900b57cec5SDimitry Andric if (!RetTy->isVoidTy()) {
3910b57cec5SDimitry Andric B.CreateRet(B.CreateExtractValue(StubCall, 0));
3920b57cec5SDimitry Andric } else {
3930b57cec5SDimitry Andric B.CreateRetVoid();
3940b57cec5SDimitry Andric }
3950b57cec5SDimitry Andric
3960b57cec5SDimitry Andric // The function is now a stub we want to inline.
3970b57cec5SDimitry Andric F.addFnAttr(Attribute::AlwaysInline);
3980b57cec5SDimitry Andric
3990b57cec5SDimitry Andric ++NumOutArgumentFunctionsReplaced;
4000b57cec5SDimitry Andric return true;
4010b57cec5SDimitry Andric }
4020b57cec5SDimitry Andric
createAMDGPURewriteOutArgumentsPass()4030b57cec5SDimitry Andric FunctionPass *llvm::createAMDGPURewriteOutArgumentsPass() {
4040b57cec5SDimitry Andric return new AMDGPURewriteOutArguments();
4050b57cec5SDimitry Andric }
406