xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // \file
100b57cec5SDimitry Andric // Lower aggregate copies, memset, memcpy, memmove intrinsics into loops when
110b57cec5SDimitry Andric // the size is large or is not a compile-time constant.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "NVPTXLowerAggrCopies.h"
160b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
170b57cec5SDimitry Andric #include "llvm/CodeGen/StackProtector.h"
180b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
190b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h"
200b57cec5SDimitry Andric #include "llvm/IR/Function.h"
210b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
220b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
230b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
240b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h"
250b57cec5SDimitry Andric #include "llvm/IR/LLVMContext.h"
260b57cec5SDimitry Andric #include "llvm/IR/Module.h"
270b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
280b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
290b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
300b57cec5SDimitry Andric 
310b57cec5SDimitry Andric #define DEBUG_TYPE "nvptx"
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric using namespace llvm;
340b57cec5SDimitry Andric 
350b57cec5SDimitry Andric namespace {
360b57cec5SDimitry Andric 
370b57cec5SDimitry Andric // actual analysis class, which is a functionpass
380b57cec5SDimitry Andric struct NVPTXLowerAggrCopies : public FunctionPass {
390b57cec5SDimitry Andric   static char ID;
400b57cec5SDimitry Andric 
410b57cec5SDimitry Andric   NVPTXLowerAggrCopies() : FunctionPass(ID) {}
420b57cec5SDimitry Andric 
430b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
440b57cec5SDimitry Andric     AU.addPreserved<StackProtector>();
450b57cec5SDimitry Andric     AU.addRequired<TargetTransformInfoWrapperPass>();
460b57cec5SDimitry Andric   }
470b57cec5SDimitry Andric 
480b57cec5SDimitry Andric   bool runOnFunction(Function &F) override;
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric   static const unsigned MaxAggrCopySize = 128;
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric   StringRef getPassName() const override {
530b57cec5SDimitry Andric     return "Lower aggregate copies/intrinsics into loops";
540b57cec5SDimitry Andric   }
550b57cec5SDimitry Andric };
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric char NVPTXLowerAggrCopies::ID = 0;
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
600b57cec5SDimitry Andric   SmallVector<LoadInst *, 4> AggrLoads;
610b57cec5SDimitry Andric   SmallVector<MemIntrinsic *, 4> MemCalls;
620b57cec5SDimitry Andric 
63*0fca6ea1SDimitry Andric   const DataLayout &DL = F.getDataLayout();
640b57cec5SDimitry Andric   LLVMContext &Context = F.getParent()->getContext();
650b57cec5SDimitry Andric   const TargetTransformInfo &TTI =
660b57cec5SDimitry Andric       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
670b57cec5SDimitry Andric 
680b57cec5SDimitry Andric   // Collect all aggregate loads and mem* calls.
6904eeddc0SDimitry Andric   for (BasicBlock &BB : F) {
7004eeddc0SDimitry Andric     for (Instruction &I : BB) {
7104eeddc0SDimitry Andric       if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
720b57cec5SDimitry Andric         if (!LI->hasOneUse())
730b57cec5SDimitry Andric           continue;
740b57cec5SDimitry Andric 
750b57cec5SDimitry Andric         if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize)
760b57cec5SDimitry Andric           continue;
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric         if (StoreInst *SI = dyn_cast<StoreInst>(LI->user_back())) {
790b57cec5SDimitry Andric           if (SI->getOperand(0) != LI)
800b57cec5SDimitry Andric             continue;
810b57cec5SDimitry Andric           AggrLoads.push_back(LI);
820b57cec5SDimitry Andric         }
8304eeddc0SDimitry Andric       } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(&I)) {
840b57cec5SDimitry Andric         // Convert intrinsic calls with variable size or with constant size
850b57cec5SDimitry Andric         // larger than the MaxAggrCopySize threshold.
860b57cec5SDimitry Andric         if (ConstantInt *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength())) {
870b57cec5SDimitry Andric           if (LenCI->getZExtValue() >= MaxAggrCopySize) {
880b57cec5SDimitry Andric             MemCalls.push_back(IntrCall);
890b57cec5SDimitry Andric           }
900b57cec5SDimitry Andric         } else {
910b57cec5SDimitry Andric           MemCalls.push_back(IntrCall);
920b57cec5SDimitry Andric         }
930b57cec5SDimitry Andric       }
940b57cec5SDimitry Andric     }
950b57cec5SDimitry Andric   }
960b57cec5SDimitry Andric 
970b57cec5SDimitry Andric   if (AggrLoads.size() == 0 && MemCalls.size() == 0) {
980b57cec5SDimitry Andric     return false;
990b57cec5SDimitry Andric   }
1000b57cec5SDimitry Andric 
1010b57cec5SDimitry Andric   //
1020b57cec5SDimitry Andric   // Do the transformation of an aggr load/copy/set to a loop
1030b57cec5SDimitry Andric   //
1040b57cec5SDimitry Andric   for (LoadInst *LI : AggrLoads) {
1058bcb0991SDimitry Andric     auto *SI = cast<StoreInst>(*LI->user_begin());
1060b57cec5SDimitry Andric     Value *SrcAddr = LI->getOperand(0);
1070b57cec5SDimitry Andric     Value *DstAddr = SI->getOperand(1);
1080b57cec5SDimitry Andric     unsigned NumLoads = DL.getTypeStoreSize(LI->getType());
1090b57cec5SDimitry Andric     ConstantInt *CopyLen =
1100b57cec5SDimitry Andric         ConstantInt::get(Type::getInt32Ty(Context), NumLoads);
1110b57cec5SDimitry Andric 
1120b57cec5SDimitry Andric     createMemCpyLoopKnownSize(/* ConvertedInst */ SI,
1130b57cec5SDimitry Andric                               /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
1140b57cec5SDimitry Andric                               /* CopyLen */ CopyLen,
1155ffd83dbSDimitry Andric                               /* SrcAlign */ LI->getAlign(),
1165ffd83dbSDimitry Andric                               /* DestAlign */ SI->getAlign(),
1170b57cec5SDimitry Andric                               /* SrcIsVolatile */ LI->isVolatile(),
11881ad6265SDimitry Andric                               /* DstIsVolatile */ SI->isVolatile(),
11981ad6265SDimitry Andric                               /* CanOverlap */ true, TTI);
1200b57cec5SDimitry Andric 
1210b57cec5SDimitry Andric     SI->eraseFromParent();
1220b57cec5SDimitry Andric     LI->eraseFromParent();
1230b57cec5SDimitry Andric   }
1240b57cec5SDimitry Andric 
1250b57cec5SDimitry Andric   // Transform mem* intrinsic calls.
1260b57cec5SDimitry Andric   for (MemIntrinsic *MemCall : MemCalls) {
1270b57cec5SDimitry Andric     if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
1280b57cec5SDimitry Andric       expandMemCpyAsLoop(Memcpy, TTI);
1290b57cec5SDimitry Andric     } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
13006c3fb27SDimitry Andric       expandMemMoveAsLoop(Memmove, TTI);
1310b57cec5SDimitry Andric     } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
1320b57cec5SDimitry Andric       expandMemSetAsLoop(Memset);
1330b57cec5SDimitry Andric     }
1340b57cec5SDimitry Andric     MemCall->eraseFromParent();
1350b57cec5SDimitry Andric   }
1360b57cec5SDimitry Andric 
1370b57cec5SDimitry Andric   return true;
1380b57cec5SDimitry Andric }
1390b57cec5SDimitry Andric 
1400b57cec5SDimitry Andric } // namespace
1410b57cec5SDimitry Andric 
1420b57cec5SDimitry Andric namespace llvm {
1430b57cec5SDimitry Andric void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
1440b57cec5SDimitry Andric }
1450b57cec5SDimitry Andric 
1460b57cec5SDimitry Andric INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies",
1470b57cec5SDimitry Andric                 "Lower aggregate copies, and llvm.mem* intrinsics into loops",
1480b57cec5SDimitry Andric                 false, false)
1490b57cec5SDimitry Andric 
1500b57cec5SDimitry Andric FunctionPass *llvm::createLowerAggrCopies() {
1510b57cec5SDimitry Andric   return new NVPTXLowerAggrCopies();
1520b57cec5SDimitry Andric }
153