xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Top-level implementation for the NVPTX target.
//
//===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
#include "NVPTXTargetMachine.h"
#include "NVPTX.h"
#include "NVPTXAliasAnalysis.h"
#include "NVPTXAllocaHoisting.h"
#include "NVPTXAtomicLower.h"
#include "NVPTXCtorDtorLowering.h"
#include "NVPTXLowerAggrCopies.h"
#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXTargetTransformInfo.h"
#include "TargetInfo/NVPTXTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include <cassert>
#include <memory>
#include <optional>
#include <string>
#include <utility>

using namespace llvm;
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric // LSV is still relatively new; this switch lets us turn it off in case we
470b57cec5SDimitry Andric // encounter (or suspect) a bug.
480b57cec5SDimitry Andric static cl::opt<bool>
490b57cec5SDimitry Andric     DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer",
500b57cec5SDimitry Andric                                cl::desc("Disable load/store vectorizer"),
510b57cec5SDimitry Andric                                cl::init(false), cl::Hidden);
520b57cec5SDimitry Andric 
530b57cec5SDimitry Andric // TODO: Remove this flag when we are confident with no regressions.
540b57cec5SDimitry Andric static cl::opt<bool> DisableRequireStructuredCFG(
550b57cec5SDimitry Andric     "disable-nvptx-require-structured-cfg",
560b57cec5SDimitry Andric     cl::desc("Transitional flag to turn off NVPTX's requirement on preserving "
570b57cec5SDimitry Andric              "structured CFG. The requirement should be disabled only when "
580b57cec5SDimitry Andric              "unexpected regressions happen."),
590b57cec5SDimitry Andric     cl::init(false), cl::Hidden);
600b57cec5SDimitry Andric 
610b57cec5SDimitry Andric static cl::opt<bool> UseShortPointersOpt(
620b57cec5SDimitry Andric     "nvptx-short-ptr",
630b57cec5SDimitry Andric     cl::desc(
640b57cec5SDimitry Andric         "Use 32-bit pointers for accessing const/local/shared address spaces."),
650b57cec5SDimitry Andric     cl::init(false), cl::Hidden);
660b57cec5SDimitry Andric 
670b57cec5SDimitry Andric namespace llvm {
680b57cec5SDimitry Andric 
6906c3fb27SDimitry Andric void initializeGenericToNVVMLegacyPassPass(PassRegistry &);
700b57cec5SDimitry Andric void initializeNVPTXAllocaHoistingPass(PassRegistry &);
710b57cec5SDimitry Andric void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry &);
72bdd1243dSDimitry Andric void initializeNVPTXAtomicLowerPass(PassRegistry &);
7306c3fb27SDimitry Andric void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
740b57cec5SDimitry Andric void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
750b57cec5SDimitry Andric void initializeNVPTXLowerAllocaPass(PassRegistry &);
7606c3fb27SDimitry Andric void initializeNVPTXLowerUnreachablePass(PassRegistry &);
7706c3fb27SDimitry Andric void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
78bdd1243dSDimitry Andric void initializeNVPTXLowerArgsPass(PassRegistry &);
790b57cec5SDimitry Andric void initializeNVPTXProxyRegErasurePass(PassRegistry &);
80bdd1243dSDimitry Andric void initializeNVVMIntrRangePass(PassRegistry &);
81bdd1243dSDimitry Andric void initializeNVVMReflectPass(PassRegistry &);
8206c3fb27SDimitry Andric void initializeNVPTXAAWrapperPassPass(PassRegistry &);
8306c3fb27SDimitry Andric void initializeNVPTXExternalAAWrapperPass(PassRegistry &);
840b57cec5SDimitry Andric 
850b57cec5SDimitry Andric } // end namespace llvm
860b57cec5SDimitry Andric 
87480093f4SDimitry Andric extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
880b57cec5SDimitry Andric   // Register the target.
890b57cec5SDimitry Andric   RegisterTargetMachine<NVPTXTargetMachine32> X(getTheNVPTXTarget32());
900b57cec5SDimitry Andric   RegisterTargetMachine<NVPTXTargetMachine64> Y(getTheNVPTXTarget64());
910b57cec5SDimitry Andric 
92bdd1243dSDimitry Andric   PassRegistry &PR = *PassRegistry::getPassRegistry();
930b57cec5SDimitry Andric   // FIXME: This pass is really intended to be invoked during IR optimization,
940b57cec5SDimitry Andric   // but it's very NVPTX-specific.
950b57cec5SDimitry Andric   initializeNVVMReflectPass(PR);
960b57cec5SDimitry Andric   initializeNVVMIntrRangePass(PR);
9706c3fb27SDimitry Andric   initializeGenericToNVVMLegacyPassPass(PR);
980b57cec5SDimitry Andric   initializeNVPTXAllocaHoistingPass(PR);
990b57cec5SDimitry Andric   initializeNVPTXAssignValidGlobalNamesPass(PR);
100fe6060f1SDimitry Andric   initializeNVPTXAtomicLowerPass(PR);
1010b57cec5SDimitry Andric   initializeNVPTXLowerArgsPass(PR);
1020b57cec5SDimitry Andric   initializeNVPTXLowerAllocaPass(PR);
10306c3fb27SDimitry Andric   initializeNVPTXLowerUnreachablePass(PR);
10406c3fb27SDimitry Andric   initializeNVPTXCtorDtorLoweringLegacyPass(PR);
1050b57cec5SDimitry Andric   initializeNVPTXLowerAggrCopiesPass(PR);
1060b57cec5SDimitry Andric   initializeNVPTXProxyRegErasurePass(PR);
107*0fca6ea1SDimitry Andric   initializeNVPTXDAGToDAGISelLegacyPass(PR);
10806c3fb27SDimitry Andric   initializeNVPTXAAWrapperPassPass(PR);
10906c3fb27SDimitry Andric   initializeNVPTXExternalAAWrapperPass(PR);
1100b57cec5SDimitry Andric }
1110b57cec5SDimitry Andric 
/// Build the data layout string for an NVPTX target.
///
/// \param is64Bit          false adds the "-p:32:32" pointer entry.
/// \param UseShortPointers only consulted when \p is64Bit is true; adds 32-bit
///                         pointer entries for address spaces 3, 4 and 5.
/// \returns the layout string, always starting with "e" and ending with the
///          shared integer/vector/native-width suffix.
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
  std::string Ret = "e";

  if (!is64Bit)
    Ret += "-p:32:32";
  else if (UseShortPointers)
    Ret += "-p3:32:32-p4:32:32-p5:32:32";

  // Common to every configuration.
  Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64";

  return Ret;
}
1240b57cec5SDimitry Andric 
1250b57cec5SDimitry Andric NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
1260b57cec5SDimitry Andric                                        StringRef CPU, StringRef FS,
1270b57cec5SDimitry Andric                                        const TargetOptions &Options,
128bdd1243dSDimitry Andric                                        std::optional<Reloc::Model> RM,
129bdd1243dSDimitry Andric                                        std::optional<CodeModel::Model> CM,
1305f757f3fSDimitry Andric                                        CodeGenOptLevel OL, bool is64bit)
1310b57cec5SDimitry Andric     // The pic relocation model is used regardless of what the client has
1320b57cec5SDimitry Andric     // specified, as it is the only relocation model currently supported.
1330b57cec5SDimitry Andric     : LLVMTargetMachine(T, computeDataLayout(is64bit, UseShortPointersOpt), TT,
1340b57cec5SDimitry Andric                         CPU, FS, Options, Reloc::PIC_,
1350b57cec5SDimitry Andric                         getEffectiveCodeModel(CM, CodeModel::Small), OL),
136*0fca6ea1SDimitry Andric       is64bit(is64bit), TLOF(std::make_unique<NVPTXTargetObjectFile>()),
137bdd1243dSDimitry Andric       Subtarget(TT, std::string(CPU), std::string(FS), *this),
138bdd1243dSDimitry Andric       StrPool(StrAlloc) {
1390b57cec5SDimitry Andric   if (TT.getOS() == Triple::NVCL)
1400b57cec5SDimitry Andric     drvInterface = NVPTX::NVCL;
1410b57cec5SDimitry Andric   else
1420b57cec5SDimitry Andric     drvInterface = NVPTX::CUDA;
1430b57cec5SDimitry Andric   if (!DisableRequireStructuredCFG)
1440b57cec5SDimitry Andric     setRequiresStructuredCFG(true);
1450b57cec5SDimitry Andric   initAsmInfo();
1460b57cec5SDimitry Andric }
1470b57cec5SDimitry Andric 
1480b57cec5SDimitry Andric NVPTXTargetMachine::~NVPTXTargetMachine() = default;
1490b57cec5SDimitry Andric 
1500b57cec5SDimitry Andric void NVPTXTargetMachine32::anchor() {}
1510b57cec5SDimitry Andric 
1520b57cec5SDimitry Andric NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
1530b57cec5SDimitry Andric                                            StringRef CPU, StringRef FS,
1540b57cec5SDimitry Andric                                            const TargetOptions &Options,
155bdd1243dSDimitry Andric                                            std::optional<Reloc::Model> RM,
156bdd1243dSDimitry Andric                                            std::optional<CodeModel::Model> CM,
1575f757f3fSDimitry Andric                                            CodeGenOptLevel OL, bool JIT)
1580b57cec5SDimitry Andric     : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
1590b57cec5SDimitry Andric 
1600b57cec5SDimitry Andric void NVPTXTargetMachine64::anchor() {}
1610b57cec5SDimitry Andric 
1620b57cec5SDimitry Andric NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
1630b57cec5SDimitry Andric                                            StringRef CPU, StringRef FS,
1640b57cec5SDimitry Andric                                            const TargetOptions &Options,
165bdd1243dSDimitry Andric                                            std::optional<Reloc::Model> RM,
166bdd1243dSDimitry Andric                                            std::optional<CodeModel::Model> CM,
1675f757f3fSDimitry Andric                                            CodeGenOptLevel OL, bool JIT)
1680b57cec5SDimitry Andric     : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
1690b57cec5SDimitry Andric 
1700b57cec5SDimitry Andric namespace {
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric class NVPTXPassConfig : public TargetPassConfig {
1730b57cec5SDimitry Andric public:
1740b57cec5SDimitry Andric   NVPTXPassConfig(NVPTXTargetMachine &TM, PassManagerBase &PM)
1750b57cec5SDimitry Andric       : TargetPassConfig(TM, PM) {}
1760b57cec5SDimitry Andric 
1770b57cec5SDimitry Andric   NVPTXTargetMachine &getNVPTXTargetMachine() const {
1780b57cec5SDimitry Andric     return getTM<NVPTXTargetMachine>();
1790b57cec5SDimitry Andric   }
1800b57cec5SDimitry Andric 
1810b57cec5SDimitry Andric   void addIRPasses() override;
1820b57cec5SDimitry Andric   bool addInstSelector() override;
1830b57cec5SDimitry Andric   void addPreRegAlloc() override;
1840b57cec5SDimitry Andric   void addPostRegAlloc() override;
1850b57cec5SDimitry Andric   void addMachineSSAOptimization() override;
1860b57cec5SDimitry Andric 
1870b57cec5SDimitry Andric   FunctionPass *createTargetRegisterAllocator(bool) override;
1880b57cec5SDimitry Andric   void addFastRegAlloc() override;
1890b57cec5SDimitry Andric   void addOptimizedRegAlloc() override;
1900b57cec5SDimitry Andric 
191e8d8bef9SDimitry Andric   bool addRegAssignAndRewriteFast() override {
1920b57cec5SDimitry Andric     llvm_unreachable("should not be used");
1930b57cec5SDimitry Andric   }
1940b57cec5SDimitry Andric 
195e8d8bef9SDimitry Andric   bool addRegAssignAndRewriteOptimized() override {
1960b57cec5SDimitry Andric     llvm_unreachable("should not be used");
1970b57cec5SDimitry Andric   }
1980b57cec5SDimitry Andric 
1990b57cec5SDimitry Andric private:
2000b57cec5SDimitry Andric   // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
2010b57cec5SDimitry Andric   // function is only called in opt mode.
2020b57cec5SDimitry Andric   void addEarlyCSEOrGVNPass();
2030b57cec5SDimitry Andric 
2040b57cec5SDimitry Andric   // Add passes that propagate special memory spaces.
2050b57cec5SDimitry Andric   void addAddressSpaceInferencePasses();
2060b57cec5SDimitry Andric 
2070b57cec5SDimitry Andric   // Add passes that perform straight-line scalar optimizations.
2080b57cec5SDimitry Andric   void addStraightLineScalarOptimizationPasses();
2090b57cec5SDimitry Andric };
2100b57cec5SDimitry Andric 
2110b57cec5SDimitry Andric } // end anonymous namespace
2120b57cec5SDimitry Andric 
2130b57cec5SDimitry Andric TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
2140b57cec5SDimitry Andric   return new NVPTXPassConfig(*this, PM);
2150b57cec5SDimitry Andric }
2160b57cec5SDimitry Andric 
217bdd1243dSDimitry Andric MachineFunctionInfo *NVPTXTargetMachine::createMachineFunctionInfo(
218bdd1243dSDimitry Andric     BumpPtrAllocator &Allocator, const Function &F,
219bdd1243dSDimitry Andric     const TargetSubtargetInfo *STI) const {
220bdd1243dSDimitry Andric   return NVPTXMachineFunctionInfo::create<NVPTXMachineFunctionInfo>(Allocator,
221bdd1243dSDimitry Andric                                                                     F, STI);
2220b57cec5SDimitry Andric }
2230b57cec5SDimitry Andric 
22406c3fb27SDimitry Andric void NVPTXTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) {
22506c3fb27SDimitry Andric   AAM.registerFunctionAnalysis<NVPTXAA>();
22606c3fb27SDimitry Andric }
22706c3fb27SDimitry Andric 
228*0fca6ea1SDimitry Andric void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
229*0fca6ea1SDimitry Andric #define GET_PASS_REGISTRY "NVPTXPassRegistry.def"
230*0fca6ea1SDimitry Andric #include "llvm/Passes/TargetPassRegistry.inc"
23106c3fb27SDimitry Andric 
232e8d8bef9SDimitry Andric   PB.registerPipelineStartEPCallback(
233349cc55cSDimitry Andric       [this](ModulePassManager &PM, OptimizationLevel Level) {
234fe6060f1SDimitry Andric         FunctionPassManager FPM;
235e8d8bef9SDimitry Andric         FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
236*0fca6ea1SDimitry Andric         // Note: NVVMIntrRangePass was causing numerical discrepancies at one
237*0fca6ea1SDimitry Andric         // point, if issues crop up, consider disabling.
238*0fca6ea1SDimitry Andric         FPM.addPass(NVVMIntrRangePass());
239e8d8bef9SDimitry Andric         PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
240e8d8bef9SDimitry Andric       });
241e8d8bef9SDimitry Andric }
242e8d8bef9SDimitry Andric 
2430b57cec5SDimitry Andric TargetTransformInfo
24481ad6265SDimitry Andric NVPTXTargetMachine::getTargetTransformInfo(const Function &F) const {
2450b57cec5SDimitry Andric   return TargetTransformInfo(NVPTXTTIImpl(this, F));
2460b57cec5SDimitry Andric }
2470b57cec5SDimitry Andric 
248349cc55cSDimitry Andric std::pair<const Value *, unsigned>
249349cc55cSDimitry Andric NVPTXTargetMachine::getPredicatedAddrSpace(const Value *V) const {
250349cc55cSDimitry Andric   if (auto *II = dyn_cast<IntrinsicInst>(V)) {
251349cc55cSDimitry Andric     switch (II->getIntrinsicID()) {
252349cc55cSDimitry Andric     case Intrinsic::nvvm_isspacep_const:
253349cc55cSDimitry Andric       return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_CONST);
254349cc55cSDimitry Andric     case Intrinsic::nvvm_isspacep_global:
255349cc55cSDimitry Andric       return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_GLOBAL);
256349cc55cSDimitry Andric     case Intrinsic::nvvm_isspacep_local:
257349cc55cSDimitry Andric       return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_LOCAL);
258349cc55cSDimitry Andric     case Intrinsic::nvvm_isspacep_shared:
25906c3fb27SDimitry Andric     case Intrinsic::nvvm_isspacep_shared_cluster:
260349cc55cSDimitry Andric       return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_SHARED);
261349cc55cSDimitry Andric     default:
262349cc55cSDimitry Andric       break;
263349cc55cSDimitry Andric     }
264349cc55cSDimitry Andric   }
265349cc55cSDimitry Andric   return std::make_pair(nullptr, -1);
266349cc55cSDimitry Andric }
267349cc55cSDimitry Andric 
2680b57cec5SDimitry Andric void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
2695f757f3fSDimitry Andric   if (getOptLevel() == CodeGenOptLevel::Aggressive)
2700b57cec5SDimitry Andric     addPass(createGVNPass());
2710b57cec5SDimitry Andric   else
2720b57cec5SDimitry Andric     addPass(createEarlyCSEPass());
2730b57cec5SDimitry Andric }
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric void NVPTXPassConfig::addAddressSpaceInferencePasses() {
2760b57cec5SDimitry Andric   // NVPTXLowerArgs emits alloca for byval parameters which can often
2770b57cec5SDimitry Andric   // be eliminated by SROA.
2780b57cec5SDimitry Andric   addPass(createSROAPass());
2790b57cec5SDimitry Andric   addPass(createNVPTXLowerAllocaPass());
2800b57cec5SDimitry Andric   addPass(createInferAddressSpacesPass());
281fe6060f1SDimitry Andric   addPass(createNVPTXAtomicLowerPass());
2820b57cec5SDimitry Andric }
2830b57cec5SDimitry Andric 
2840b57cec5SDimitry Andric void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
2850b57cec5SDimitry Andric   addPass(createSeparateConstOffsetFromGEPPass());
2860b57cec5SDimitry Andric   addPass(createSpeculativeExecutionPass());
2870b57cec5SDimitry Andric   // ReassociateGEPs exposes more opportunites for SLSR. See
2880b57cec5SDimitry Andric   // the example in reassociate-geps-and-slsr.ll.
2890b57cec5SDimitry Andric   addPass(createStraightLineStrengthReducePass());
2900b57cec5SDimitry Andric   // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
2910b57cec5SDimitry Andric   // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
2920b57cec5SDimitry Andric   // for some of our benchmarks.
2930b57cec5SDimitry Andric   addEarlyCSEOrGVNPass();
2940b57cec5SDimitry Andric   // Run NaryReassociate after EarlyCSE/GVN to be more effective.
2950b57cec5SDimitry Andric   addPass(createNaryReassociatePass());
2960b57cec5SDimitry Andric   // NaryReassociate on GEPs creates redundant common expressions, so run
2970b57cec5SDimitry Andric   // EarlyCSE after it.
2980b57cec5SDimitry Andric   addPass(createEarlyCSEPass());
2990b57cec5SDimitry Andric }
3000b57cec5SDimitry Andric 
3010b57cec5SDimitry Andric void NVPTXPassConfig::addIRPasses() {
3020b57cec5SDimitry Andric   // The following passes are known to not play well with virtual regs hanging
3030b57cec5SDimitry Andric   // around after register allocation (which in our case, is *all* registers).
3040b57cec5SDimitry Andric   // We explicitly disable them here.  We do, however, need some functionality
3050b57cec5SDimitry Andric   // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
3060b57cec5SDimitry Andric   // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
3070b57cec5SDimitry Andric   disablePass(&PrologEpilogCodeInserterID);
308bdd1243dSDimitry Andric   disablePass(&MachineLateInstrsCleanupID);
3090b57cec5SDimitry Andric   disablePass(&MachineCopyPropagationID);
3100b57cec5SDimitry Andric   disablePass(&TailDuplicateID);
3110b57cec5SDimitry Andric   disablePass(&StackMapLivenessID);
3120b57cec5SDimitry Andric   disablePass(&LiveDebugValuesID);
3130b57cec5SDimitry Andric   disablePass(&PostRAMachineSinkingID);
3140b57cec5SDimitry Andric   disablePass(&PostRASchedulerID);
3150b57cec5SDimitry Andric   disablePass(&FuncletLayoutID);
3160b57cec5SDimitry Andric   disablePass(&PatchableFunctionID);
3170b57cec5SDimitry Andric   disablePass(&ShrinkWrapID);
3180b57cec5SDimitry Andric 
31906c3fb27SDimitry Andric   addPass(createNVPTXAAWrapperPass());
32006c3fb27SDimitry Andric   addPass(createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
32106c3fb27SDimitry Andric     if (auto *WrapperPass = P.getAnalysisIfAvailable<NVPTXAAWrapperPass>())
32206c3fb27SDimitry Andric       AAR.addAAResult(WrapperPass->getResult());
32306c3fb27SDimitry Andric   }));
32406c3fb27SDimitry Andric 
3250b57cec5SDimitry Andric   // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
3260b57cec5SDimitry Andric   // it here does nothing.  But since we need it for correctness when lowering
3270b57cec5SDimitry Andric   // to NVPTX, run it here too, in case whoever built our pass pipeline didn't
3280b57cec5SDimitry Andric   // call addEarlyAsPossiblePasses.
3290b57cec5SDimitry Andric   const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
3300b57cec5SDimitry Andric   addPass(createNVVMReflectPass(ST.getSmVersion()));
3310b57cec5SDimitry Andric 
3325f757f3fSDimitry Andric   if (getOptLevel() != CodeGenOptLevel::None)
3330b57cec5SDimitry Andric     addPass(createNVPTXImageOptimizerPass());
3340b57cec5SDimitry Andric   addPass(createNVPTXAssignValidGlobalNamesPass());
33506c3fb27SDimitry Andric   addPass(createGenericToNVVMLegacyPass());
3360b57cec5SDimitry Andric 
3370b57cec5SDimitry Andric   // NVPTXLowerArgs is required for correctness and should be run right
3380b57cec5SDimitry Andric   // before the address space inference passes.
33906c3fb27SDimitry Andric   addPass(createNVPTXLowerArgsPass());
3405f757f3fSDimitry Andric   if (getOptLevel() != CodeGenOptLevel::None) {
3410b57cec5SDimitry Andric     addAddressSpaceInferencePasses();
3420b57cec5SDimitry Andric     addStraightLineScalarOptimizationPasses();
3430b57cec5SDimitry Andric   }
3440b57cec5SDimitry Andric 
345*0fca6ea1SDimitry Andric   addPass(createAtomicExpandLegacyPass());
346*0fca6ea1SDimitry Andric   addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering));
34706c3fb27SDimitry Andric   addPass(createNVPTXCtorDtorLoweringLegacyPass());
34881ad6265SDimitry Andric 
3490b57cec5SDimitry Andric   // === LSR and other generic IR passes ===
3500b57cec5SDimitry Andric   TargetPassConfig::addIRPasses();
3510b57cec5SDimitry Andric   // EarlyCSE is not always strong enough to clean up what LSR produces. For
3520b57cec5SDimitry Andric   // example, GVN can combine
3530b57cec5SDimitry Andric   //
3540b57cec5SDimitry Andric   //   %0 = add %a, %b
3550b57cec5SDimitry Andric   //   %1 = add %b, %a
3560b57cec5SDimitry Andric   //
3570b57cec5SDimitry Andric   // and
3580b57cec5SDimitry Andric   //
3590b57cec5SDimitry Andric   //   %0 = shl nsw %a, 2
3600b57cec5SDimitry Andric   //   %1 = shl %a, 2
3610b57cec5SDimitry Andric   //
3620b57cec5SDimitry Andric   // but EarlyCSE can do neither of them.
3635f757f3fSDimitry Andric   if (getOptLevel() != CodeGenOptLevel::None) {
3640b57cec5SDimitry Andric     addEarlyCSEOrGVNPass();
3655ffd83dbSDimitry Andric     if (!DisableLoadStoreVectorizer)
3665ffd83dbSDimitry Andric       addPass(createLoadStoreVectorizerPass());
367349cc55cSDimitry Andric     addPass(createSROAPass());
3685ffd83dbSDimitry Andric   }
36906c3fb27SDimitry Andric 
3705f757f3fSDimitry Andric   const auto &Options = getNVPTXTargetMachine().Options;
3715f757f3fSDimitry Andric   addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
3725f757f3fSDimitry Andric                                           Options.NoTrapAfterNoreturn));
3730b57cec5SDimitry Andric }
3740b57cec5SDimitry Andric 
3750b57cec5SDimitry Andric bool NVPTXPassConfig::addInstSelector() {
3760b57cec5SDimitry Andric   const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
3770b57cec5SDimitry Andric 
3780b57cec5SDimitry Andric   addPass(createLowerAggrCopies());
3790b57cec5SDimitry Andric   addPass(createAllocaHoisting());
3800b57cec5SDimitry Andric   addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
3810b57cec5SDimitry Andric 
3820b57cec5SDimitry Andric   if (!ST.hasImageHandles())
3830b57cec5SDimitry Andric     addPass(createNVPTXReplaceImageHandlesPass());
3840b57cec5SDimitry Andric 
3850b57cec5SDimitry Andric   return false;
3860b57cec5SDimitry Andric }
3870b57cec5SDimitry Andric 
3880b57cec5SDimitry Andric void NVPTXPassConfig::addPreRegAlloc() {
3890b57cec5SDimitry Andric   // Remove Proxy Register pseudo instructions used to keep `callseq_end` alive.
3900b57cec5SDimitry Andric   addPass(createNVPTXProxyRegErasurePass());
3910b57cec5SDimitry Andric }
3920b57cec5SDimitry Andric 
3930b57cec5SDimitry Andric void NVPTXPassConfig::addPostRegAlloc() {
394349cc55cSDimitry Andric   addPass(createNVPTXPrologEpilogPass());
3955f757f3fSDimitry Andric   if (getOptLevel() != CodeGenOptLevel::None) {
3960b57cec5SDimitry Andric     // NVPTXPrologEpilogPass calculates frame object offset and replace frame
3970b57cec5SDimitry Andric     // index with VRFrame register. NVPTXPeephole need to be run after that and
3980b57cec5SDimitry Andric     // will replace VRFrame with VRFrameLocal when possible.
3990b57cec5SDimitry Andric     addPass(createNVPTXPeephole());
4000b57cec5SDimitry Andric   }
4010b57cec5SDimitry Andric }
4020b57cec5SDimitry Andric 
4030b57cec5SDimitry Andric FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
4040b57cec5SDimitry Andric   return nullptr; // No reg alloc
4050b57cec5SDimitry Andric }
4060b57cec5SDimitry Andric 
4070b57cec5SDimitry Andric void NVPTXPassConfig::addFastRegAlloc() {
4080b57cec5SDimitry Andric   addPass(&PHIEliminationID);
4090b57cec5SDimitry Andric   addPass(&TwoAddressInstructionPassID);
4100b57cec5SDimitry Andric }
4110b57cec5SDimitry Andric 
4120b57cec5SDimitry Andric void NVPTXPassConfig::addOptimizedRegAlloc() {
4130b57cec5SDimitry Andric   addPass(&ProcessImplicitDefsID);
4140b57cec5SDimitry Andric   addPass(&LiveVariablesID);
4150b57cec5SDimitry Andric   addPass(&MachineLoopInfoID);
4160b57cec5SDimitry Andric   addPass(&PHIEliminationID);
4170b57cec5SDimitry Andric 
4180b57cec5SDimitry Andric   addPass(&TwoAddressInstructionPassID);
4190b57cec5SDimitry Andric   addPass(&RegisterCoalescerID);
4200b57cec5SDimitry Andric 
4210b57cec5SDimitry Andric   // PreRA instruction scheduling.
4220b57cec5SDimitry Andric   if (addPass(&MachineSchedulerID))
4230b57cec5SDimitry Andric     printAndVerify("After Machine Scheduling");
4240b57cec5SDimitry Andric 
4250b57cec5SDimitry Andric   addPass(&StackSlotColoringID);
4260b57cec5SDimitry Andric 
4270b57cec5SDimitry Andric   // FIXME: Needs physical registers
4280b57cec5SDimitry Andric   // addPass(&MachineLICMID);
4290b57cec5SDimitry Andric 
4300b57cec5SDimitry Andric   printAndVerify("After StackSlotColoring");
4310b57cec5SDimitry Andric }
4320b57cec5SDimitry Andric 
4330b57cec5SDimitry Andric void NVPTXPassConfig::addMachineSSAOptimization() {
4340b57cec5SDimitry Andric   // Pre-ra tail duplication.
4350b57cec5SDimitry Andric   if (addPass(&EarlyTailDuplicateID))
4360b57cec5SDimitry Andric     printAndVerify("After Pre-RegAlloc TailDuplicate");
4370b57cec5SDimitry Andric 
4380b57cec5SDimitry Andric   // Optimize PHIs before DCE: removing dead PHI cycles may make more
4390b57cec5SDimitry Andric   // instructions dead.
4400b57cec5SDimitry Andric   addPass(&OptimizePHIsID);
4410b57cec5SDimitry Andric 
4420b57cec5SDimitry Andric   // This pass merges large allocas. StackSlotColoring is a different pass
4430b57cec5SDimitry Andric   // which merges spill slots.
4440b57cec5SDimitry Andric   addPass(&StackColoringID);
4450b57cec5SDimitry Andric 
4460b57cec5SDimitry Andric   // If the target requests it, assign local variables to stack slots relative
4470b57cec5SDimitry Andric   // to one another and simplify frame index references where possible.
4480b57cec5SDimitry Andric   addPass(&LocalStackSlotAllocationID);
4490b57cec5SDimitry Andric 
4500b57cec5SDimitry Andric   // With optimization, dead code should already be eliminated. However
4510b57cec5SDimitry Andric   // there is one known exception: lowered code for arguments that are only
4520b57cec5SDimitry Andric   // used by tail calls, where the tail calls reuse the incoming stack
4530b57cec5SDimitry Andric   // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
4540b57cec5SDimitry Andric   addPass(&DeadMachineInstructionElimID);
4550b57cec5SDimitry Andric   printAndVerify("After codegen DCE pass");
4560b57cec5SDimitry Andric 
4570b57cec5SDimitry Andric   // Allow targets to insert passes that improve instruction level parallelism,
4580b57cec5SDimitry Andric   // like if-conversion. Such passes will typically need dominator trees and
4590b57cec5SDimitry Andric   // loop info, just like LICM and CSE below.
4600b57cec5SDimitry Andric   if (addILPOpts())
4610b57cec5SDimitry Andric     printAndVerify("After ILP optimizations");
4620b57cec5SDimitry Andric 
4630b57cec5SDimitry Andric   addPass(&EarlyMachineLICMID);
4640b57cec5SDimitry Andric   addPass(&MachineCSEID);
4650b57cec5SDimitry Andric 
4660b57cec5SDimitry Andric   addPass(&MachineSinkingID);
4670b57cec5SDimitry Andric   printAndVerify("After Machine LICM, CSE and Sinking passes");
4680b57cec5SDimitry Andric 
4690b57cec5SDimitry Andric   addPass(&PeepholeOptimizerID);
4700b57cec5SDimitry Andric   printAndVerify("After codegen peephole optimization pass");
4710b57cec5SDimitry Andric }
472