10b57cec5SDimitry Andric //===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // Top-level implementation for the NVPTX target. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "NVPTXTargetMachine.h" 140b57cec5SDimitry Andric #include "NVPTX.h" 1506c3fb27SDimitry Andric #include "NVPTXAliasAnalysis.h" 160b57cec5SDimitry Andric #include "NVPTXAllocaHoisting.h" 17fe6060f1SDimitry Andric #include "NVPTXAtomicLower.h" 1806c3fb27SDimitry Andric #include "NVPTXCtorDtorLowering.h" 190b57cec5SDimitry Andric #include "NVPTXLowerAggrCopies.h" 20bdd1243dSDimitry Andric #include "NVPTXMachineFunctionInfo.h" 210b57cec5SDimitry Andric #include "NVPTXTargetObjectFile.h" 220b57cec5SDimitry Andric #include "NVPTXTargetTransformInfo.h" 230b57cec5SDimitry Andric #include "TargetInfo/NVPTXTargetInfo.h" 240b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 250b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 260b57cec5SDimitry Andric #include "llvm/CodeGen/Passes.h" 270b57cec5SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 28349cc55cSDimitry Andric #include "llvm/IR/IntrinsicsNVPTX.h" 29349cc55cSDimitry Andric #include "llvm/MC/TargetRegistry.h" 300b57cec5SDimitry Andric #include "llvm/Pass.h" 31e8d8bef9SDimitry Andric #include "llvm/Passes/PassBuilder.h" 320b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 330b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h" 340b57cec5SDimitry Andric #include "llvm/Target/TargetOptions.h" 3506c3fb27SDimitry Andric #include "llvm/TargetParser/Triple.h" 36*0fca6ea1SDimitry Andric #include "llvm/Transforms/IPO/ExpandVariadics.h" 370b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h" 380b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/GVN.h" 3906c3fb27SDimitry Andric #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h" 400b57cec5SDimitry Andric #include <cassert> 41bdd1243dSDimitry Andric #include <optional> 420b57cec5SDimitry Andric #include <string> 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric using namespace llvm; 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric // LSV is still relatively new; this switch lets us turn it off in case we 470b57cec5SDimitry Andric // encounter (or suspect) a bug. 480b57cec5SDimitry Andric static cl::opt<bool> 490b57cec5SDimitry Andric DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer", 500b57cec5SDimitry Andric cl::desc("Disable load/store vectorizer"), 510b57cec5SDimitry Andric cl::init(false), cl::Hidden); 520b57cec5SDimitry Andric 530b57cec5SDimitry Andric // TODO: Remove this flag when we are confident with no regressions. 540b57cec5SDimitry Andric static cl::opt<bool> DisableRequireStructuredCFG( 550b57cec5SDimitry Andric "disable-nvptx-require-structured-cfg", 560b57cec5SDimitry Andric cl::desc("Transitional flag to turn off NVPTX's requirement on preserving " 570b57cec5SDimitry Andric "structured CFG. The requirement should be disabled only when " 580b57cec5SDimitry Andric "unexpected regressions happen."), 590b57cec5SDimitry Andric cl::init(false), cl::Hidden); 600b57cec5SDimitry Andric 610b57cec5SDimitry Andric static cl::opt<bool> UseShortPointersOpt( 620b57cec5SDimitry Andric "nvptx-short-ptr", 630b57cec5SDimitry Andric cl::desc( 640b57cec5SDimitry Andric "Use 32-bit pointers for accessing const/local/shared address spaces."), 650b57cec5SDimitry Andric cl::init(false), cl::Hidden); 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric namespace llvm { 680b57cec5SDimitry Andric 6906c3fb27SDimitry Andric void initializeGenericToNVVMLegacyPassPass(PassRegistry &); 700b57cec5SDimitry Andric void initializeNVPTXAllocaHoistingPass(PassRegistry &); 710b57cec5SDimitry Andric void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry &); 72bdd1243dSDimitry Andric void initializeNVPTXAtomicLowerPass(PassRegistry &); 7306c3fb27SDimitry Andric void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &); 740b57cec5SDimitry Andric void initializeNVPTXLowerAggrCopiesPass(PassRegistry &); 750b57cec5SDimitry Andric void initializeNVPTXLowerAllocaPass(PassRegistry &); 7606c3fb27SDimitry Andric void initializeNVPTXLowerUnreachablePass(PassRegistry &); 7706c3fb27SDimitry Andric void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &); 78bdd1243dSDimitry Andric void initializeNVPTXLowerArgsPass(PassRegistry &); 790b57cec5SDimitry Andric void initializeNVPTXProxyRegErasurePass(PassRegistry &); 80bdd1243dSDimitry Andric void initializeNVVMIntrRangePass(PassRegistry &); 81bdd1243dSDimitry Andric void initializeNVVMReflectPass(PassRegistry &); 8206c3fb27SDimitry Andric void initializeNVPTXAAWrapperPassPass(PassRegistry &); 8306c3fb27SDimitry Andric void initializeNVPTXExternalAAWrapperPass(PassRegistry &); 840b57cec5SDimitry Andric 850b57cec5SDimitry Andric } // end namespace llvm 860b57cec5SDimitry Andric 87480093f4SDimitry Andric extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() { 880b57cec5SDimitry Andric // Register the target. 890b57cec5SDimitry Andric RegisterTargetMachine<NVPTXTargetMachine32> X(getTheNVPTXTarget32()); 900b57cec5SDimitry Andric RegisterTargetMachine<NVPTXTargetMachine64> Y(getTheNVPTXTarget64()); 910b57cec5SDimitry Andric 92bdd1243dSDimitry Andric PassRegistry &PR = *PassRegistry::getPassRegistry(); 930b57cec5SDimitry Andric // FIXME: This pass is really intended to be invoked during IR optimization, 940b57cec5SDimitry Andric // but it's very NVPTX-specific. 950b57cec5SDimitry Andric initializeNVVMReflectPass(PR); 960b57cec5SDimitry Andric initializeNVVMIntrRangePass(PR); 9706c3fb27SDimitry Andric initializeGenericToNVVMLegacyPassPass(PR); 980b57cec5SDimitry Andric initializeNVPTXAllocaHoistingPass(PR); 990b57cec5SDimitry Andric initializeNVPTXAssignValidGlobalNamesPass(PR); 100fe6060f1SDimitry Andric initializeNVPTXAtomicLowerPass(PR); 1010b57cec5SDimitry Andric initializeNVPTXLowerArgsPass(PR); 1020b57cec5SDimitry Andric initializeNVPTXLowerAllocaPass(PR); 10306c3fb27SDimitry Andric initializeNVPTXLowerUnreachablePass(PR); 10406c3fb27SDimitry Andric initializeNVPTXCtorDtorLoweringLegacyPass(PR); 1050b57cec5SDimitry Andric initializeNVPTXLowerAggrCopiesPass(PR); 1060b57cec5SDimitry Andric initializeNVPTXProxyRegErasurePass(PR); 107*0fca6ea1SDimitry Andric initializeNVPTXDAGToDAGISelLegacyPass(PR); 10806c3fb27SDimitry Andric initializeNVPTXAAWrapperPassPass(PR); 10906c3fb27SDimitry Andric initializeNVPTXExternalAAWrapperPass(PR); 1100b57cec5SDimitry Andric } 1110b57cec5SDimitry Andric 1120b57cec5SDimitry Andric static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) { 1130b57cec5SDimitry Andric std::string Ret = "e"; 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric if (!is64Bit) 1160b57cec5SDimitry Andric Ret += "-p:32:32"; 1170b57cec5SDimitry Andric else if (UseShortPointers) 1180b57cec5SDimitry Andric Ret += "-p3:32:32-p4:32:32-p5:32:32"; 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64"; 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric return Ret; 1230b57cec5SDimitry Andric } 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT, 1260b57cec5SDimitry Andric StringRef CPU, StringRef FS, 1270b57cec5SDimitry Andric const TargetOptions &Options, 128bdd1243dSDimitry Andric std::optional<Reloc::Model> RM, 129bdd1243dSDimitry Andric std::optional<CodeModel::Model> CM, 1305f757f3fSDimitry Andric CodeGenOptLevel OL, bool is64bit) 1310b57cec5SDimitry Andric // The pic relocation model is used regardless of what the client has 1320b57cec5SDimitry Andric // specified, as it is the only relocation model currently supported. 1330b57cec5SDimitry Andric : LLVMTargetMachine(T, computeDataLayout(is64bit, UseShortPointersOpt), TT, 1340b57cec5SDimitry Andric CPU, FS, Options, Reloc::PIC_, 1350b57cec5SDimitry Andric getEffectiveCodeModel(CM, CodeModel::Small), OL), 136*0fca6ea1SDimitry Andric is64bit(is64bit), TLOF(std::make_unique<NVPTXTargetObjectFile>()), 137bdd1243dSDimitry Andric Subtarget(TT, std::string(CPU), std::string(FS), *this), 138bdd1243dSDimitry Andric StrPool(StrAlloc) { 1390b57cec5SDimitry Andric if (TT.getOS() == Triple::NVCL) 1400b57cec5SDimitry Andric drvInterface = NVPTX::NVCL; 1410b57cec5SDimitry Andric else 1420b57cec5SDimitry Andric drvInterface = NVPTX::CUDA; 1430b57cec5SDimitry Andric if (!DisableRequireStructuredCFG) 1440b57cec5SDimitry Andric setRequiresStructuredCFG(true); 1450b57cec5SDimitry Andric initAsmInfo(); 1460b57cec5SDimitry Andric } 1470b57cec5SDimitry Andric 1480b57cec5SDimitry Andric NVPTXTargetMachine::~NVPTXTargetMachine() = default; 1490b57cec5SDimitry Andric 1500b57cec5SDimitry Andric void NVPTXTargetMachine32::anchor() {} 1510b57cec5SDimitry Andric 1520b57cec5SDimitry Andric NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT, 1530b57cec5SDimitry Andric StringRef CPU, StringRef FS, 1540b57cec5SDimitry Andric const TargetOptions &Options, 155bdd1243dSDimitry Andric std::optional<Reloc::Model> RM, 156bdd1243dSDimitry Andric std::optional<CodeModel::Model> CM, 1575f757f3fSDimitry Andric CodeGenOptLevel OL, bool JIT) 1580b57cec5SDimitry Andric : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} 1590b57cec5SDimitry Andric 1600b57cec5SDimitry Andric void NVPTXTargetMachine64::anchor() {} 1610b57cec5SDimitry Andric 1620b57cec5SDimitry Andric NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT, 1630b57cec5SDimitry Andric StringRef CPU, StringRef FS, 1640b57cec5SDimitry Andric const TargetOptions &Options, 165bdd1243dSDimitry Andric std::optional<Reloc::Model> RM, 166bdd1243dSDimitry Andric std::optional<CodeModel::Model> CM, 1675f757f3fSDimitry Andric CodeGenOptLevel OL, bool JIT) 1680b57cec5SDimitry Andric : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} 1690b57cec5SDimitry Andric 1700b57cec5SDimitry Andric namespace { 1710b57cec5SDimitry Andric 1720b57cec5SDimitry Andric class NVPTXPassConfig : public TargetPassConfig { 1730b57cec5SDimitry Andric public: 1740b57cec5SDimitry Andric NVPTXPassConfig(NVPTXTargetMachine &TM, PassManagerBase &PM) 1750b57cec5SDimitry Andric : TargetPassConfig(TM, PM) {} 1760b57cec5SDimitry Andric 1770b57cec5SDimitry Andric NVPTXTargetMachine &getNVPTXTargetMachine() const { 1780b57cec5SDimitry Andric return getTM<NVPTXTargetMachine>(); 1790b57cec5SDimitry Andric } 1800b57cec5SDimitry Andric 1810b57cec5SDimitry Andric void addIRPasses() override; 1820b57cec5SDimitry Andric bool addInstSelector() override; 1830b57cec5SDimitry Andric void addPreRegAlloc() override; 1840b57cec5SDimitry Andric void addPostRegAlloc() override; 1850b57cec5SDimitry Andric void addMachineSSAOptimization() override; 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric FunctionPass *createTargetRegisterAllocator(bool) override; 1880b57cec5SDimitry Andric void addFastRegAlloc() override; 1890b57cec5SDimitry Andric void addOptimizedRegAlloc() override; 1900b57cec5SDimitry Andric 191e8d8bef9SDimitry Andric bool addRegAssignAndRewriteFast() override { 1920b57cec5SDimitry Andric llvm_unreachable("should not be used"); 1930b57cec5SDimitry Andric } 1940b57cec5SDimitry Andric 195e8d8bef9SDimitry Andric bool addRegAssignAndRewriteOptimized() override { 1960b57cec5SDimitry Andric llvm_unreachable("should not be used"); 1970b57cec5SDimitry Andric } 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric private: 2000b57cec5SDimitry Andric // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This 2010b57cec5SDimitry Andric // function is only called in opt mode. 2020b57cec5SDimitry Andric void addEarlyCSEOrGVNPass(); 2030b57cec5SDimitry Andric 2040b57cec5SDimitry Andric // Add passes that propagate special memory spaces. 2050b57cec5SDimitry Andric void addAddressSpaceInferencePasses(); 2060b57cec5SDimitry Andric 2070b57cec5SDimitry Andric // Add passes that perform straight-line scalar optimizations. 2080b57cec5SDimitry Andric void addStraightLineScalarOptimizationPasses(); 2090b57cec5SDimitry Andric }; 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric } // end anonymous namespace 2120b57cec5SDimitry Andric 2130b57cec5SDimitry Andric TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { 2140b57cec5SDimitry Andric return new NVPTXPassConfig(*this, PM); 2150b57cec5SDimitry Andric } 2160b57cec5SDimitry Andric 217bdd1243dSDimitry Andric MachineFunctionInfo *NVPTXTargetMachine::createMachineFunctionInfo( 218bdd1243dSDimitry Andric BumpPtrAllocator &Allocator, const Function &F, 219bdd1243dSDimitry Andric const TargetSubtargetInfo *STI) const { 220bdd1243dSDimitry Andric return NVPTXMachineFunctionInfo::create<NVPTXMachineFunctionInfo>(Allocator, 221bdd1243dSDimitry Andric F, STI); 2220b57cec5SDimitry Andric } 2230b57cec5SDimitry Andric 22406c3fb27SDimitry Andric void NVPTXTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) { 22506c3fb27SDimitry Andric AAM.registerFunctionAnalysis<NVPTXAA>(); 22606c3fb27SDimitry Andric } 22706c3fb27SDimitry Andric 228*0fca6ea1SDimitry Andric void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { 229*0fca6ea1SDimitry Andric #define GET_PASS_REGISTRY "NVPTXPassRegistry.def" 230*0fca6ea1SDimitry Andric #include "llvm/Passes/TargetPassRegistry.inc" 23106c3fb27SDimitry Andric 232e8d8bef9SDimitry Andric PB.registerPipelineStartEPCallback( 233349cc55cSDimitry Andric [this](ModulePassManager &PM, OptimizationLevel Level) { 234fe6060f1SDimitry Andric FunctionPassManager FPM; 235e8d8bef9SDimitry Andric FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion())); 236*0fca6ea1SDimitry Andric // Note: NVVMIntrRangePass was causing numerical discrepancies at one 237*0fca6ea1SDimitry Andric // point, if issues crop up, consider disabling. 238*0fca6ea1SDimitry Andric FPM.addPass(NVVMIntrRangePass()); 239e8d8bef9SDimitry Andric PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); 240e8d8bef9SDimitry Andric }); 241e8d8bef9SDimitry Andric } 242e8d8bef9SDimitry Andric 2430b57cec5SDimitry Andric TargetTransformInfo 24481ad6265SDimitry Andric NVPTXTargetMachine::getTargetTransformInfo(const Function &F) const { 2450b57cec5SDimitry Andric return TargetTransformInfo(NVPTXTTIImpl(this, F)); 2460b57cec5SDimitry Andric } 2470b57cec5SDimitry Andric 248349cc55cSDimitry Andric std::pair<const Value *, unsigned> 249349cc55cSDimitry Andric NVPTXTargetMachine::getPredicatedAddrSpace(const Value *V) const { 250349cc55cSDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(V)) { 251349cc55cSDimitry Andric switch (II->getIntrinsicID()) { 252349cc55cSDimitry Andric case Intrinsic::nvvm_isspacep_const: 253349cc55cSDimitry Andric return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_CONST); 254349cc55cSDimitry Andric case Intrinsic::nvvm_isspacep_global: 255349cc55cSDimitry Andric return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_GLOBAL); 256349cc55cSDimitry Andric case Intrinsic::nvvm_isspacep_local: 257349cc55cSDimitry Andric return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_LOCAL); 258349cc55cSDimitry Andric case Intrinsic::nvvm_isspacep_shared: 25906c3fb27SDimitry Andric case Intrinsic::nvvm_isspacep_shared_cluster: 260349cc55cSDimitry Andric return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_SHARED); 261349cc55cSDimitry Andric default: 262349cc55cSDimitry Andric break; 263349cc55cSDimitry Andric } 264349cc55cSDimitry Andric } 265349cc55cSDimitry Andric return std::make_pair(nullptr, -1); 266349cc55cSDimitry Andric } 267349cc55cSDimitry Andric 2680b57cec5SDimitry Andric void NVPTXPassConfig::addEarlyCSEOrGVNPass() { 2695f757f3fSDimitry Andric if (getOptLevel() == CodeGenOptLevel::Aggressive) 2700b57cec5SDimitry Andric addPass(createGVNPass()); 2710b57cec5SDimitry Andric else 2720b57cec5SDimitry Andric addPass(createEarlyCSEPass()); 2730b57cec5SDimitry Andric } 2740b57cec5SDimitry Andric 2750b57cec5SDimitry Andric void NVPTXPassConfig::addAddressSpaceInferencePasses() { 2760b57cec5SDimitry Andric // NVPTXLowerArgs emits alloca for byval parameters which can often 2770b57cec5SDimitry Andric // be eliminated by SROA. 2780b57cec5SDimitry Andric addPass(createSROAPass()); 2790b57cec5SDimitry Andric addPass(createNVPTXLowerAllocaPass()); 2800b57cec5SDimitry Andric addPass(createInferAddressSpacesPass()); 281fe6060f1SDimitry Andric addPass(createNVPTXAtomicLowerPass()); 2820b57cec5SDimitry Andric } 2830b57cec5SDimitry Andric 2840b57cec5SDimitry Andric void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() { 2850b57cec5SDimitry Andric addPass(createSeparateConstOffsetFromGEPPass()); 2860b57cec5SDimitry Andric addPass(createSpeculativeExecutionPass()); 2870b57cec5SDimitry Andric // ReassociateGEPs exposes more opportunites for SLSR. See 2880b57cec5SDimitry Andric // the example in reassociate-geps-and-slsr.ll. 2890b57cec5SDimitry Andric addPass(createStraightLineStrengthReducePass()); 2900b57cec5SDimitry Andric // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or 2910b57cec5SDimitry Andric // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE 2920b57cec5SDimitry Andric // for some of our benchmarks. 2930b57cec5SDimitry Andric addEarlyCSEOrGVNPass(); 2940b57cec5SDimitry Andric // Run NaryReassociate after EarlyCSE/GVN to be more effective. 2950b57cec5SDimitry Andric addPass(createNaryReassociatePass()); 2960b57cec5SDimitry Andric // NaryReassociate on GEPs creates redundant common expressions, so run 2970b57cec5SDimitry Andric // EarlyCSE after it. 2980b57cec5SDimitry Andric addPass(createEarlyCSEPass()); 2990b57cec5SDimitry Andric } 3000b57cec5SDimitry Andric 3010b57cec5SDimitry Andric void NVPTXPassConfig::addIRPasses() { 3020b57cec5SDimitry Andric // The following passes are known to not play well with virtual regs hanging 3030b57cec5SDimitry Andric // around after register allocation (which in our case, is *all* registers). 3040b57cec5SDimitry Andric // We explicitly disable them here. We do, however, need some functionality 3050b57cec5SDimitry Andric // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the 3060b57cec5SDimitry Andric // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). 3070b57cec5SDimitry Andric disablePass(&PrologEpilogCodeInserterID); 308bdd1243dSDimitry Andric disablePass(&MachineLateInstrsCleanupID); 3090b57cec5SDimitry Andric disablePass(&MachineCopyPropagationID); 3100b57cec5SDimitry Andric disablePass(&TailDuplicateID); 3110b57cec5SDimitry Andric disablePass(&StackMapLivenessID); 3120b57cec5SDimitry Andric disablePass(&LiveDebugValuesID); 3130b57cec5SDimitry Andric disablePass(&PostRAMachineSinkingID); 3140b57cec5SDimitry Andric disablePass(&PostRASchedulerID); 3150b57cec5SDimitry Andric disablePass(&FuncletLayoutID); 3160b57cec5SDimitry Andric disablePass(&PatchableFunctionID); 3170b57cec5SDimitry Andric disablePass(&ShrinkWrapID); 3180b57cec5SDimitry Andric 31906c3fb27SDimitry Andric addPass(createNVPTXAAWrapperPass()); 32006c3fb27SDimitry Andric addPass(createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) { 32106c3fb27SDimitry Andric if (auto *WrapperPass = P.getAnalysisIfAvailable<NVPTXAAWrapperPass>()) 32206c3fb27SDimitry Andric AAR.addAAResult(WrapperPass->getResult()); 32306c3fb27SDimitry Andric })); 32406c3fb27SDimitry Andric 3250b57cec5SDimitry Andric // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running 3260b57cec5SDimitry Andric // it here does nothing. But since we need it for correctness when lowering 3270b57cec5SDimitry Andric // to NVPTX, run it here too, in case whoever built our pass pipeline didn't 3280b57cec5SDimitry Andric // call addEarlyAsPossiblePasses. 3290b57cec5SDimitry Andric const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl(); 3300b57cec5SDimitry Andric addPass(createNVVMReflectPass(ST.getSmVersion())); 3310b57cec5SDimitry Andric 3325f757f3fSDimitry Andric if (getOptLevel() != CodeGenOptLevel::None) 3330b57cec5SDimitry Andric addPass(createNVPTXImageOptimizerPass()); 3340b57cec5SDimitry Andric addPass(createNVPTXAssignValidGlobalNamesPass()); 33506c3fb27SDimitry Andric addPass(createGenericToNVVMLegacyPass()); 3360b57cec5SDimitry Andric 3370b57cec5SDimitry Andric // NVPTXLowerArgs is required for correctness and should be run right 3380b57cec5SDimitry Andric // before the address space inference passes. 33906c3fb27SDimitry Andric addPass(createNVPTXLowerArgsPass()); 3405f757f3fSDimitry Andric if (getOptLevel() != CodeGenOptLevel::None) { 3410b57cec5SDimitry Andric addAddressSpaceInferencePasses(); 3420b57cec5SDimitry Andric addStraightLineScalarOptimizationPasses(); 3430b57cec5SDimitry Andric } 3440b57cec5SDimitry Andric 345*0fca6ea1SDimitry Andric addPass(createAtomicExpandLegacyPass()); 346*0fca6ea1SDimitry Andric addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering)); 34706c3fb27SDimitry Andric addPass(createNVPTXCtorDtorLoweringLegacyPass()); 34881ad6265SDimitry Andric 3490b57cec5SDimitry Andric // === LSR and other generic IR passes === 3500b57cec5SDimitry Andric TargetPassConfig::addIRPasses(); 3510b57cec5SDimitry Andric // EarlyCSE is not always strong enough to clean up what LSR produces. For 3520b57cec5SDimitry Andric // example, GVN can combine 3530b57cec5SDimitry Andric // 3540b57cec5SDimitry Andric // %0 = add %a, %b 3550b57cec5SDimitry Andric // %1 = add %b, %a 3560b57cec5SDimitry Andric // 3570b57cec5SDimitry Andric // and 3580b57cec5SDimitry Andric // 3590b57cec5SDimitry Andric // %0 = shl nsw %a, 2 3600b57cec5SDimitry Andric // %1 = shl %a, 2 3610b57cec5SDimitry Andric // 3620b57cec5SDimitry Andric // but EarlyCSE can do neither of them. 3635f757f3fSDimitry Andric if (getOptLevel() != CodeGenOptLevel::None) { 3640b57cec5SDimitry Andric addEarlyCSEOrGVNPass(); 3655ffd83dbSDimitry Andric if (!DisableLoadStoreVectorizer) 3665ffd83dbSDimitry Andric addPass(createLoadStoreVectorizerPass()); 367349cc55cSDimitry Andric addPass(createSROAPass()); 3685ffd83dbSDimitry Andric } 36906c3fb27SDimitry Andric 3705f757f3fSDimitry Andric const auto &Options = getNVPTXTargetMachine().Options; 3715f757f3fSDimitry Andric addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable, 3725f757f3fSDimitry Andric Options.NoTrapAfterNoreturn)); 3730b57cec5SDimitry Andric } 3740b57cec5SDimitry Andric 3750b57cec5SDimitry Andric bool NVPTXPassConfig::addInstSelector() { 3760b57cec5SDimitry Andric const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl(); 3770b57cec5SDimitry Andric 3780b57cec5SDimitry Andric addPass(createLowerAggrCopies()); 3790b57cec5SDimitry Andric addPass(createAllocaHoisting()); 3800b57cec5SDimitry Andric addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); 3810b57cec5SDimitry Andric 3820b57cec5SDimitry Andric if (!ST.hasImageHandles()) 3830b57cec5SDimitry Andric addPass(createNVPTXReplaceImageHandlesPass()); 3840b57cec5SDimitry Andric 3850b57cec5SDimitry Andric return false; 3860b57cec5SDimitry Andric } 3870b57cec5SDimitry Andric 3880b57cec5SDimitry Andric void NVPTXPassConfig::addPreRegAlloc() { 3890b57cec5SDimitry Andric // Remove Proxy Register pseudo instructions used to keep `callseq_end` alive. 3900b57cec5SDimitry Andric addPass(createNVPTXProxyRegErasurePass()); 3910b57cec5SDimitry Andric } 3920b57cec5SDimitry Andric 3930b57cec5SDimitry Andric void NVPTXPassConfig::addPostRegAlloc() { 394349cc55cSDimitry Andric addPass(createNVPTXPrologEpilogPass()); 3955f757f3fSDimitry Andric if (getOptLevel() != CodeGenOptLevel::None) { 3960b57cec5SDimitry Andric // NVPTXPrologEpilogPass calculates frame object offset and replace frame 3970b57cec5SDimitry Andric // index with VRFrame register. NVPTXPeephole need to be run after that and 3980b57cec5SDimitry Andric // will replace VRFrame with VRFrameLocal when possible. 3990b57cec5SDimitry Andric addPass(createNVPTXPeephole()); 4000b57cec5SDimitry Andric } 4010b57cec5SDimitry Andric } 4020b57cec5SDimitry Andric 4030b57cec5SDimitry Andric FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { 4040b57cec5SDimitry Andric return nullptr; // No reg alloc 4050b57cec5SDimitry Andric } 4060b57cec5SDimitry Andric 4070b57cec5SDimitry Andric void NVPTXPassConfig::addFastRegAlloc() { 4080b57cec5SDimitry Andric addPass(&PHIEliminationID); 4090b57cec5SDimitry Andric addPass(&TwoAddressInstructionPassID); 4100b57cec5SDimitry Andric } 4110b57cec5SDimitry Andric 4120b57cec5SDimitry Andric void NVPTXPassConfig::addOptimizedRegAlloc() { 4130b57cec5SDimitry Andric addPass(&ProcessImplicitDefsID); 4140b57cec5SDimitry Andric addPass(&LiveVariablesID); 4150b57cec5SDimitry Andric addPass(&MachineLoopInfoID); 4160b57cec5SDimitry Andric addPass(&PHIEliminationID); 4170b57cec5SDimitry Andric 4180b57cec5SDimitry Andric addPass(&TwoAddressInstructionPassID); 4190b57cec5SDimitry Andric addPass(&RegisterCoalescerID); 4200b57cec5SDimitry Andric 4210b57cec5SDimitry Andric // PreRA instruction scheduling. 4220b57cec5SDimitry Andric if (addPass(&MachineSchedulerID)) 4230b57cec5SDimitry Andric printAndVerify("After Machine Scheduling"); 4240b57cec5SDimitry Andric 4250b57cec5SDimitry Andric addPass(&StackSlotColoringID); 4260b57cec5SDimitry Andric 4270b57cec5SDimitry Andric // FIXME: Needs physical registers 4280b57cec5SDimitry Andric // addPass(&MachineLICMID); 4290b57cec5SDimitry Andric 4300b57cec5SDimitry Andric printAndVerify("After StackSlotColoring"); 4310b57cec5SDimitry Andric } 4320b57cec5SDimitry Andric 4330b57cec5SDimitry Andric void NVPTXPassConfig::addMachineSSAOptimization() { 4340b57cec5SDimitry Andric // Pre-ra tail duplication. 4350b57cec5SDimitry Andric if (addPass(&EarlyTailDuplicateID)) 4360b57cec5SDimitry Andric printAndVerify("After Pre-RegAlloc TailDuplicate"); 4370b57cec5SDimitry Andric 4380b57cec5SDimitry Andric // Optimize PHIs before DCE: removing dead PHI cycles may make more 4390b57cec5SDimitry Andric // instructions dead. 4400b57cec5SDimitry Andric addPass(&OptimizePHIsID); 4410b57cec5SDimitry Andric 4420b57cec5SDimitry Andric // This pass merges large allocas. StackSlotColoring is a different pass 4430b57cec5SDimitry Andric // which merges spill slots. 4440b57cec5SDimitry Andric addPass(&StackColoringID); 4450b57cec5SDimitry Andric 4460b57cec5SDimitry Andric // If the target requests it, assign local variables to stack slots relative 4470b57cec5SDimitry Andric // to one another and simplify frame index references where possible. 4480b57cec5SDimitry Andric addPass(&LocalStackSlotAllocationID); 4490b57cec5SDimitry Andric 4500b57cec5SDimitry Andric // With optimization, dead code should already be eliminated. However 4510b57cec5SDimitry Andric // there is one known exception: lowered code for arguments that are only 4520b57cec5SDimitry Andric // used by tail calls, where the tail calls reuse the incoming stack 4530b57cec5SDimitry Andric // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). 4540b57cec5SDimitry Andric addPass(&DeadMachineInstructionElimID); 4550b57cec5SDimitry Andric printAndVerify("After codegen DCE pass"); 4560b57cec5SDimitry Andric 4570b57cec5SDimitry Andric // Allow targets to insert passes that improve instruction level parallelism, 4580b57cec5SDimitry Andric // like if-conversion. Such passes will typically need dominator trees and 4590b57cec5SDimitry Andric // loop info, just like LICM and CSE below. 4600b57cec5SDimitry Andric if (addILPOpts()) 4610b57cec5SDimitry Andric printAndVerify("After ILP optimizations"); 4620b57cec5SDimitry Andric 4630b57cec5SDimitry Andric addPass(&EarlyMachineLICMID); 4640b57cec5SDimitry Andric addPass(&MachineCSEID); 4650b57cec5SDimitry Andric 4660b57cec5SDimitry Andric addPass(&MachineSinkingID); 4670b57cec5SDimitry Andric printAndVerify("After Machine LICM, CSE and Sinking passes"); 4680b57cec5SDimitry Andric 4690b57cec5SDimitry Andric addPass(&PeepholeOptimizerID); 4700b57cec5SDimitry Andric printAndVerify("After codegen peephole optimization pass"); 4710b57cec5SDimitry Andric } 472