10b57cec5SDimitry Andric //===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file defines the interfaces that NVPTX uses to lower LLVM code into a 100b57cec5SDimitry Andric // selection DAG. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "NVPTXISelLowering.h" 150b57cec5SDimitry Andric #include "MCTargetDesc/NVPTXBaseInfo.h" 160b57cec5SDimitry Andric #include "NVPTX.h" 170b57cec5SDimitry Andric #include "NVPTXSubtarget.h" 180b57cec5SDimitry Andric #include "NVPTXTargetMachine.h" 190b57cec5SDimitry Andric #include "NVPTXTargetObjectFile.h" 200b57cec5SDimitry Andric #include "NVPTXUtilities.h" 210b57cec5SDimitry Andric #include "llvm/ADT/APInt.h" 22e8d8bef9SDimitry Andric #include "llvm/ADT/STLExtras.h" 230b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 240b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 250b57cec5SDimitry Andric #include "llvm/CodeGen/Analysis.h" 265f757f3fSDimitry Andric #include "llvm/CodeGen/ISDOpcodes.h" 270b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 280b57cec5SDimitry Andric #include "llvm/CodeGen/MachineMemOperand.h" 290b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAG.h" 300b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAGNodes.h" 310b57cec5SDimitry Andric #include "llvm/CodeGen/TargetCallingConv.h" 320b57cec5SDimitry Andric 
#include "llvm/CodeGen/TargetLowering.h" 330b57cec5SDimitry Andric #include "llvm/CodeGen/ValueTypes.h" 340fca6ea1SDimitry Andric #include "llvm/CodeGenTypes/MachineValueType.h" 350b57cec5SDimitry Andric #include "llvm/IR/Argument.h" 360b57cec5SDimitry Andric #include "llvm/IR/Attributes.h" 370b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 380b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h" 390b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h" 405f757f3fSDimitry Andric #include "llvm/IR/DiagnosticInfo.h" 4181ad6265SDimitry Andric #include "llvm/IR/FPEnv.h" 420b57cec5SDimitry Andric #include "llvm/IR/Function.h" 430b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h" 440b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 450b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 46480093f4SDimitry Andric #include "llvm/IR/IntrinsicsNVPTX.h" 470b57cec5SDimitry Andric #include "llvm/IR/Module.h" 480b57cec5SDimitry Andric #include "llvm/IR/Type.h" 490b57cec5SDimitry Andric #include "llvm/IR/Value.h" 500fca6ea1SDimitry Andric #include "llvm/Support/Alignment.h" 510b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 520b57cec5SDimitry Andric #include "llvm/Support/CodeGen.h" 530b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 540b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 550b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 560b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h" 570b57cec5SDimitry Andric #include "llvm/Target/TargetOptions.h" 580b57cec5SDimitry Andric #include <algorithm> 590b57cec5SDimitry Andric #include <cassert> 60bdd1243dSDimitry Andric #include <cmath> 610b57cec5SDimitry Andric #include <cstdint> 620b57cec5SDimitry Andric #include <iterator> 630fca6ea1SDimitry Andric #include <optional> 640b57cec5SDimitry Andric #include <sstream> 650b57cec5SDimitry Andric #include <string> 660b57cec5SDimitry Andric #include <utility> 670b57cec5SDimitry Andric #include <vector> 
680b57cec5SDimitry Andric 690b57cec5SDimitry Andric #define DEBUG_TYPE "nvptx-lower" 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric using namespace llvm; 720b57cec5SDimitry Andric 73e8d8bef9SDimitry Andric static std::atomic<unsigned> GlobalUniqueCallSite; 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric static cl::opt<bool> sched4reg( 760b57cec5SDimitry Andric "nvptx-sched4reg", 770b57cec5SDimitry Andric cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); 780b57cec5SDimitry Andric 7981ad6265SDimitry Andric static cl::opt<unsigned> FMAContractLevelOpt( 8081ad6265SDimitry Andric "nvptx-fma-level", cl::Hidden, 810b57cec5SDimitry Andric cl::desc("NVPTX Specific: FMA contraction (0: don't do it" 820b57cec5SDimitry Andric " 1: do it 2: do it aggressively"), 830b57cec5SDimitry Andric cl::init(2)); 840b57cec5SDimitry Andric 850b57cec5SDimitry Andric static cl::opt<int> UsePrecDivF32( 8681ad6265SDimitry Andric "nvptx-prec-divf32", cl::Hidden, 870b57cec5SDimitry Andric cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" 880b57cec5SDimitry Andric " IEEE Compliant F32 div.rnd if available."), 890b57cec5SDimitry Andric cl::init(2)); 900b57cec5SDimitry Andric 910b57cec5SDimitry Andric static cl::opt<bool> UsePrecSqrtF32( 920b57cec5SDimitry Andric "nvptx-prec-sqrtf32", cl::Hidden, 930b57cec5SDimitry Andric cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), 940b57cec5SDimitry Andric cl::init(true)); 950b57cec5SDimitry Andric 9606c3fb27SDimitry Andric static cl::opt<bool> ForceMinByValParamAlign( 9706c3fb27SDimitry Andric "nvptx-force-min-byval-param-align", cl::Hidden, 9806c3fb27SDimitry Andric cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval" 9906c3fb27SDimitry Andric " params of device functions."), 10006c3fb27SDimitry Andric cl::init(false)); 10106c3fb27SDimitry Andric 1020b57cec5SDimitry Andric int NVPTXTargetLowering::getDivF32Level() const { 1030b57cec5SDimitry Andric if 
(UsePrecDivF32.getNumOccurrences() > 0) { 1040b57cec5SDimitry Andric // If nvptx-prec-div32=N is used on the command-line, always honor it 1050b57cec5SDimitry Andric return UsePrecDivF32; 1060b57cec5SDimitry Andric } else { 1070b57cec5SDimitry Andric // Otherwise, use div.approx if fast math is enabled 1080b57cec5SDimitry Andric if (getTargetMachine().Options.UnsafeFPMath) 1090b57cec5SDimitry Andric return 0; 1100b57cec5SDimitry Andric else 1110b57cec5SDimitry Andric return 2; 1120b57cec5SDimitry Andric } 1130b57cec5SDimitry Andric } 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric bool NVPTXTargetLowering::usePrecSqrtF32() const { 1160b57cec5SDimitry Andric if (UsePrecSqrtF32.getNumOccurrences() > 0) { 1170b57cec5SDimitry Andric // If nvptx-prec-sqrtf32 is used on the command-line, always honor it 1180b57cec5SDimitry Andric return UsePrecSqrtF32; 1190b57cec5SDimitry Andric } else { 1200b57cec5SDimitry Andric // Otherwise, use sqrt.approx if fast math is enabled 1210b57cec5SDimitry Andric return !getTargetMachine().Options.UnsafeFPMath; 1220b57cec5SDimitry Andric } 1230b57cec5SDimitry Andric } 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const { 1265ffd83dbSDimitry Andric return MF.getDenormalMode(APFloat::IEEEsingle()).Output == 1275ffd83dbSDimitry Andric DenormalMode::PreserveSign; 1280b57cec5SDimitry Andric } 1290b57cec5SDimitry Andric 1300b57cec5SDimitry Andric static bool IsPTXVectorType(MVT VT) { 1310b57cec5SDimitry Andric switch (VT.SimpleTy) { 1320b57cec5SDimitry Andric default: 1330b57cec5SDimitry Andric return false; 1340b57cec5SDimitry Andric case MVT::v2i1: 1350b57cec5SDimitry Andric case MVT::v4i1: 1360b57cec5SDimitry Andric case MVT::v2i8: 1370b57cec5SDimitry Andric case MVT::v4i8: 1380b57cec5SDimitry Andric case MVT::v2i16: 1390b57cec5SDimitry Andric case MVT::v4i16: 1405f757f3fSDimitry Andric case MVT::v8i16: // <4 x i16x2> 1410b57cec5SDimitry Andric case MVT::v2i32: 
1420b57cec5SDimitry Andric case MVT::v4i32: 1430b57cec5SDimitry Andric case MVT::v2i64: 1440b57cec5SDimitry Andric case MVT::v2f16: 1450b57cec5SDimitry Andric case MVT::v4f16: 1460b57cec5SDimitry Andric case MVT::v8f16: // <4 x f16x2> 147bdd1243dSDimitry Andric case MVT::v2bf16: 148bdd1243dSDimitry Andric case MVT::v4bf16: 149bdd1243dSDimitry Andric case MVT::v8bf16: // <4 x bf16x2> 1500b57cec5SDimitry Andric case MVT::v2f32: 1510b57cec5SDimitry Andric case MVT::v4f32: 1520b57cec5SDimitry Andric case MVT::v2f64: 1530b57cec5SDimitry Andric return true; 1540b57cec5SDimitry Andric } 1550b57cec5SDimitry Andric } 1560b57cec5SDimitry Andric 1575f757f3fSDimitry Andric static bool Is16bitsType(MVT VT) { 1585f757f3fSDimitry Andric return (VT.SimpleTy == MVT::f16 || VT.SimpleTy == MVT::bf16 || 1595f757f3fSDimitry Andric VT.SimpleTy == MVT::i16); 16006c3fb27SDimitry Andric } 16106c3fb27SDimitry Andric 1620b57cec5SDimitry Andric /// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive 1630b57cec5SDimitry Andric /// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors 1640b57cec5SDimitry Andric /// into their primitive components. 1650b57cec5SDimitry Andric /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the 1660b57cec5SDimitry Andric /// same number of types as the Ins/Outs arrays in LowerFormalArguments, 1670b57cec5SDimitry Andric /// LowerCall, and LowerReturn. 
1680b57cec5SDimitry Andric static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, 1690b57cec5SDimitry Andric Type *Ty, SmallVectorImpl<EVT> &ValueVTs, 1700b57cec5SDimitry Andric SmallVectorImpl<uint64_t> *Offsets = nullptr, 1710b57cec5SDimitry Andric uint64_t StartingOffset = 0) { 1720b57cec5SDimitry Andric SmallVector<EVT, 16> TempVTs; 1730b57cec5SDimitry Andric SmallVector<uint64_t, 16> TempOffsets; 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric // Special case for i128 - decompose to (i64, i64) 1760b57cec5SDimitry Andric if (Ty->isIntegerTy(128)) { 1770b57cec5SDimitry Andric ValueVTs.push_back(EVT(MVT::i64)); 1780b57cec5SDimitry Andric ValueVTs.push_back(EVT(MVT::i64)); 1790b57cec5SDimitry Andric 1800b57cec5SDimitry Andric if (Offsets) { 1810b57cec5SDimitry Andric Offsets->push_back(StartingOffset + 0); 1820b57cec5SDimitry Andric Offsets->push_back(StartingOffset + 8); 1830b57cec5SDimitry Andric } 1840b57cec5SDimitry Andric 1850b57cec5SDimitry Andric return; 1860b57cec5SDimitry Andric } 1870b57cec5SDimitry Andric 1880b57cec5SDimitry Andric // Given a struct type, recursively traverse the elements with custom ComputePTXValueVTs. 
1890b57cec5SDimitry Andric if (StructType *STy = dyn_cast<StructType>(Ty)) { 1900b57cec5SDimitry Andric auto const *SL = DL.getStructLayout(STy); 1910b57cec5SDimitry Andric auto ElementNum = 0; 1920b57cec5SDimitry Andric for(auto *EI : STy->elements()) { 1930b57cec5SDimitry Andric ComputePTXValueVTs(TLI, DL, EI, ValueVTs, Offsets, 1940b57cec5SDimitry Andric StartingOffset + SL->getElementOffset(ElementNum)); 1950b57cec5SDimitry Andric ++ElementNum; 1960b57cec5SDimitry Andric } 1970b57cec5SDimitry Andric return; 1980b57cec5SDimitry Andric } 1990b57cec5SDimitry Andric 2000b57cec5SDimitry Andric ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset); 2010b57cec5SDimitry Andric for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { 2020b57cec5SDimitry Andric EVT VT = TempVTs[i]; 2030b57cec5SDimitry Andric uint64_t Off = TempOffsets[i]; 2040b57cec5SDimitry Andric // Split vectors into individual elements, except for v2f16, which 2050b57cec5SDimitry Andric // we will pass as a single scalar. 2060b57cec5SDimitry Andric if (VT.isVector()) { 2070b57cec5SDimitry Andric unsigned NumElts = VT.getVectorNumElements(); 2080b57cec5SDimitry Andric EVT EltVT = VT.getVectorElementType(); 2090b57cec5SDimitry Andric // Vectors with an even number of f16 elements will be passed to 210bdd1243dSDimitry Andric // us as an array of v2f16/v2bf16 elements. We must match this so we 2110b57cec5SDimitry Andric // stay in sync with Ins/Outs. 
2125f757f3fSDimitry Andric if ((Is16bitsType(EltVT.getSimpleVT())) && NumElts % 2 == 0) { 2135f757f3fSDimitry Andric switch (EltVT.getSimpleVT().SimpleTy) { 2145f757f3fSDimitry Andric case MVT::f16: 2155f757f3fSDimitry Andric EltVT = MVT::v2f16; 2165f757f3fSDimitry Andric break; 2175f757f3fSDimitry Andric case MVT::bf16: 2185f757f3fSDimitry Andric EltVT = MVT::v2bf16; 2195f757f3fSDimitry Andric break; 2205f757f3fSDimitry Andric case MVT::i16: 2215f757f3fSDimitry Andric EltVT = MVT::v2i16; 2225f757f3fSDimitry Andric break; 2235f757f3fSDimitry Andric default: 2245f757f3fSDimitry Andric llvm_unreachable("Unexpected type"); 2255f757f3fSDimitry Andric } 2260b57cec5SDimitry Andric NumElts /= 2; 2275f757f3fSDimitry Andric } else if (EltVT.getSimpleVT() == MVT::i8 && 2285f757f3fSDimitry Andric (NumElts % 4 == 0 || NumElts == 3)) { 2295f757f3fSDimitry Andric // v*i8 are formally lowered as v4i8 2305f757f3fSDimitry Andric EltVT = MVT::v4i8; 2315f757f3fSDimitry Andric NumElts = (NumElts + 3) / 4; 232*71ac745dSDimitry Andric } else if (EltVT.getSimpleVT() == MVT::i8 && NumElts == 2) { 233*71ac745dSDimitry Andric // v2i8 is promoted to v2i16 234*71ac745dSDimitry Andric NumElts = 1; 235*71ac745dSDimitry Andric EltVT = MVT::v2i16; 2360b57cec5SDimitry Andric } 2370b57cec5SDimitry Andric for (unsigned j = 0; j != NumElts; ++j) { 2380b57cec5SDimitry Andric ValueVTs.push_back(EltVT); 2390b57cec5SDimitry Andric if (Offsets) 2400b57cec5SDimitry Andric Offsets->push_back(Off + j * EltVT.getStoreSize()); 2410b57cec5SDimitry Andric } 2420b57cec5SDimitry Andric } else { 2430b57cec5SDimitry Andric ValueVTs.push_back(VT); 2440b57cec5SDimitry Andric if (Offsets) 2450b57cec5SDimitry Andric Offsets->push_back(Off); 2460b57cec5SDimitry Andric } 2470b57cec5SDimitry Andric } 2480b57cec5SDimitry Andric } 2490b57cec5SDimitry Andric 250fcaf7f86SDimitry Andric /// PromoteScalarIntegerPTX 251fcaf7f86SDimitry Andric /// Used to make sure the arguments/returns are suitable for passing 252fcaf7f86SDimitry 
Andric /// and promote them to a larger size if they're not. 253fcaf7f86SDimitry Andric /// 254fcaf7f86SDimitry Andric /// The promoted type is placed in \p PromoteVT if the function returns true. 255fcaf7f86SDimitry Andric static bool PromoteScalarIntegerPTX(const EVT &VT, MVT *PromotedVT) { 256fcaf7f86SDimitry Andric if (VT.isScalarInteger()) { 257fcaf7f86SDimitry Andric switch (PowerOf2Ceil(VT.getFixedSizeInBits())) { 258fcaf7f86SDimitry Andric default: 259fcaf7f86SDimitry Andric llvm_unreachable( 260fcaf7f86SDimitry Andric "Promotion is not suitable for scalars of size larger than 64-bits"); 261fcaf7f86SDimitry Andric case 1: 262fcaf7f86SDimitry Andric *PromotedVT = MVT::i1; 263fcaf7f86SDimitry Andric break; 264fcaf7f86SDimitry Andric case 2: 265fcaf7f86SDimitry Andric case 4: 266fcaf7f86SDimitry Andric case 8: 267fcaf7f86SDimitry Andric *PromotedVT = MVT::i8; 268fcaf7f86SDimitry Andric break; 269fcaf7f86SDimitry Andric case 16: 270fcaf7f86SDimitry Andric *PromotedVT = MVT::i16; 271fcaf7f86SDimitry Andric break; 272fcaf7f86SDimitry Andric case 32: 273fcaf7f86SDimitry Andric *PromotedVT = MVT::i32; 274fcaf7f86SDimitry Andric break; 275fcaf7f86SDimitry Andric case 64: 276fcaf7f86SDimitry Andric *PromotedVT = MVT::i64; 277fcaf7f86SDimitry Andric break; 278fcaf7f86SDimitry Andric } 279fcaf7f86SDimitry Andric return EVT(*PromotedVT) != VT; 280fcaf7f86SDimitry Andric } 281fcaf7f86SDimitry Andric return false; 282fcaf7f86SDimitry Andric } 283fcaf7f86SDimitry Andric 2840b57cec5SDimitry Andric // Check whether we can merge loads/stores of some of the pieces of a 2850b57cec5SDimitry Andric // flattened function parameter or return value into a single vector 2860b57cec5SDimitry Andric // load/store. 2870b57cec5SDimitry Andric // 2880b57cec5SDimitry Andric // The flattened parameter is represented as a list of EVTs and 2890b57cec5SDimitry Andric // offsets, and the whole structure is aligned to ParamAlignment. 
This 2900b57cec5SDimitry Andric // function determines whether we can load/store pieces of the 2910b57cec5SDimitry Andric // parameter starting at index Idx using a single vectorized op of 2920b57cec5SDimitry Andric // size AccessSize. If so, it returns the number of param pieces 2930b57cec5SDimitry Andric // covered by the vector op. Otherwise, it returns 1. 2940b57cec5SDimitry Andric static unsigned CanMergeParamLoadStoresStartingAt( 2950b57cec5SDimitry Andric unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs, 2965ffd83dbSDimitry Andric const SmallVectorImpl<uint64_t> &Offsets, Align ParamAlignment) { 2970b57cec5SDimitry Andric 2980b57cec5SDimitry Andric // Can't vectorize if param alignment is not sufficient. 2995ffd83dbSDimitry Andric if (ParamAlignment < AccessSize) 3000b57cec5SDimitry Andric return 1; 3010b57cec5SDimitry Andric // Can't vectorize if offset is not aligned. 3020b57cec5SDimitry Andric if (Offsets[Idx] & (AccessSize - 1)) 3030b57cec5SDimitry Andric return 1; 3040b57cec5SDimitry Andric 3050b57cec5SDimitry Andric EVT EltVT = ValueVTs[Idx]; 3060b57cec5SDimitry Andric unsigned EltSize = EltVT.getStoreSize(); 3070b57cec5SDimitry Andric 3080b57cec5SDimitry Andric // Element is too large to vectorize. 3090b57cec5SDimitry Andric if (EltSize >= AccessSize) 3100b57cec5SDimitry Andric return 1; 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric unsigned NumElts = AccessSize / EltSize; 3130b57cec5SDimitry Andric // Can't vectorize if AccessBytes if not a multiple of EltSize. 3140b57cec5SDimitry Andric if (AccessSize != EltSize * NumElts) 3150b57cec5SDimitry Andric return 1; 3160b57cec5SDimitry Andric 3170b57cec5SDimitry Andric // We don't have enough elements to vectorize. 3180b57cec5SDimitry Andric if (Idx + NumElts > ValueVTs.size()) 3190b57cec5SDimitry Andric return 1; 3200b57cec5SDimitry Andric 3210b57cec5SDimitry Andric // PTX ISA can only deal with 2- and 4-element vector ops. 
3220b57cec5SDimitry Andric if (NumElts != 4 && NumElts != 2) 3230b57cec5SDimitry Andric return 1; 3240b57cec5SDimitry Andric 3250b57cec5SDimitry Andric for (unsigned j = Idx + 1; j < Idx + NumElts; ++j) { 3260b57cec5SDimitry Andric // Types do not match. 3270b57cec5SDimitry Andric if (ValueVTs[j] != EltVT) 3280b57cec5SDimitry Andric return 1; 3290b57cec5SDimitry Andric 3300b57cec5SDimitry Andric // Elements are not contiguous. 3310b57cec5SDimitry Andric if (Offsets[j] - Offsets[j - 1] != EltSize) 3320b57cec5SDimitry Andric return 1; 3330b57cec5SDimitry Andric } 3340b57cec5SDimitry Andric // OK. We can vectorize ValueVTs[i..i+NumElts) 3350b57cec5SDimitry Andric return NumElts; 3360b57cec5SDimitry Andric } 3370b57cec5SDimitry Andric 3380b57cec5SDimitry Andric // Flags for tracking per-element vectorization state of loads/stores 3390b57cec5SDimitry Andric // of a flattened function parameter or return value. 3400b57cec5SDimitry Andric enum ParamVectorizationFlags { 3410b57cec5SDimitry Andric PVF_INNER = 0x0, // Middle elements of a vector. 3420b57cec5SDimitry Andric PVF_FIRST = 0x1, // First element of the vector. 3430b57cec5SDimitry Andric PVF_LAST = 0x2, // Last element of the vector. 3440b57cec5SDimitry Andric // Scalar is effectively a 1-element vector. 3450b57cec5SDimitry Andric PVF_SCALAR = PVF_FIRST | PVF_LAST 3460b57cec5SDimitry Andric }; 3470b57cec5SDimitry Andric 3480b57cec5SDimitry Andric // Computes whether and how we can vectorize the loads/stores of a 3490b57cec5SDimitry Andric // flattened function parameter or return value. 3500b57cec5SDimitry Andric // 3510b57cec5SDimitry Andric // The flattened parameter is represented as the list of ValueVTs and 3520b57cec5SDimitry Andric // Offsets, and is aligned to ParamAlignment bytes. We return a vector 3530b57cec5SDimitry Andric // of the same size as ValueVTs indicating how each piece should be 3540b57cec5SDimitry Andric // loaded/stored (i.e. 
as a scalar, or as part of a vector 3550b57cec5SDimitry Andric // load/store). 3560b57cec5SDimitry Andric static SmallVector<ParamVectorizationFlags, 16> 3570b57cec5SDimitry Andric VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs, 3580b57cec5SDimitry Andric const SmallVectorImpl<uint64_t> &Offsets, 359bdd1243dSDimitry Andric Align ParamAlignment, bool IsVAArg = false) { 3600b57cec5SDimitry Andric // Set vector size to match ValueVTs and mark all elements as 3610b57cec5SDimitry Andric // scalars by default. 3620b57cec5SDimitry Andric SmallVector<ParamVectorizationFlags, 16> VectorInfo; 3630b57cec5SDimitry Andric VectorInfo.assign(ValueVTs.size(), PVF_SCALAR); 3640b57cec5SDimitry Andric 365bdd1243dSDimitry Andric if (IsVAArg) 366bdd1243dSDimitry Andric return VectorInfo; 367bdd1243dSDimitry Andric 3680b57cec5SDimitry Andric // Check what we can vectorize using 128/64/32-bit accesses. 3690b57cec5SDimitry Andric for (int I = 0, E = ValueVTs.size(); I != E; ++I) { 3700b57cec5SDimitry Andric // Skip elements we've already processed. 3710b57cec5SDimitry Andric assert(VectorInfo[I] == PVF_SCALAR && "Unexpected vector info state."); 3720b57cec5SDimitry Andric for (unsigned AccessSize : {16, 8, 4, 2}) { 3730b57cec5SDimitry Andric unsigned NumElts = CanMergeParamLoadStoresStartingAt( 3740b57cec5SDimitry Andric I, AccessSize, ValueVTs, Offsets, ParamAlignment); 3750b57cec5SDimitry Andric // Mark vectorized elements. 3760b57cec5SDimitry Andric switch (NumElts) { 3770b57cec5SDimitry Andric default: 3780b57cec5SDimitry Andric llvm_unreachable("Unexpected return value"); 3790b57cec5SDimitry Andric case 1: 3800b57cec5SDimitry Andric // Can't vectorize using this size, try next smaller size. 
3810b57cec5SDimitry Andric continue; 3820b57cec5SDimitry Andric case 2: 3830b57cec5SDimitry Andric assert(I + 1 < E && "Not enough elements."); 3840b57cec5SDimitry Andric VectorInfo[I] = PVF_FIRST; 3850b57cec5SDimitry Andric VectorInfo[I + 1] = PVF_LAST; 3860b57cec5SDimitry Andric I += 1; 3870b57cec5SDimitry Andric break; 3880b57cec5SDimitry Andric case 4: 3890b57cec5SDimitry Andric assert(I + 3 < E && "Not enough elements."); 3900b57cec5SDimitry Andric VectorInfo[I] = PVF_FIRST; 3910b57cec5SDimitry Andric VectorInfo[I + 1] = PVF_INNER; 3920b57cec5SDimitry Andric VectorInfo[I + 2] = PVF_INNER; 3930b57cec5SDimitry Andric VectorInfo[I + 3] = PVF_LAST; 3940b57cec5SDimitry Andric I += 3; 3950b57cec5SDimitry Andric break; 3960b57cec5SDimitry Andric } 3970b57cec5SDimitry Andric // Break out of the inner loop because we've already succeeded 3980b57cec5SDimitry Andric // using largest possible AccessSize. 3990b57cec5SDimitry Andric break; 4000b57cec5SDimitry Andric } 4010b57cec5SDimitry Andric } 4020b57cec5SDimitry Andric return VectorInfo; 4030b57cec5SDimitry Andric } 4040b57cec5SDimitry Andric 4050b57cec5SDimitry Andric // NVPTXTargetLowering Constructor. 4060b57cec5SDimitry Andric NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, 4070b57cec5SDimitry Andric const NVPTXSubtarget &STI) 4080b57cec5SDimitry Andric : TargetLowering(TM), nvTM(&TM), STI(STI) { 4090b57cec5SDimitry Andric // always lower memset, memcpy, and memmove intrinsics to load/store 4100b57cec5SDimitry Andric // instructions, rather 4110b57cec5SDimitry Andric // then generating calls to memset, mempcy or memmove. 
4125f757f3fSDimitry Andric MaxStoresPerMemset = MaxStoresPerMemsetOptSize = (unsigned)0xFFFFFFFF; 4135f757f3fSDimitry Andric MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = (unsigned) 0xFFFFFFFF; 4145f757f3fSDimitry Andric MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = (unsigned) 0xFFFFFFFF; 4150b57cec5SDimitry Andric 4160b57cec5SDimitry Andric setBooleanContents(ZeroOrNegativeOneBooleanContent); 4170b57cec5SDimitry Andric setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 4180b57cec5SDimitry Andric 4190b57cec5SDimitry Andric // Jump is Expensive. Don't create extra control flow for 'and', 'or' 4200b57cec5SDimitry Andric // condition branches. 4210b57cec5SDimitry Andric setJumpIsExpensive(true); 4220b57cec5SDimitry Andric 4230b57cec5SDimitry Andric // Wide divides are _very_ slow. Try to reduce the width of the divide if 4240b57cec5SDimitry Andric // possible. 4250b57cec5SDimitry Andric addBypassSlowDiv(64, 32); 4260b57cec5SDimitry Andric 4270b57cec5SDimitry Andric // By default, use the Source scheduling 4280b57cec5SDimitry Andric if (sched4reg) 4290b57cec5SDimitry Andric setSchedulingPreference(Sched::RegPressure); 4300b57cec5SDimitry Andric else 4310b57cec5SDimitry Andric setSchedulingPreference(Sched::Source); 4320b57cec5SDimitry Andric 4330b57cec5SDimitry Andric auto setFP16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action, 4340b57cec5SDimitry Andric LegalizeAction NoF16Action) { 4350b57cec5SDimitry Andric setOperationAction(Op, VT, STI.allowFP16Math() ? 
Action : NoF16Action); 4360b57cec5SDimitry Andric }; 4370b57cec5SDimitry Andric 43806c3fb27SDimitry Andric auto setBF16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action, 43906c3fb27SDimitry Andric LegalizeAction NoBF16Action) { 44006c3fb27SDimitry Andric bool IsOpSupported = STI.hasBF16Math(); 44106c3fb27SDimitry Andric // Few instructions are available on sm_90 only 44206c3fb27SDimitry Andric switch(Op) { 44306c3fb27SDimitry Andric case ISD::FADD: 44406c3fb27SDimitry Andric case ISD::FMUL: 44506c3fb27SDimitry Andric case ISD::FSUB: 4465f757f3fSDimitry Andric case ISD::SELECT: 4475f757f3fSDimitry Andric case ISD::SELECT_CC: 4485f757f3fSDimitry Andric case ISD::SETCC: 4495f757f3fSDimitry Andric case ISD::FEXP2: 4505f757f3fSDimitry Andric case ISD::FCEIL: 4515f757f3fSDimitry Andric case ISD::FFLOOR: 4525f757f3fSDimitry Andric case ISD::FNEARBYINT: 4535f757f3fSDimitry Andric case ISD::FRINT: 4540fca6ea1SDimitry Andric case ISD::FROUNDEVEN: 4555f757f3fSDimitry Andric case ISD::FTRUNC: 45606c3fb27SDimitry Andric IsOpSupported = STI.getSmVersion() >= 90 && STI.getPTXVersion() >= 78; 45706c3fb27SDimitry Andric break; 45806c3fb27SDimitry Andric } 45906c3fb27SDimitry Andric setOperationAction( 46006c3fb27SDimitry Andric Op, VT, IsOpSupported ? 
Action : NoBF16Action); 46106c3fb27SDimitry Andric }; 46206c3fb27SDimitry Andric 4635f757f3fSDimitry Andric auto setI16x2OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action, 4645f757f3fSDimitry Andric LegalizeAction NoI16x2Action) { 4655f757f3fSDimitry Andric bool IsOpSupported = false; 4665f757f3fSDimitry Andric // instructions are available on sm_90 only 4675f757f3fSDimitry Andric switch (Op) { 4685f757f3fSDimitry Andric case ISD::ADD: 4695f757f3fSDimitry Andric case ISD::SMAX: 4705f757f3fSDimitry Andric case ISD::SMIN: 4715f757f3fSDimitry Andric case ISD::UMIN: 4725f757f3fSDimitry Andric case ISD::UMAX: 4735f757f3fSDimitry Andric IsOpSupported = STI.getSmVersion() >= 90 && STI.getPTXVersion() >= 80; 4745f757f3fSDimitry Andric break; 4755f757f3fSDimitry Andric } 4765f757f3fSDimitry Andric setOperationAction(Op, VT, IsOpSupported ? Action : NoI16x2Action); 4775f757f3fSDimitry Andric }; 4785f757f3fSDimitry Andric 4790b57cec5SDimitry Andric addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass); 4800b57cec5SDimitry Andric addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass); 4815f757f3fSDimitry Andric addRegisterClass(MVT::v2i16, &NVPTX::Int32RegsRegClass); 4825f757f3fSDimitry Andric addRegisterClass(MVT::v4i8, &NVPTX::Int32RegsRegClass); 4830b57cec5SDimitry Andric addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass); 4840b57cec5SDimitry Andric addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); 4850b57cec5SDimitry Andric addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); 4860b57cec5SDimitry Andric addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); 48706c3fb27SDimitry Andric addRegisterClass(MVT::f16, &NVPTX::Int16RegsRegClass); 48806c3fb27SDimitry Andric addRegisterClass(MVT::v2f16, &NVPTX::Int32RegsRegClass); 48906c3fb27SDimitry Andric addRegisterClass(MVT::bf16, &NVPTX::Int16RegsRegClass); 49006c3fb27SDimitry Andric addRegisterClass(MVT::v2bf16, &NVPTX::Int32RegsRegClass); 4910b57cec5SDimitry Andric 4920b57cec5SDimitry Andric // 
Conversion to/from FP16/FP16x2 is always legal. 4930b57cec5SDimitry Andric setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom); 4940b57cec5SDimitry Andric setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom); 4950b57cec5SDimitry Andric setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f16, Expand); 4960b57cec5SDimitry Andric setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f16, Expand); 4970b57cec5SDimitry Andric 4980fca6ea1SDimitry Andric setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); 4990fca6ea1SDimitry Andric if (STI.getSmVersion() >= 30 && STI.getPTXVersion() > 31) 5000fca6ea1SDimitry Andric setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Legal); 5010fca6ea1SDimitry Andric 5020b57cec5SDimitry Andric setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote); 5030b57cec5SDimitry Andric setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand); 5040b57cec5SDimitry Andric 50506c3fb27SDimitry Andric // Conversion to/from BFP16/BFP16x2 is always legal. 50606c3fb27SDimitry Andric setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Custom); 50706c3fb27SDimitry Andric setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2bf16, Custom); 50806c3fb27SDimitry Andric setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2bf16, Expand); 50906c3fb27SDimitry Andric setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2bf16, Expand); 51006c3fb27SDimitry Andric 51106c3fb27SDimitry Andric setBF16OperationAction(ISD::SETCC, MVT::v2bf16, Legal, Expand); 5125f757f3fSDimitry Andric setBF16OperationAction(ISD::SETCC, MVT::bf16, Legal, Promote); 5135f757f3fSDimitry Andric if (getOperationAction(ISD::SETCC, MVT::bf16) == Promote) 5145f757f3fSDimitry Andric AddPromotedToType(ISD::SETCC, MVT::bf16, MVT::f32); 5155f757f3fSDimitry Andric 5165f757f3fSDimitry Andric // Conversion to/from i16/i16x2 is always legal. 
5175f757f3fSDimitry Andric setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16, Custom); 5185f757f3fSDimitry Andric setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom); 5195f757f3fSDimitry Andric setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i16, Expand); 5205f757f3fSDimitry Andric setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i16, Expand); 5215f757f3fSDimitry Andric 5225f757f3fSDimitry Andric setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8, Custom); 5235f757f3fSDimitry Andric setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i8, Custom); 5245f757f3fSDimitry Andric setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i8, Custom); 5255f757f3fSDimitry Andric setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom); 5265f757f3fSDimitry Andric // Only logical ops can be done on v4i8 directly, others must be done 5275f757f3fSDimitry Andric // elementwise. 5285f757f3fSDimitry Andric setOperationAction( 5295f757f3fSDimitry Andric {ISD::ABS, ISD::ADD, ISD::ADDC, ISD::ADDE, 5305f757f3fSDimitry Andric ISD::BITREVERSE, ISD::CTLZ, ISD::CTPOP, ISD::CTTZ, 5315f757f3fSDimitry Andric ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FSHL, ISD::FSHR, 5325f757f3fSDimitry Andric ISD::MUL, ISD::MULHS, ISD::MULHU, ISD::PARITY, 5335f757f3fSDimitry Andric ISD::ROTL, ISD::ROTR, ISD::SADDO, ISD::SADDO_CARRY, 5345f757f3fSDimitry Andric ISD::SADDSAT, ISD::SDIV, ISD::SDIVREM, ISD::SELECT_CC, 5355f757f3fSDimitry Andric ISD::SETCC, ISD::SHL, ISD::SINT_TO_FP, ISD::SMAX, 5365f757f3fSDimitry Andric ISD::SMIN, ISD::SMULO, ISD::SMUL_LOHI, ISD::SRA, 5375f757f3fSDimitry Andric ISD::SREM, ISD::SRL, ISD::SSHLSAT, ISD::SSUBO, 5385f757f3fSDimitry Andric ISD::SSUBO_CARRY, ISD::SSUBSAT, ISD::SUB, ISD::SUBC, 5395f757f3fSDimitry Andric ISD::SUBE, ISD::UADDO, ISD::UADDO_CARRY, ISD::UADDSAT, 5405f757f3fSDimitry Andric ISD::UDIV, ISD::UDIVREM, ISD::UINT_TO_FP, ISD::UMAX, 5415f757f3fSDimitry Andric ISD::UMIN, ISD::UMULO, ISD::UMUL_LOHI, ISD::UREM, 5425f757f3fSDimitry Andric ISD::USHLSAT, ISD::USUBO, 
ISD::USUBO_CARRY, ISD::VSELECT, 5435f757f3fSDimitry Andric ISD::USUBSAT}, 5445f757f3fSDimitry Andric MVT::v4i8, Expand); 5455f757f3fSDimitry Andric 5460b57cec5SDimitry Andric // Operations not directly supported by NVPTX. 54706c3fb27SDimitry Andric for (MVT VT : {MVT::bf16, MVT::f16, MVT::v2bf16, MVT::v2f16, MVT::f32, 5485f757f3fSDimitry Andric MVT::f64, MVT::i1, MVT::i8, MVT::i16, MVT::v2i16, MVT::v4i8, 5495f757f3fSDimitry Andric MVT::i32, MVT::i64}) { 5500b57cec5SDimitry Andric setOperationAction(ISD::SELECT_CC, VT, Expand); 5510b57cec5SDimitry Andric setOperationAction(ISD::BR_CC, VT, Expand); 5520b57cec5SDimitry Andric } 5530b57cec5SDimitry Andric 5540b57cec5SDimitry Andric // Some SIGN_EXTEND_INREG can be done using cvt instruction. 5550b57cec5SDimitry Andric // For others we will expand to a SHL/SRA pair. 5560b57cec5SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal); 5570b57cec5SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); 5580b57cec5SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); 5590b57cec5SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); 5600b57cec5SDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 5615f757f3fSDimitry Andric setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand); 5620b57cec5SDimitry Andric 5630b57cec5SDimitry Andric setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom); 5640b57cec5SDimitry Andric setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom); 5650b57cec5SDimitry Andric setOperationAction(ISD::SRL_PARTS, MVT::i32 , Custom); 5660b57cec5SDimitry Andric setOperationAction(ISD::SHL_PARTS, MVT::i64 , Custom); 5670b57cec5SDimitry Andric setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom); 5680b57cec5SDimitry Andric setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom); 5690b57cec5SDimitry Andric 5700b57cec5SDimitry Andric setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); 
5710b57cec5SDimitry Andric setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); 5720b57cec5SDimitry Andric 5730b57cec5SDimitry Andric // TODO: we may consider expanding ROTL/ROTR on older GPUs. Currently on GPUs 5740b57cec5SDimitry Andric // that don't have h/w rotation we lower them to multi-instruction assembly. 5750b57cec5SDimitry Andric // See ROT*_sw in NVPTXIntrInfo.td 5760b57cec5SDimitry Andric setOperationAction(ISD::ROTL, MVT::i64, Legal); 5770b57cec5SDimitry Andric setOperationAction(ISD::ROTR, MVT::i64, Legal); 5780b57cec5SDimitry Andric setOperationAction(ISD::ROTL, MVT::i32, Legal); 5790b57cec5SDimitry Andric setOperationAction(ISD::ROTR, MVT::i32, Legal); 5800b57cec5SDimitry Andric 5810b57cec5SDimitry Andric setOperationAction(ISD::ROTL, MVT::i16, Expand); 5825f757f3fSDimitry Andric setOperationAction(ISD::ROTL, MVT::v2i16, Expand); 5830b57cec5SDimitry Andric setOperationAction(ISD::ROTR, MVT::i16, Expand); 5845f757f3fSDimitry Andric setOperationAction(ISD::ROTR, MVT::v2i16, Expand); 5850b57cec5SDimitry Andric setOperationAction(ISD::ROTL, MVT::i8, Expand); 5860b57cec5SDimitry Andric setOperationAction(ISD::ROTR, MVT::i8, Expand); 5870b57cec5SDimitry Andric setOperationAction(ISD::BSWAP, MVT::i16, Expand); 5880b57cec5SDimitry Andric 5890b57cec5SDimitry Andric // Indirect branch is not supported. 5900b57cec5SDimitry Andric // This also disables Jump Table creation. 5910b57cec5SDimitry Andric setOperationAction(ISD::BR_JT, MVT::Other, Expand); 5920b57cec5SDimitry Andric setOperationAction(ISD::BRIND, MVT::Other, Expand); 5930b57cec5SDimitry Andric 5940b57cec5SDimitry Andric setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 5950b57cec5SDimitry Andric setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); 5960b57cec5SDimitry Andric 5970b57cec5SDimitry Andric // We want to legalize constant related memmove and memcopy 5980b57cec5SDimitry Andric // intrinsics. 
5990b57cec5SDimitry Andric setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 6000b57cec5SDimitry Andric 6010b57cec5SDimitry Andric // Turn FP extload into load/fpextend 6020b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); 6030b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); 60406c3fb27SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand); 60506c3fb27SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand); 6060b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 6070b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); 6080b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); 60906c3fb27SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2bf16, Expand); 61006c3fb27SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2bf16, Expand); 6110b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); 6120b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); 6130b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand); 61406c3fb27SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4bf16, Expand); 61506c3fb27SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4bf16, Expand); 6160b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand); 6175f757f3fSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand); 6185f757f3fSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand); 6195f757f3fSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8bf16, Expand); 6205f757f3fSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8bf16, Expand); 6210b57cec5SDimitry Andric // Turn FP truncstore into trunc + store. 
6220b57cec5SDimitry Andric // FIXME: vector types should also be expanded 6230b57cec5SDimitry Andric setTruncStoreAction(MVT::f32, MVT::f16, Expand); 6240b57cec5SDimitry Andric setTruncStoreAction(MVT::f64, MVT::f16, Expand); 62506c3fb27SDimitry Andric setTruncStoreAction(MVT::f32, MVT::bf16, Expand); 62606c3fb27SDimitry Andric setTruncStoreAction(MVT::f64, MVT::bf16, Expand); 6270b57cec5SDimitry Andric setTruncStoreAction(MVT::f64, MVT::f32, Expand); 6280b57cec5SDimitry Andric 6290b57cec5SDimitry Andric // PTX does not support load / store predicate registers 6300b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::i1, Custom); 6310b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::i1, Custom); 6320b57cec5SDimitry Andric 6330b57cec5SDimitry Andric for (MVT VT : MVT::integer_valuetypes()) { 6340b57cec5SDimitry Andric setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); 6350b57cec5SDimitry Andric setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); 6360fca6ea1SDimitry Andric setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); 6370b57cec5SDimitry Andric setTruncStoreAction(VT, MVT::i1, Expand); 6380b57cec5SDimitry Andric } 6390b57cec5SDimitry Andric 6405f757f3fSDimitry Andric // expand extload of vector of integers. 
6415f757f3fSDimitry Andric setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16, 6425f757f3fSDimitry Andric MVT::v2i8, Expand); 6435f757f3fSDimitry Andric setTruncStoreAction(MVT::v2i16, MVT::v2i8, Expand); 6445f757f3fSDimitry Andric 6450b57cec5SDimitry Andric // This is legal in NVPTX 6460b57cec5SDimitry Andric setOperationAction(ISD::ConstantFP, MVT::f64, Legal); 6470b57cec5SDimitry Andric setOperationAction(ISD::ConstantFP, MVT::f32, Legal); 6480b57cec5SDimitry Andric setOperationAction(ISD::ConstantFP, MVT::f16, Legal); 649bdd1243dSDimitry Andric setOperationAction(ISD::ConstantFP, MVT::bf16, Legal); 6500b57cec5SDimitry Andric 6515f757f3fSDimitry Andric setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); 6525f757f3fSDimitry Andric setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); 6535f757f3fSDimitry Andric 6540b57cec5SDimitry Andric // TRAP can be lowered to PTX trap 6550b57cec5SDimitry Andric setOperationAction(ISD::TRAP, MVT::Other, Legal); 6560b57cec5SDimitry Andric 6570b57cec5SDimitry Andric // Register custom handling for vector loads/stores 6588bcb0991SDimitry Andric for (MVT VT : MVT::fixedlen_vector_valuetypes()) { 6590b57cec5SDimitry Andric if (IsPTXVectorType(VT)) { 6600b57cec5SDimitry Andric setOperationAction(ISD::LOAD, VT, Custom); 6610b57cec5SDimitry Andric setOperationAction(ISD::STORE, VT, Custom); 6620b57cec5SDimitry Andric setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom); 6630b57cec5SDimitry Andric } 6640b57cec5SDimitry Andric } 6650b57cec5SDimitry Andric 666bdd1243dSDimitry Andric // Support varargs. 
667bdd1243dSDimitry Andric setOperationAction(ISD::VASTART, MVT::Other, Custom); 668bdd1243dSDimitry Andric setOperationAction(ISD::VAARG, MVT::Other, Custom); 669bdd1243dSDimitry Andric setOperationAction(ISD::VACOPY, MVT::Other, Expand); 670bdd1243dSDimitry Andric setOperationAction(ISD::VAEND, MVT::Other, Expand); 671bdd1243dSDimitry Andric 6720b57cec5SDimitry Andric // Custom handling for i8 intrinsics 6730b57cec5SDimitry Andric setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); 6740b57cec5SDimitry Andric 6750b57cec5SDimitry Andric for (const auto& Ty : {MVT::i16, MVT::i32, MVT::i64}) { 6760b57cec5SDimitry Andric setOperationAction(ISD::ABS, Ty, Legal); 6770b57cec5SDimitry Andric setOperationAction(ISD::SMIN, Ty, Legal); 6780b57cec5SDimitry Andric setOperationAction(ISD::SMAX, Ty, Legal); 6790b57cec5SDimitry Andric setOperationAction(ISD::UMIN, Ty, Legal); 6800b57cec5SDimitry Andric setOperationAction(ISD::UMAX, Ty, Legal); 6810b57cec5SDimitry Andric 6820b57cec5SDimitry Andric setOperationAction(ISD::CTPOP, Ty, Legal); 6830b57cec5SDimitry Andric setOperationAction(ISD::CTLZ, Ty, Legal); 6840b57cec5SDimitry Andric } 6850b57cec5SDimitry Andric 6865f757f3fSDimitry Andric setI16x2OperationAction(ISD::ABS, MVT::v2i16, Legal, Custom); 6875f757f3fSDimitry Andric setI16x2OperationAction(ISD::SMIN, MVT::v2i16, Legal, Custom); 6885f757f3fSDimitry Andric setI16x2OperationAction(ISD::SMAX, MVT::v2i16, Legal, Custom); 6895f757f3fSDimitry Andric setI16x2OperationAction(ISD::UMIN, MVT::v2i16, Legal, Custom); 6905f757f3fSDimitry Andric setI16x2OperationAction(ISD::UMAX, MVT::v2i16, Legal, Custom); 6915f757f3fSDimitry Andric setI16x2OperationAction(ISD::CTPOP, MVT::v2i16, Legal, Expand); 6925f757f3fSDimitry Andric setI16x2OperationAction(ISD::CTLZ, MVT::v2i16, Legal, Expand); 6935f757f3fSDimitry Andric 6945f757f3fSDimitry Andric setI16x2OperationAction(ISD::ADD, MVT::v2i16, Legal, Custom); 6955f757f3fSDimitry Andric setI16x2OperationAction(ISD::SUB, MVT::v2i16, Legal, 
Custom); 6965f757f3fSDimitry Andric setI16x2OperationAction(ISD::MUL, MVT::v2i16, Legal, Custom); 6975f757f3fSDimitry Andric setI16x2OperationAction(ISD::SHL, MVT::v2i16, Legal, Custom); 6985f757f3fSDimitry Andric setI16x2OperationAction(ISD::SREM, MVT::v2i16, Legal, Custom); 6995f757f3fSDimitry Andric setI16x2OperationAction(ISD::UREM, MVT::v2i16, Legal, Custom); 7005f757f3fSDimitry Andric 7015f757f3fSDimitry Andric // Other arithmetic and logic ops are unsupported. 7025f757f3fSDimitry Andric setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SRA, ISD::SRL, ISD::MULHS, 7035f757f3fSDimitry Andric ISD::MULHU, ISD::FP_TO_SINT, ISD::FP_TO_UINT, 7045f757f3fSDimitry Andric ISD::SINT_TO_FP, ISD::UINT_TO_FP}, 7055f757f3fSDimitry Andric MVT::v2i16, Expand); 7065f757f3fSDimitry Andric 70781ad6265SDimitry Andric setOperationAction(ISD::ADDC, MVT::i32, Legal); 70881ad6265SDimitry Andric setOperationAction(ISD::ADDE, MVT::i32, Legal); 70981ad6265SDimitry Andric setOperationAction(ISD::SUBC, MVT::i32, Legal); 71081ad6265SDimitry Andric setOperationAction(ISD::SUBE, MVT::i32, Legal); 71181ad6265SDimitry Andric if (STI.getPTXVersion() >= 43) { 71281ad6265SDimitry Andric setOperationAction(ISD::ADDC, MVT::i64, Legal); 71381ad6265SDimitry Andric setOperationAction(ISD::ADDE, MVT::i64, Legal); 71481ad6265SDimitry Andric setOperationAction(ISD::SUBC, MVT::i64, Legal); 71581ad6265SDimitry Andric setOperationAction(ISD::SUBE, MVT::i64, Legal); 71681ad6265SDimitry Andric } 71781ad6265SDimitry Andric 7180b57cec5SDimitry Andric setOperationAction(ISD::CTTZ, MVT::i16, Expand); 7195f757f3fSDimitry Andric setOperationAction(ISD::CTTZ, MVT::v2i16, Expand); 7200b57cec5SDimitry Andric setOperationAction(ISD::CTTZ, MVT::i32, Expand); 7210b57cec5SDimitry Andric setOperationAction(ISD::CTTZ, MVT::i64, Expand); 7220b57cec5SDimitry Andric 7230b57cec5SDimitry Andric // PTX does not directly support SELP of i1, so promote to i32 first 7240b57cec5SDimitry Andric setOperationAction(ISD::SELECT, MVT::i1, 
Custom); 7250b57cec5SDimitry Andric 7260b57cec5SDimitry Andric // PTX cannot multiply two i64s in a single instruction. 7270b57cec5SDimitry Andric setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); 7280b57cec5SDimitry Andric setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); 7290b57cec5SDimitry Andric 7300b57cec5SDimitry Andric // We have some custom DAG combine patterns for these nodes 7315f757f3fSDimitry Andric setTargetDAGCombine({ISD::ADD, ISD::AND, ISD::EXTRACT_VECTOR_ELT, ISD::FADD, 7325f757f3fSDimitry Andric ISD::LOAD, ISD::MUL, ISD::SHL, ISD::SREM, ISD::UREM, 7335f757f3fSDimitry Andric ISD::VSELECT}); 7340b57cec5SDimitry Andric 73506c3fb27SDimitry Andric // setcc for f16x2 and bf16x2 needs special handling to prevent 73606c3fb27SDimitry Andric // legalizer's attempt to scalarize it due to v2i1 not being legal. 73706c3fb27SDimitry Andric if (STI.allowFP16Math() || STI.hasBF16Math()) 7380b57cec5SDimitry Andric setTargetDAGCombine(ISD::SETCC); 7390b57cec5SDimitry Andric 7400b57cec5SDimitry Andric // Promote fp16 arithmetic if fp16 hardware isn't available or the 7410b57cec5SDimitry Andric // user passed --nvptx-no-fp16-math. The flag is useful because, 7420b57cec5SDimitry Andric // although sm_53+ GPUs have some sort of FP16 support in 7430b57cec5SDimitry Andric // hardware, only sm_53 and sm_60 have full implementation. Others 7440b57cec5SDimitry Andric // only have token amount of hardware and are likely to run faster 7450b57cec5SDimitry Andric // by using fp32 units instead. 7460b57cec5SDimitry Andric for (const auto &Op : {ISD::FADD, ISD::FMUL, ISD::FSUB, ISD::FMA}) { 7470b57cec5SDimitry Andric setFP16OperationAction(Op, MVT::f16, Legal, Promote); 7480b57cec5SDimitry Andric setFP16OperationAction(Op, MVT::v2f16, Legal, Expand); 74906c3fb27SDimitry Andric setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand); 75006c3fb27SDimitry Andric // bf16 must be promoted to f32. 
7515f757f3fSDimitry Andric setBF16OperationAction(Op, MVT::bf16, Legal, Promote); 75206c3fb27SDimitry Andric if (getOperationAction(Op, MVT::bf16) == Promote) 75306c3fb27SDimitry Andric AddPromotedToType(Op, MVT::bf16, MVT::f32); 7540b57cec5SDimitry Andric } 7550b57cec5SDimitry Andric 756bdd1243dSDimitry Andric // f16/f16x2 neg was introduced in PTX 60, SM_53. 757bdd1243dSDimitry Andric const bool IsFP16FP16x2NegAvailable = STI.getSmVersion() >= 53 && 758bdd1243dSDimitry Andric STI.getPTXVersion() >= 60 && 759bdd1243dSDimitry Andric STI.allowFP16Math(); 760bdd1243dSDimitry Andric for (const auto &VT : {MVT::f16, MVT::v2f16}) 761bdd1243dSDimitry Andric setOperationAction(ISD::FNEG, VT, 762bdd1243dSDimitry Andric IsFP16FP16x2NegAvailable ? Legal : Expand); 7630b57cec5SDimitry Andric 76406c3fb27SDimitry Andric setBF16OperationAction(ISD::FNEG, MVT::bf16, Legal, Expand); 76506c3fb27SDimitry Andric setBF16OperationAction(ISD::FNEG, MVT::v2bf16, Legal, Expand); 7660b57cec5SDimitry Andric // (would be) Library functions. 7670b57cec5SDimitry Andric 7680b57cec5SDimitry Andric // These map to conversion instructions for scalar FP types. 
7690b57cec5SDimitry Andric for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT, 770bdd1243dSDimitry Andric ISD::FROUNDEVEN, ISD::FTRUNC}) { 7710b57cec5SDimitry Andric setOperationAction(Op, MVT::f16, Legal); 7720b57cec5SDimitry Andric setOperationAction(Op, MVT::f32, Legal); 7730b57cec5SDimitry Andric setOperationAction(Op, MVT::f64, Legal); 7740b57cec5SDimitry Andric setOperationAction(Op, MVT::v2f16, Expand); 77506c3fb27SDimitry Andric setOperationAction(Op, MVT::v2bf16, Expand); 7765f757f3fSDimitry Andric setBF16OperationAction(Op, MVT::bf16, Legal, Promote); 7775f757f3fSDimitry Andric if (getOperationAction(Op, MVT::bf16) == Promote) 7785f757f3fSDimitry Andric AddPromotedToType(Op, MVT::bf16, MVT::f32); 7795f757f3fSDimitry Andric } 7805f757f3fSDimitry Andric 7810fca6ea1SDimitry Andric if (STI.getSmVersion() < 80 || STI.getPTXVersion() < 71) { 7820fca6ea1SDimitry Andric setOperationAction(ISD::BF16_TO_FP, MVT::f32, Expand); 7830fca6ea1SDimitry Andric } 7840fca6ea1SDimitry Andric if (STI.getSmVersion() < 90 || STI.getPTXVersion() < 78) { 7850fca6ea1SDimitry Andric for (MVT VT : {MVT::bf16, MVT::f32, MVT::f64}) { 7860fca6ea1SDimitry Andric setOperationAction(ISD::FP_EXTEND, VT, Custom); 7870fca6ea1SDimitry Andric setOperationAction(ISD::FP_ROUND, VT, Custom); 7880fca6ea1SDimitry Andric } 7890fca6ea1SDimitry Andric } 7900fca6ea1SDimitry Andric 7915f757f3fSDimitry Andric // sm_80 only has conversions between f32 and bf16. Custom lower all other 7925f757f3fSDimitry Andric // bf16 conversions. 
7930fca6ea1SDimitry Andric if (STI.getSmVersion() < 90 || STI.getPTXVersion() < 78) { 7945f757f3fSDimitry Andric for (MVT VT : {MVT::i1, MVT::i16, MVT::i32, MVT::i64}) { 7955f757f3fSDimitry Andric setOperationAction( 7965f757f3fSDimitry Andric {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT}, 7975f757f3fSDimitry Andric VT, Custom); 7985f757f3fSDimitry Andric } 7990fca6ea1SDimitry Andric setOperationAction( 8000fca6ea1SDimitry Andric {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT}, 8010fca6ea1SDimitry Andric MVT::bf16, Custom); 8020b57cec5SDimitry Andric } 8030b57cec5SDimitry Andric 8040b57cec5SDimitry Andric setOperationAction(ISD::FROUND, MVT::f16, Promote); 8050b57cec5SDimitry Andric setOperationAction(ISD::FROUND, MVT::v2f16, Expand); 80606c3fb27SDimitry Andric setOperationAction(ISD::FROUND, MVT::v2bf16, Expand); 8070b57cec5SDimitry Andric setOperationAction(ISD::FROUND, MVT::f32, Custom); 8080b57cec5SDimitry Andric setOperationAction(ISD::FROUND, MVT::f64, Custom); 8095f757f3fSDimitry Andric setOperationAction(ISD::FROUND, MVT::bf16, Promote); 8105f757f3fSDimitry Andric AddPromotedToType(ISD::FROUND, MVT::bf16, MVT::f32); 8110b57cec5SDimitry Andric 8120b57cec5SDimitry Andric // 'Expand' implements FCOPYSIGN without calling an external library. 8130b57cec5SDimitry Andric setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); 8140b57cec5SDimitry Andric setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand); 81506c3fb27SDimitry Andric setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand); 81606c3fb27SDimitry Andric setOperationAction(ISD::FCOPYSIGN, MVT::v2bf16, Expand); 8170b57cec5SDimitry Andric setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 8180b57cec5SDimitry Andric setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 8190b57cec5SDimitry Andric 8200b57cec5SDimitry Andric // These map to corresponding instructions for f32/f64. f16 must be 8210b57cec5SDimitry Andric // promoted to f32. 
v2f16 is expanded to f16, which is then promoted 8220b57cec5SDimitry Andric // to f32. 82304eeddc0SDimitry Andric for (const auto &Op : 8245f757f3fSDimitry Andric {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS}) { 8250b57cec5SDimitry Andric setOperationAction(Op, MVT::f16, Promote); 8260b57cec5SDimitry Andric setOperationAction(Op, MVT::f32, Legal); 8270b57cec5SDimitry Andric setOperationAction(Op, MVT::f64, Legal); 8280b57cec5SDimitry Andric setOperationAction(Op, MVT::v2f16, Expand); 82906c3fb27SDimitry Andric setOperationAction(Op, MVT::v2bf16, Expand); 8305f757f3fSDimitry Andric setOperationAction(Op, MVT::bf16, Promote); 8315f757f3fSDimitry Andric AddPromotedToType(Op, MVT::bf16, MVT::f32); 8320b57cec5SDimitry Andric } 8335f757f3fSDimitry Andric for (const auto &Op : {ISD::FABS}) { 8345f757f3fSDimitry Andric setOperationAction(Op, MVT::f16, Promote); 8355f757f3fSDimitry Andric setOperationAction(Op, MVT::f32, Legal); 8365f757f3fSDimitry Andric setOperationAction(Op, MVT::f64, Legal); 8375f757f3fSDimitry Andric setOperationAction(Op, MVT::v2f16, Expand); 8385f757f3fSDimitry Andric setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand); 8395f757f3fSDimitry Andric setBF16OperationAction(Op, MVT::bf16, Legal, Promote); 8405f757f3fSDimitry Andric if (getOperationAction(Op, MVT::bf16) == Promote) 8415f757f3fSDimitry Andric AddPromotedToType(Op, MVT::bf16, MVT::f32); 8425f757f3fSDimitry Andric } 8435f757f3fSDimitry Andric 84404eeddc0SDimitry Andric // max.f16, max.f16x2 and max.NaN are supported on sm_80+. 84504eeddc0SDimitry Andric auto GetMinMaxAction = [&](LegalizeAction NotSm80Action) { 84604eeddc0SDimitry Andric bool IsAtLeastSm80 = STI.getSmVersion() >= 80 && STI.getPTXVersion() >= 70; 84704eeddc0SDimitry Andric return IsAtLeastSm80 ? 
Legal : NotSm80Action; 84804eeddc0SDimitry Andric }; 84904eeddc0SDimitry Andric for (const auto &Op : {ISD::FMINNUM, ISD::FMAXNUM}) { 85004eeddc0SDimitry Andric setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Promote), Promote); 85104eeddc0SDimitry Andric setOperationAction(Op, MVT::f32, Legal); 85204eeddc0SDimitry Andric setOperationAction(Op, MVT::f64, Legal); 85304eeddc0SDimitry Andric setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand); 85406c3fb27SDimitry Andric setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand); 8555f757f3fSDimitry Andric setBF16OperationAction(Op, MVT::bf16, Legal, Promote); 8565f757f3fSDimitry Andric if (getOperationAction(Op, MVT::bf16) == Promote) 8575f757f3fSDimitry Andric AddPromotedToType(Op, MVT::bf16, MVT::f32); 85804eeddc0SDimitry Andric } 85904eeddc0SDimitry Andric for (const auto &Op : {ISD::FMINIMUM, ISD::FMAXIMUM}) { 86004eeddc0SDimitry Andric setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Expand), Expand); 86106c3fb27SDimitry Andric setFP16OperationAction(Op, MVT::bf16, Legal, Expand); 86204eeddc0SDimitry Andric setOperationAction(Op, MVT::f32, GetMinMaxAction(Expand)); 86304eeddc0SDimitry Andric setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand); 86406c3fb27SDimitry Andric setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand); 86504eeddc0SDimitry Andric } 8660b57cec5SDimitry Andric 8670fca6ea1SDimitry Andric // Custom lowering for inline asm with 128-bit operands 8680fca6ea1SDimitry Andric setOperationAction(ISD::CopyToReg, MVT::i128, Custom); 8690fca6ea1SDimitry Andric setOperationAction(ISD::CopyFromReg, MVT::i128, Custom); 8700fca6ea1SDimitry Andric 8710b57cec5SDimitry Andric // No FEXP2, FLOG2. The PTX ex2 and log2 functions are always approximate. 8720b57cec5SDimitry Andric // No FPOW or FREM in PTX. 
8730b57cec5SDimitry Andric 8740b57cec5SDimitry Andric // Now deduce the information based on the above mentioned 8750b57cec5SDimitry Andric // actions 8760b57cec5SDimitry Andric computeRegisterProperties(STI.getRegisterInfo()); 87781ad6265SDimitry Andric 87881ad6265SDimitry Andric setMinCmpXchgSizeInBits(32); 8791db9f3b2SDimitry Andric setMaxAtomicSizeInBitsSupported(64); 8800fca6ea1SDimitry Andric setMaxDivRemBitWidthSupported(64); 8810b57cec5SDimitry Andric } 8820b57cec5SDimitry Andric 8830b57cec5SDimitry Andric const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { 8840fca6ea1SDimitry Andric 8850fca6ea1SDimitry Andric #define MAKE_CASE(V) \ 8860fca6ea1SDimitry Andric case V: \ 8870fca6ea1SDimitry Andric return #V; 8880fca6ea1SDimitry Andric 8890b57cec5SDimitry Andric switch ((NVPTXISD::NodeType)Opcode) { 8900b57cec5SDimitry Andric case NVPTXISD::FIRST_NUMBER: 8910b57cec5SDimitry Andric break; 8920b57cec5SDimitry Andric 8930fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CALL) 8940fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::RET_GLUE) 8950fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LOAD_PARAM) 8960fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Wrapper) 8970fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::DeclareParam) 8980fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::DeclareScalarParam) 8990fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::DeclareRet) 9000fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::DeclareScalarRet) 9010fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::DeclareRetParam) 9020fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::PrintCall) 9030fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::PrintConvergentCall) 9040fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::PrintCallUni) 9050fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::PrintConvergentCallUni) 9060fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LoadParam) 9070fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LoadParamV2) 9080fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LoadParamV4) 9090fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreParam) 
9100fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreParamV2) 9110fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreParamV4) 9120fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreParamS32) 9130fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreParamU32) 9140fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CallArgBegin) 9150fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CallArg) 9160fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LastCallArg) 9170fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CallArgEnd) 9180fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CallVoid) 9190fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CallVal) 9200fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CallSymbol) 9210fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Prototype) 9220fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::MoveParam) 9230fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreRetval) 9240fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreRetvalV2) 9250fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreRetvalV4) 9260fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::PseudoUseParam) 9270fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::RETURN) 9280fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CallSeqBegin) 9290fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CallSeqEnd) 9300fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::CallPrototype) 9310fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::ProxyReg) 9320fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LoadV2) 9330fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LoadV4) 9340fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LDGV2) 9350fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LDGV4) 9360fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LDUV2) 9370fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::LDUV4) 9380fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreV2) 9390fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::StoreV4) 9400fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::FUN_SHFL_CLAMP) 9410fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::FUN_SHFR_CLAMP) 9420fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::IMAD) 9430fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::BFE) 9440fca6ea1SDimitry 
Andric MAKE_CASE(NVPTXISD::BFI) 9450fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::PRMT) 9460fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::DYNAMIC_STACKALLOC) 9470fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::SETP_F16X2) 9480fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::SETP_BF16X2) 9490fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Dummy) 9500fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::MUL_WIDE_SIGNED) 9510fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::MUL_WIDE_UNSIGNED) 9520fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DFloatS32) 9530fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DFloatFloat) 9540fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DFloatFloatLevel) 9550fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DFloatFloatGrad) 9560fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DS32S32) 9570fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DS32Float) 9580fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DS32FloatLevel) 9590fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DS32FloatGrad) 9600fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DU32S32) 9610fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DU32Float) 9620fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DU32FloatLevel) 9630fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DU32FloatGrad) 9640fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayFloatS32) 9650fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayFloatFloat) 9660fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayFloatFloatLevel) 9670fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayFloatFloatGrad) 9680fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayS32S32) 9690fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayS32Float) 9700fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayS32FloatLevel) 9710fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayS32FloatGrad) 9720fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayU32S32) 9730fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayU32Float) 9740fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayU32FloatLevel) 
9750fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex1DArrayU32FloatGrad) 9760fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DFloatS32) 9770fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DFloatFloat) 9780fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DFloatFloatLevel) 9790fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DFloatFloatGrad) 9800fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DS32S32) 9810fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DS32Float) 9820fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DS32FloatLevel) 9830fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DS32FloatGrad) 9840fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DU32S32) 9850fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DU32Float) 9860fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DU32FloatLevel) 9870fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DU32FloatGrad) 9880fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayFloatS32) 9890fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayFloatFloat) 9900fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayFloatFloatLevel) 9910fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayFloatFloatGrad) 9920fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayS32S32) 9930fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayS32Float) 9940fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayS32FloatLevel) 9950fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayS32FloatGrad) 9960fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayU32S32) 9970fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayU32Float) 9980fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayU32FloatLevel) 9990fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex2DArrayU32FloatGrad) 10000fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DFloatS32) 10010fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DFloatFloat) 10020fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DFloatFloatLevel) 10030fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DFloatFloatGrad) 10040fca6ea1SDimitry Andric 
MAKE_CASE(NVPTXISD::Tex3DS32S32) 10050fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DS32Float) 10060fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DS32FloatLevel) 10070fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DS32FloatGrad) 10080fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DU32S32) 10090fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DU32Float) 10100fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DU32FloatLevel) 10110fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tex3DU32FloatGrad) 10120fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeFloatFloat) 10130fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeFloatFloatLevel) 10140fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeS32Float) 10150fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeS32FloatLevel) 10160fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeU32Float) 10170fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeU32FloatLevel) 10180fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeArrayFloatFloat) 10190fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeArrayFloatFloatLevel) 10200fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeArrayS32Float) 10210fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeArrayS32FloatLevel) 10220fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeArrayU32Float) 10230fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexCubeArrayU32FloatLevel) 10240fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4R2DFloatFloat) 10250fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4G2DFloatFloat) 10260fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4B2DFloatFloat) 10270fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4A2DFloatFloat) 10280fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4R2DS64Float) 10290fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4G2DS64Float) 10300fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4B2DS64Float) 10310fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4A2DS64Float) 10320fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4R2DU64Float) 10330fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4G2DU64Float) 
10340fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4B2DU64Float) 10350fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4A2DU64Float) 10360b57cec5SDimitry Andric 10370fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DFloatS32) 10380fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DFloatFloat) 10390fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DFloatFloatLevel) 10400fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DFloatFloatGrad) 10410fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DS32S32) 10420fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DS32Float) 10430fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DS32FloatLevel) 10440fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DS32FloatGrad) 10450fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DU32S32) 10460fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DU32Float) 10470fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DU32FloatLevel) 10480fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DU32FloatGrad) 10490fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayFloatS32) 10500fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayFloatFloat) 10510fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayFloatFloatLevel) 10520fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayFloatFloatGrad) 10530fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayS32S32) 10540fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayS32Float) 10550fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayS32FloatLevel) 10560fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayS32FloatGrad) 10570fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayU32S32) 10580fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayU32Float) 10590fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayU32FloatLevel) 10600fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified1DArrayU32FloatGrad) 10610fca6ea1SDimitry Andric 
MAKE_CASE(NVPTXISD::TexUnified2DFloatS32) 10620fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DFloatFloat) 10630fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DFloatFloatLevel) 10640fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DFloatFloatGrad) 10650fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DS32S32) 10660fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DS32Float) 10670fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DS32FloatLevel) 10680fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DS32FloatGrad) 10690fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DU32S32) 10700fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DU32Float) 10710fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DU32FloatLevel) 10720fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DU32FloatGrad) 10730fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayFloatS32) 10740fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayFloatFloat) 10750fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayFloatFloatLevel) 10760fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayFloatFloatGrad) 10770fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayS32S32) 10780fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayS32Float) 10790fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayS32FloatLevel) 10800fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayS32FloatGrad) 10810fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayU32S32) 10820fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayU32Float) 10830fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayU32FloatLevel) 10840fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified2DArrayU32FloatGrad) 10850fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DFloatS32) 10860fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DFloatFloat) 10870fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DFloatFloatLevel) 
10880fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DFloatFloatGrad) 10890fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DS32S32) 10900fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DS32Float) 10910fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DS32FloatLevel) 10920fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DS32FloatGrad) 10930fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DU32S32) 10940fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DU32Float) 10950fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DU32FloatLevel) 10960fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnified3DU32FloatGrad) 10970fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeFloatFloat) 10980fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeFloatFloatLevel) 10990fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeS32Float) 11000fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeS32FloatLevel) 11010fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeU32Float) 11020fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeU32FloatLevel) 11030fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayFloatFloat) 11040fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel) 11050fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayS32Float) 11060fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayS32FloatLevel) 11070fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayU32Float) 11080fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayU32FloatLevel) 11090fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeFloatFloatGrad) 11100fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeS32FloatGrad) 11110fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeU32FloatGrad) 11120fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad) 11130fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayS32FloatGrad) 11140fca6ea1SDimitry 
Andric MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayU32FloatGrad) 11150fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedR2DFloatFloat) 11160fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedG2DFloatFloat) 11170fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedB2DFloatFloat) 11180fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedA2DFloatFloat) 11190fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedR2DS64Float) 11200fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedG2DS64Float) 11210fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedB2DS64Float) 11220fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedA2DS64Float) 11230fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedR2DU64Float) 11240fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedG2DU64Float) 11250fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedB2DU64Float) 11260fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Tld4UnifiedA2DU64Float) 11270b57cec5SDimitry Andric 11280fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI8Clamp) 11290fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI16Clamp) 11300fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI32Clamp) 11310fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI64Clamp) 11320fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I8Clamp) 11330fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I16Clamp) 11340fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I32Clamp) 11350fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I64Clamp) 11360fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV4I8Clamp) 11370fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV4I16Clamp) 11380fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV4I32Clamp) 11390b57cec5SDimitry Andric 11400fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI8Clamp) 11410fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI16Clamp) 11420fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI32Clamp) 11430fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI64Clamp) 11440fca6ea1SDimitry 
Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I8Clamp) 11450fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I16Clamp) 11460fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I32Clamp) 11470fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I64Clamp) 11480fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV4I8Clamp) 11490fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV4I16Clamp) 11500fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV4I32Clamp) 11510b57cec5SDimitry Andric 11520fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI8Clamp) 11530fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI16Clamp) 11540fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI32Clamp) 11550fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI64Clamp) 11560fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I8Clamp) 11570fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I16Clamp) 11580fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I32Clamp) 11590fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I64Clamp) 11600fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV4I8Clamp) 11610fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV4I16Clamp) 11620fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV4I32Clamp) 11630b57cec5SDimitry Andric 11640fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI8Clamp) 11650fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI16Clamp) 11660fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI32Clamp) 11670fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI64Clamp) 11680fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I8Clamp) 11690fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I16Clamp) 11700fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I32Clamp) 11710fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I64Clamp) 11720fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV4I8Clamp) 11730fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV4I16Clamp) 11740fca6ea1SDimitry Andric 
MAKE_CASE(NVPTXISD::Suld2DArrayV4I32Clamp) 11750b57cec5SDimitry Andric 11760fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI8Clamp) 11770fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI16Clamp) 11780fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI32Clamp) 11790fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI64Clamp) 11800fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I8Clamp) 11810fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I16Clamp) 11820fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I32Clamp) 11830fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I64Clamp) 11840fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV4I8Clamp) 11850fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV4I16Clamp) 11860fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV4I32Clamp) 11870b57cec5SDimitry Andric 11880fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI8Trap) 11890fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI16Trap) 11900fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI32Trap) 11910fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI64Trap) 11920fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I8Trap) 11930fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I16Trap) 11940fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I32Trap) 11950fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I64Trap) 11960fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV4I8Trap) 11970fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV4I16Trap) 11980fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV4I32Trap) 11990b57cec5SDimitry Andric 12000fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI8Trap) 12010fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI16Trap) 12020fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI32Trap) 12030fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI64Trap) 12040fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I8Trap) 12050fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I16Trap) 12060fca6ea1SDimitry Andric 
MAKE_CASE(NVPTXISD::Suld1DArrayV2I32Trap) 12070fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I64Trap) 12080fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV4I8Trap) 12090fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV4I16Trap) 12100fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV4I32Trap) 12110b57cec5SDimitry Andric 12120fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI8Trap) 12130fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI16Trap) 12140fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI32Trap) 12150fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI64Trap) 12160fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I8Trap) 12170fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I16Trap) 12180fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I32Trap) 12190fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I64Trap) 12200fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV4I8Trap) 12210fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV4I16Trap) 12220fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV4I32Trap) 12230b57cec5SDimitry Andric 12240fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI8Trap) 12250fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI16Trap) 12260fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI32Trap) 12270fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI64Trap) 12280fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I8Trap) 12290fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I16Trap) 12300fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I32Trap) 12310fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I64Trap) 12320fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV4I8Trap) 12330fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV4I16Trap) 12340fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV4I32Trap) 12350b57cec5SDimitry Andric 12360fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI8Trap) 12370fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI16Trap) 
12380fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI32Trap) 12390fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI64Trap) 12400fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I8Trap) 12410fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I16Trap) 12420fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I32Trap) 12430fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I64Trap) 12440fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV4I8Trap) 12450fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV4I16Trap) 12460fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV4I32Trap) 12470b57cec5SDimitry Andric 12480fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI8Zero) 12490fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI16Zero) 12500fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI32Zero) 12510fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DI64Zero) 12520fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I8Zero) 12530fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I16Zero) 12540fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I32Zero) 12550fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV2I64Zero) 12560fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV4I8Zero) 12570fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV4I16Zero) 12580fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DV4I32Zero) 12590b57cec5SDimitry Andric 12600fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI8Zero) 12610fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI16Zero) 12620fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI32Zero) 12630fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayI64Zero) 12640fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I8Zero) 12650fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I16Zero) 12660fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I32Zero) 12670fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV2I64Zero) 12680fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV4I8Zero) 12690fca6ea1SDimitry Andric 
MAKE_CASE(NVPTXISD::Suld1DArrayV4I16Zero) 12700fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld1DArrayV4I32Zero) 12710b57cec5SDimitry Andric 12720fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI8Zero) 12730fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI16Zero) 12740fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI32Zero) 12750fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DI64Zero) 12760fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I8Zero) 12770fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I16Zero) 12780fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I32Zero) 12790fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV2I64Zero) 12800fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV4I8Zero) 12810fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV4I16Zero) 12820fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DV4I32Zero) 12830b57cec5SDimitry Andric 12840fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI8Zero) 12850fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI16Zero) 12860fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI32Zero) 12870fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayI64Zero) 12880fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I8Zero) 12890fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I16Zero) 12900fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I32Zero) 12910fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV2I64Zero) 12920fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV4I8Zero) 12930fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV4I16Zero) 12940fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld2DArrayV4I32Zero) 12950fca6ea1SDimitry Andric 12960fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI8Zero) 12970fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI16Zero) 12980fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI32Zero) 12990fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DI64Zero) 13000fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I8Zero) 13010fca6ea1SDimitry 
Andric MAKE_CASE(NVPTXISD::Suld3DV2I16Zero) 13020fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I32Zero) 13030fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV2I64Zero) 13040fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV4I8Zero) 13050fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV4I16Zero) 13060fca6ea1SDimitry Andric MAKE_CASE(NVPTXISD::Suld3DV4I32Zero) 13070b57cec5SDimitry Andric } 13080b57cec5SDimitry Andric return nullptr; 13090fca6ea1SDimitry Andric 13100fca6ea1SDimitry Andric #undef MAKE_CASE 13110b57cec5SDimitry Andric } 13120b57cec5SDimitry Andric 13130b57cec5SDimitry Andric TargetLoweringBase::LegalizeTypeAction 13140b57cec5SDimitry Andric NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const { 1315fe6060f1SDimitry Andric if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && 1316fe6060f1SDimitry Andric VT.getScalarType() == MVT::i1) 13170b57cec5SDimitry Andric return TypeSplitVector; 13185f757f3fSDimitry Andric if (Isv2x16VT(VT)) 13190b57cec5SDimitry Andric return TypeLegal; 13200b57cec5SDimitry Andric return TargetLoweringBase::getPreferredVectorAction(VT); 13210b57cec5SDimitry Andric } 13220b57cec5SDimitry Andric 13230b57cec5SDimitry Andric SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, 13240b57cec5SDimitry Andric int Enabled, int &ExtraSteps, 13250b57cec5SDimitry Andric bool &UseOneConst, 13260b57cec5SDimitry Andric bool Reciprocal) const { 13270b57cec5SDimitry Andric if (!(Enabled == ReciprocalEstimate::Enabled || 13280b57cec5SDimitry Andric (Enabled == ReciprocalEstimate::Unspecified && !usePrecSqrtF32()))) 13290b57cec5SDimitry Andric return SDValue(); 13300b57cec5SDimitry Andric 13310b57cec5SDimitry Andric if (ExtraSteps == ReciprocalEstimate::Unspecified) 13320b57cec5SDimitry Andric ExtraSteps = 0; 13330b57cec5SDimitry Andric 13340b57cec5SDimitry Andric SDLoc DL(Operand); 13350b57cec5SDimitry Andric EVT VT = Operand.getValueType(); 13360b57cec5SDimitry Andric bool Ftz = 
useF32FTZ(DAG.getMachineFunction()); 13370b57cec5SDimitry Andric 13380b57cec5SDimitry Andric auto MakeIntrinsicCall = [&](Intrinsic::ID IID) { 13390b57cec5SDimitry Andric return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, 13400b57cec5SDimitry Andric DAG.getConstant(IID, DL, MVT::i32), Operand); 13410b57cec5SDimitry Andric }; 13420b57cec5SDimitry Andric 13430b57cec5SDimitry Andric // The sqrt and rsqrt refinement processes assume we always start out with an 13440b57cec5SDimitry Andric // approximation of the rsqrt. Therefore, if we're going to do any refinement 13450b57cec5SDimitry Andric // (i.e. ExtraSteps > 0), we must return an rsqrt. But if we're *not* doing 13460b57cec5SDimitry Andric // any refinement, we must return a regular sqrt. 13470b57cec5SDimitry Andric if (Reciprocal || ExtraSteps > 0) { 13480b57cec5SDimitry Andric if (VT == MVT::f32) 13490b57cec5SDimitry Andric return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_rsqrt_approx_ftz_f 13500b57cec5SDimitry Andric : Intrinsic::nvvm_rsqrt_approx_f); 13510b57cec5SDimitry Andric else if (VT == MVT::f64) 13520b57cec5SDimitry Andric return MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d); 13530b57cec5SDimitry Andric else 13540b57cec5SDimitry Andric return SDValue(); 13550b57cec5SDimitry Andric } else { 13560b57cec5SDimitry Andric if (VT == MVT::f32) 13570b57cec5SDimitry Andric return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f 13580b57cec5SDimitry Andric : Intrinsic::nvvm_sqrt_approx_f); 13590b57cec5SDimitry Andric else { 13600b57cec5SDimitry Andric // There's no sqrt.approx.f64 instruction, so we emit 13610b57cec5SDimitry Andric // reciprocal(rsqrt(x)). This is faster than 13620b57cec5SDimitry Andric // select(x == 0, 0, x * rsqrt(x)). (In fact, it's faster than plain 13630b57cec5SDimitry Andric // x * rsqrt(x).) 
13640b57cec5SDimitry Andric return DAG.getNode( 13650b57cec5SDimitry Andric ISD::INTRINSIC_WO_CHAIN, DL, VT, 13660b57cec5SDimitry Andric DAG.getConstant(Intrinsic::nvvm_rcp_approx_ftz_d, DL, MVT::i32), 13670b57cec5SDimitry Andric MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d)); 13680b57cec5SDimitry Andric } 13690b57cec5SDimitry Andric } 13700b57cec5SDimitry Andric } 13710b57cec5SDimitry Andric 13720b57cec5SDimitry Andric SDValue 13730b57cec5SDimitry Andric NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { 13740b57cec5SDimitry Andric SDLoc dl(Op); 13750b57cec5SDimitry Andric const GlobalAddressSDNode *GAN = cast<GlobalAddressSDNode>(Op); 13760b57cec5SDimitry Andric auto PtrVT = getPointerTy(DAG.getDataLayout(), GAN->getAddressSpace()); 13770b57cec5SDimitry Andric Op = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, PtrVT); 13780b57cec5SDimitry Andric return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op); 13790b57cec5SDimitry Andric } 13800b57cec5SDimitry Andric 138106c3fb27SDimitry Andric static bool IsTypePassedAsArray(const Type *Ty) { 138206c3fb27SDimitry Andric return Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128) || 138306c3fb27SDimitry Andric Ty->isHalfTy() || Ty->isBFloatTy(); 138406c3fb27SDimitry Andric } 138506c3fb27SDimitry Andric 13860b57cec5SDimitry Andric std::string NVPTXTargetLowering::getPrototype( 13870b57cec5SDimitry Andric const DataLayout &DL, Type *retTy, const ArgListTy &Args, 13885ffd83dbSDimitry Andric const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign retAlignment, 1389bdd1243dSDimitry Andric std::optional<std::pair<unsigned, const APInt &>> VAInfo, 1390e8d8bef9SDimitry Andric const CallBase &CB, unsigned UniqueCallSite) const { 13910b57cec5SDimitry Andric auto PtrVT = getPointerTy(DL); 13920b57cec5SDimitry Andric 13930b57cec5SDimitry Andric bool isABI = (STI.getSmVersion() >= 20); 13940b57cec5SDimitry Andric assert(isABI && "Non-ABI compilation is not supported"); 13950b57cec5SDimitry 
Andric if (!isABI) 13960b57cec5SDimitry Andric return ""; 13970b57cec5SDimitry Andric 1398bdd1243dSDimitry Andric std::string Prototype; 1399bdd1243dSDimitry Andric raw_string_ostream O(Prototype); 1400e8d8bef9SDimitry Andric O << "prototype_" << UniqueCallSite << " : .callprototype "; 14010b57cec5SDimitry Andric 14020b57cec5SDimitry Andric if (retTy->getTypeID() == Type::VoidTyID) { 14030b57cec5SDimitry Andric O << "()"; 14040b57cec5SDimitry Andric } else { 14050b57cec5SDimitry Andric O << "("; 140606c3fb27SDimitry Andric if ((retTy->isFloatingPointTy() || retTy->isIntegerTy()) && 140706c3fb27SDimitry Andric !IsTypePassedAsArray(retTy)) { 14080b57cec5SDimitry Andric unsigned size = 0; 14090b57cec5SDimitry Andric if (auto *ITy = dyn_cast<IntegerType>(retTy)) { 14100b57cec5SDimitry Andric size = ITy->getBitWidth(); 14110b57cec5SDimitry Andric } else { 14120b57cec5SDimitry Andric assert(retTy->isFloatingPointTy() && 14130b57cec5SDimitry Andric "Floating point type expected here"); 14140b57cec5SDimitry Andric size = retTy->getPrimitiveSizeInBits(); 14150b57cec5SDimitry Andric } 14160b57cec5SDimitry Andric // PTX ABI requires all scalar return values to be at least 32 14170b57cec5SDimitry Andric // bits in size. fp16 normally uses .b16 as its storage type in 14180b57cec5SDimitry Andric // PTX, so its size must be adjusted here, too. 1419fcaf7f86SDimitry Andric size = promoteScalarArgumentSize(size); 14200b57cec5SDimitry Andric 14210b57cec5SDimitry Andric O << ".param .b" << size << " _"; 14220b57cec5SDimitry Andric } else if (isa<PointerType>(retTy)) { 14230b57cec5SDimitry Andric O << ".param .b" << PtrVT.getSizeInBits() << " _"; 142406c3fb27SDimitry Andric } else if (IsTypePassedAsArray(retTy)) { 14255ffd83dbSDimitry Andric O << ".param .align " << (retAlignment ? 
retAlignment->value() : 0) 14265ffd83dbSDimitry Andric << " .b8 _[" << DL.getTypeAllocSize(retTy) << "]"; 14270b57cec5SDimitry Andric } else { 14280b57cec5SDimitry Andric llvm_unreachable("Unknown return type"); 14290b57cec5SDimitry Andric } 14300b57cec5SDimitry Andric O << ") "; 14310b57cec5SDimitry Andric } 14320b57cec5SDimitry Andric O << "_ ("; 14330b57cec5SDimitry Andric 14340b57cec5SDimitry Andric bool first = true; 14350b57cec5SDimitry Andric 1436bdd1243dSDimitry Andric unsigned NumArgs = VAInfo ? VAInfo->first : Args.size(); 1437bdd1243dSDimitry Andric for (unsigned i = 0, OIdx = 0; i != NumArgs; ++i, ++OIdx) { 14380b57cec5SDimitry Andric Type *Ty = Args[i].Ty; 14390b57cec5SDimitry Andric if (!first) { 14400b57cec5SDimitry Andric O << ", "; 14410b57cec5SDimitry Andric } 14420b57cec5SDimitry Andric first = false; 14430b57cec5SDimitry Andric 14440b57cec5SDimitry Andric if (!Outs[OIdx].Flags.isByVal()) { 144506c3fb27SDimitry Andric if (IsTypePassedAsArray(Ty)) { 14460fca6ea1SDimitry Andric Align ParamAlign = 14470fca6ea1SDimitry Andric getArgumentAlignment(&CB, Ty, i + AttributeList::FirstArgIndex, DL); 14480fca6ea1SDimitry Andric O << ".param .align " << ParamAlign.value() << " .b8 "; 14490b57cec5SDimitry Andric O << "_"; 145081ad6265SDimitry Andric O << "[" << DL.getTypeAllocSize(Ty) << "]"; 14510b57cec5SDimitry Andric // update the index for Outs 14520b57cec5SDimitry Andric SmallVector<EVT, 16> vtparts; 14530b57cec5SDimitry Andric ComputeValueVTs(*this, DL, Ty, vtparts); 14540b57cec5SDimitry Andric if (unsigned len = vtparts.size()) 14550b57cec5SDimitry Andric OIdx += len - 1; 14560b57cec5SDimitry Andric continue; 14570b57cec5SDimitry Andric } 14580b57cec5SDimitry Andric // i8 types in IR will be i16 types in SDAG 14590b57cec5SDimitry Andric assert((getValueType(DL, Ty) == Outs[OIdx].VT || 14600b57cec5SDimitry Andric (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && 14610b57cec5SDimitry Andric "type mismatch between callee prototype and 
arguments"); 14620b57cec5SDimitry Andric // scalar type 14630b57cec5SDimitry Andric unsigned sz = 0; 14640b57cec5SDimitry Andric if (isa<IntegerType>(Ty)) { 14650b57cec5SDimitry Andric sz = cast<IntegerType>(Ty)->getBitWidth(); 1466fcaf7f86SDimitry Andric sz = promoteScalarArgumentSize(sz); 14670b57cec5SDimitry Andric } else if (isa<PointerType>(Ty)) { 14680b57cec5SDimitry Andric sz = PtrVT.getSizeInBits(); 146906c3fb27SDimitry Andric } else { 14700b57cec5SDimitry Andric sz = Ty->getPrimitiveSizeInBits(); 147106c3fb27SDimitry Andric } 14720b57cec5SDimitry Andric O << ".param .b" << sz << " "; 14730b57cec5SDimitry Andric O << "_"; 14740b57cec5SDimitry Andric continue; 14750b57cec5SDimitry Andric } 14760b57cec5SDimitry Andric 147736b606aeSDimitry Andric // Indirect calls need strict ABI alignment so we disable optimizations by 147836b606aeSDimitry Andric // not providing a function to optimize. 147981ad6265SDimitry Andric Type *ETy = Args[i].IndirectType; 1480bdd1243dSDimitry Andric Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign(); 1481bdd1243dSDimitry Andric Align ParamByValAlign = 148236b606aeSDimitry Andric getFunctionByValParamAlign(/*F=*/nullptr, ETy, InitialAlign, DL); 148381ad6265SDimitry Andric 148481ad6265SDimitry Andric O << ".param .align " << ParamByValAlign.value() << " .b8 "; 14850b57cec5SDimitry Andric O << "_"; 148681ad6265SDimitry Andric O << "[" << Outs[OIdx].Flags.getByValSize() << "]"; 14870b57cec5SDimitry Andric } 1488bdd1243dSDimitry Andric 1489bdd1243dSDimitry Andric if (VAInfo) 1490bdd1243dSDimitry Andric O << (first ? 
"" : ",") << " .param .align " << VAInfo->second 1491bdd1243dSDimitry Andric << " .b8 _[]\n"; 1492bdd1243dSDimitry Andric O << ")"; 1493bdd1243dSDimitry Andric if (shouldEmitPTXNoReturn(&CB, *nvTM)) 1494bdd1243dSDimitry Andric O << " .noreturn"; 1495bdd1243dSDimitry Andric O << ";"; 1496bdd1243dSDimitry Andric 1497bdd1243dSDimitry Andric return Prototype; 14980b57cec5SDimitry Andric } 14990b57cec5SDimitry Andric 15000fca6ea1SDimitry Andric Align NVPTXTargetLowering::getFunctionArgumentAlignment( 15010fca6ea1SDimitry Andric const Function *F, Type *Ty, unsigned Idx, const DataLayout &DL) const { 15020fca6ea1SDimitry Andric return getAlign(*F, Idx).value_or(getFunctionParamOptimizedAlign(F, Ty, DL)); 15030fca6ea1SDimitry Andric } 15040fca6ea1SDimitry Andric 15057a6dacacSDimitry Andric Align NVPTXTargetLowering::getArgumentAlignment(const CallBase *CB, Type *Ty, 15065ffd83dbSDimitry Andric unsigned Idx, 15070b57cec5SDimitry Andric const DataLayout &DL) const { 15085ffd83dbSDimitry Andric if (!CB) { 15090b57cec5SDimitry Andric // CallSite is zero, fallback to ABI type alignment 15105ffd83dbSDimitry Andric return DL.getABITypeAlign(Ty); 15110b57cec5SDimitry Andric } 15120b57cec5SDimitry Andric 15135ffd83dbSDimitry Andric const Function *DirectCallee = CB->getCalledFunction(); 15140b57cec5SDimitry Andric 15150b57cec5SDimitry Andric if (!DirectCallee) { 15160b57cec5SDimitry Andric // We don't have a direct function symbol, but that may be because of 15170b57cec5SDimitry Andric // constant cast instructions in the call. 
15180b57cec5SDimitry Andric 15190b57cec5SDimitry Andric // With bitcast'd call targets, the instruction will be the call 15205ffd83dbSDimitry Andric if (const auto *CI = dyn_cast<CallInst>(CB)) { 15210b57cec5SDimitry Andric // Check if we have call alignment metadata 15220fca6ea1SDimitry Andric if (MaybeAlign StackAlign = getAlign(*CI, Idx)) 15230fca6ea1SDimitry Andric return StackAlign.value(); 15240b57cec5SDimitry Andric } 1525bdd1243dSDimitry Andric DirectCallee = getMaybeBitcastedCallee(CB); 15260b57cec5SDimitry Andric } 15270b57cec5SDimitry Andric 15280b57cec5SDimitry Andric // Check for function alignment information if we found that the 15290b57cec5SDimitry Andric // ultimate target is a Function 15300fca6ea1SDimitry Andric if (DirectCallee) 15310fca6ea1SDimitry Andric return getFunctionArgumentAlignment(DirectCallee, Ty, Idx, DL); 15320b57cec5SDimitry Andric 153381ad6265SDimitry Andric // Call is indirect, fall back to the ABI type alignment 15345ffd83dbSDimitry Andric return DL.getABITypeAlign(Ty); 15350b57cec5SDimitry Andric } 15360b57cec5SDimitry Andric 15370fca6ea1SDimitry Andric static bool adjustElementType(EVT &ElementType) { 15380fca6ea1SDimitry Andric switch (ElementType.getSimpleVT().SimpleTy) { 15390fca6ea1SDimitry Andric default: 15400fca6ea1SDimitry Andric return false; 15410fca6ea1SDimitry Andric case MVT::f16: 15420fca6ea1SDimitry Andric case MVT::bf16: 15430fca6ea1SDimitry Andric ElementType = MVT::i16; 15440fca6ea1SDimitry Andric return true; 15450fca6ea1SDimitry Andric case MVT::f32: 15460fca6ea1SDimitry Andric case MVT::v2f16: 15470fca6ea1SDimitry Andric case MVT::v2bf16: 15480fca6ea1SDimitry Andric ElementType = MVT::i32; 15490fca6ea1SDimitry Andric return true; 15500fca6ea1SDimitry Andric case MVT::f64: 15510fca6ea1SDimitry Andric ElementType = MVT::i64; 15520fca6ea1SDimitry Andric return true; 15530fca6ea1SDimitry Andric } 15540fca6ea1SDimitry Andric } 15550fca6ea1SDimitry Andric 15560fca6ea1SDimitry Andric // Use byte-store when the 
param address of the argument value is unaligned. 15570fca6ea1SDimitry Andric // This may happen when the return value is a field of a packed structure. 15580fca6ea1SDimitry Andric // 15590fca6ea1SDimitry Andric // This is called in LowerCall() when passing the param values. 15600fca6ea1SDimitry Andric static SDValue LowerUnalignedStoreParam(SelectionDAG &DAG, SDValue Chain, 15610fca6ea1SDimitry Andric uint64_t Offset, EVT ElementType, 15620fca6ea1SDimitry Andric SDValue StVal, SDValue &InGlue, 15630fca6ea1SDimitry Andric unsigned ArgID, const SDLoc &dl) { 15640fca6ea1SDimitry Andric // Bit logic only works on integer types 15650fca6ea1SDimitry Andric if (adjustElementType(ElementType)) 15660fca6ea1SDimitry Andric StVal = DAG.getNode(ISD::BITCAST, dl, ElementType, StVal); 15670fca6ea1SDimitry Andric 15680fca6ea1SDimitry Andric // Store each byte 15690fca6ea1SDimitry Andric SDVTList StoreVTs = DAG.getVTList(MVT::Other, MVT::Glue); 15700fca6ea1SDimitry Andric for (unsigned i = 0, n = ElementType.getSizeInBits() / 8; i < n; i++) { 15710fca6ea1SDimitry Andric // Shift the byte to the last byte position 15720fca6ea1SDimitry Andric SDValue ShiftVal = DAG.getNode(ISD::SRL, dl, ElementType, StVal, 15730fca6ea1SDimitry Andric DAG.getConstant(i * 8, dl, MVT::i32)); 15740fca6ea1SDimitry Andric SDValue StoreOperands[] = {Chain, DAG.getConstant(ArgID, dl, MVT::i32), 15750fca6ea1SDimitry Andric DAG.getConstant(Offset + i, dl, MVT::i32), 15760fca6ea1SDimitry Andric ShiftVal, InGlue}; 15770fca6ea1SDimitry Andric // Trunc store only the last byte by using 15780fca6ea1SDimitry Andric // st.param.b8 15790fca6ea1SDimitry Andric // The register type can be larger than b8. 
15800fca6ea1SDimitry Andric Chain = DAG.getMemIntrinsicNode( 15810fca6ea1SDimitry Andric NVPTXISD::StoreParam, dl, StoreVTs, StoreOperands, MVT::i8, 15820fca6ea1SDimitry Andric MachinePointerInfo(), Align(1), MachineMemOperand::MOStore); 15830fca6ea1SDimitry Andric InGlue = Chain.getValue(1); 15840fca6ea1SDimitry Andric } 15850fca6ea1SDimitry Andric return Chain; 15860fca6ea1SDimitry Andric } 15870fca6ea1SDimitry Andric 15880fca6ea1SDimitry Andric // Use byte-load when the param adress of the returned value is unaligned. 15890fca6ea1SDimitry Andric // This may happen when the returned value is a field of a packed structure. 15900fca6ea1SDimitry Andric static SDValue 15910fca6ea1SDimitry Andric LowerUnalignedLoadRetParam(SelectionDAG &DAG, SDValue &Chain, uint64_t Offset, 15920fca6ea1SDimitry Andric EVT ElementType, SDValue &InGlue, 15930fca6ea1SDimitry Andric SmallVectorImpl<SDValue> &TempProxyRegOps, 15940fca6ea1SDimitry Andric const SDLoc &dl) { 15950fca6ea1SDimitry Andric // Bit logic only works on integer types 15960fca6ea1SDimitry Andric EVT MergedType = ElementType; 15970fca6ea1SDimitry Andric adjustElementType(MergedType); 15980fca6ea1SDimitry Andric 15990fca6ea1SDimitry Andric // Load each byte and construct the whole value. 
Initial value to 0 16000fca6ea1SDimitry Andric SDValue RetVal = DAG.getConstant(0, dl, MergedType); 16010fca6ea1SDimitry Andric // LoadParamMemI8 loads into i16 register only 16020fca6ea1SDimitry Andric SDVTList LoadVTs = DAG.getVTList(MVT::i16, MVT::Other, MVT::Glue); 16030fca6ea1SDimitry Andric for (unsigned i = 0, n = ElementType.getSizeInBits() / 8; i < n; i++) { 16040fca6ea1SDimitry Andric SDValue LoadOperands[] = {Chain, DAG.getConstant(1, dl, MVT::i32), 16050fca6ea1SDimitry Andric DAG.getConstant(Offset + i, dl, MVT::i32), 16060fca6ea1SDimitry Andric InGlue}; 16070fca6ea1SDimitry Andric // This will be selected to LoadParamMemI8 16080fca6ea1SDimitry Andric SDValue LdVal = 16090fca6ea1SDimitry Andric DAG.getMemIntrinsicNode(NVPTXISD::LoadParam, dl, LoadVTs, LoadOperands, 16100fca6ea1SDimitry Andric MVT::i8, MachinePointerInfo(), Align(1)); 16110fca6ea1SDimitry Andric SDValue TmpLdVal = LdVal.getValue(0); 16120fca6ea1SDimitry Andric Chain = LdVal.getValue(1); 16130fca6ea1SDimitry Andric InGlue = LdVal.getValue(2); 16140fca6ea1SDimitry Andric 16150fca6ea1SDimitry Andric TmpLdVal = DAG.getNode(NVPTXISD::ProxyReg, dl, 16160fca6ea1SDimitry Andric TmpLdVal.getSimpleValueType(), TmpLdVal); 16170fca6ea1SDimitry Andric TempProxyRegOps.push_back(TmpLdVal); 16180fca6ea1SDimitry Andric 16190fca6ea1SDimitry Andric SDValue CMask = DAG.getConstant(255, dl, MergedType); 16200fca6ea1SDimitry Andric SDValue CShift = DAG.getConstant(i * 8, dl, MVT::i32); 16210fca6ea1SDimitry Andric // Need to extend the i16 register to the whole width. 16220fca6ea1SDimitry Andric TmpLdVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MergedType, TmpLdVal); 16230fca6ea1SDimitry Andric // Mask off the high bits. Leave only the lower 8bits. 16240fca6ea1SDimitry Andric // Do this because we are using loadparam.b8. 
16250fca6ea1SDimitry Andric TmpLdVal = DAG.getNode(ISD::AND, dl, MergedType, TmpLdVal, CMask); 16260fca6ea1SDimitry Andric // Shift and merge 16270fca6ea1SDimitry Andric TmpLdVal = DAG.getNode(ISD::SHL, dl, MergedType, TmpLdVal, CShift); 16280fca6ea1SDimitry Andric RetVal = DAG.getNode(ISD::OR, dl, MergedType, RetVal, TmpLdVal); 16290fca6ea1SDimitry Andric } 16300fca6ea1SDimitry Andric if (ElementType != MergedType) 16310fca6ea1SDimitry Andric RetVal = DAG.getNode(ISD::BITCAST, dl, ElementType, RetVal); 16320fca6ea1SDimitry Andric 16330fca6ea1SDimitry Andric return RetVal; 16340fca6ea1SDimitry Andric } 16350fca6ea1SDimitry Andric 16360b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 16370b57cec5SDimitry Andric SmallVectorImpl<SDValue> &InVals) const { 1638bdd1243dSDimitry Andric 1639bdd1243dSDimitry Andric if (CLI.IsVarArg && (STI.getPTXVersion() < 60 || STI.getSmVersion() < 30)) 1640bdd1243dSDimitry Andric report_fatal_error( 1641bdd1243dSDimitry Andric "Support for variadic functions (unsized array parameter) introduced " 1642bdd1243dSDimitry Andric "in PTX ISA version 6.0 and requires target sm_30."); 1643bdd1243dSDimitry Andric 16440b57cec5SDimitry Andric SelectionDAG &DAG = CLI.DAG; 16450b57cec5SDimitry Andric SDLoc dl = CLI.DL; 16460b57cec5SDimitry Andric SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 16470b57cec5SDimitry Andric SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 16480b57cec5SDimitry Andric SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 16490b57cec5SDimitry Andric SDValue Chain = CLI.Chain; 16500b57cec5SDimitry Andric SDValue Callee = CLI.Callee; 16510b57cec5SDimitry Andric bool &isTailCall = CLI.IsTailCall; 16520b57cec5SDimitry Andric ArgListTy &Args = CLI.getArgs(); 16530b57cec5SDimitry Andric Type *RetTy = CLI.RetTy; 16545ffd83dbSDimitry Andric const CallBase *CB = CLI.CB; 16550b57cec5SDimitry Andric const DataLayout &DL = DAG.getDataLayout(); 16560b57cec5SDimitry Andric 
16570b57cec5SDimitry Andric bool isABI = (STI.getSmVersion() >= 20); 16580b57cec5SDimitry Andric assert(isABI && "Non-ABI compilation is not supported"); 16590b57cec5SDimitry Andric if (!isABI) 16600b57cec5SDimitry Andric return Chain; 16610b57cec5SDimitry Andric 1662bdd1243dSDimitry Andric // Variadic arguments. 1663bdd1243dSDimitry Andric // 1664bdd1243dSDimitry Andric // Normally, for each argument, we declare a param scalar or a param 1665bdd1243dSDimitry Andric // byte array in the .param space, and store the argument value to that 1666bdd1243dSDimitry Andric // param scalar or array starting at offset 0. 1667bdd1243dSDimitry Andric // 1668bdd1243dSDimitry Andric // In the case of the first variadic argument, we declare a vararg byte array 1669bdd1243dSDimitry Andric // with size 0. The exact size of this array isn't known at this point, so 1670bdd1243dSDimitry Andric // it'll be patched later. All the variadic arguments will be stored to this 1671bdd1243dSDimitry Andric // array at a certain offset (which gets tracked by 'VAOffset'). The offset is 1672bdd1243dSDimitry Andric // initially set to 0, so it can be used for non-variadic arguments (which use 1673bdd1243dSDimitry Andric // 0 offset) to simplify the code. 1674bdd1243dSDimitry Andric // 1675bdd1243dSDimitry Andric // After all vararg is processed, 'VAOffset' holds the size of the 1676bdd1243dSDimitry Andric // vararg byte array. 
1677bdd1243dSDimitry Andric 1678bdd1243dSDimitry Andric SDValue VADeclareParam; // vararg byte array 1679bdd1243dSDimitry Andric unsigned FirstVAArg = CLI.NumFixedArgs; // position of the first variadic 1680bdd1243dSDimitry Andric unsigned VAOffset = 0; // current offset in the param array 1681bdd1243dSDimitry Andric 1682e8d8bef9SDimitry Andric unsigned UniqueCallSite = GlobalUniqueCallSite.fetch_add(1); 168381ad6265SDimitry Andric SDValue TempChain = Chain; 1684e8d8bef9SDimitry Andric Chain = DAG.getCALLSEQ_START(Chain, UniqueCallSite, 0, dl); 168506c3fb27SDimitry Andric SDValue InGlue = Chain.getValue(1); 16860b57cec5SDimitry Andric 168781ad6265SDimitry Andric unsigned ParamCount = 0; 16880b57cec5SDimitry Andric // Args.size() and Outs.size() need not match. 16890b57cec5SDimitry Andric // Outs.size() will be larger 16900b57cec5SDimitry Andric // * if there is an aggregate argument with multiple fields (each field 16910b57cec5SDimitry Andric // showing up separately in Outs) 16920b57cec5SDimitry Andric // * if there is a vector argument with more than typical vector-length 16930b57cec5SDimitry Andric // elements (generally if more than 4) where each vector element is 16940b57cec5SDimitry Andric // individually present in Outs. 16950b57cec5SDimitry Andric // So a different index should be used for indexing into Outs/OutVals. 16960b57cec5SDimitry Andric // See similar issue in LowerFormalArguments. 
16970b57cec5SDimitry Andric unsigned OIdx = 0; 16980b57cec5SDimitry Andric // Declare the .params or .reg need to pass values 16990b57cec5SDimitry Andric // to the function 17000b57cec5SDimitry Andric for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { 17010b57cec5SDimitry Andric EVT VT = Outs[OIdx].VT; 17020b57cec5SDimitry Andric Type *Ty = Args[i].Ty; 1703bdd1243dSDimitry Andric bool IsVAArg = (i >= CLI.NumFixedArgs); 170481ad6265SDimitry Andric bool IsByVal = Outs[OIdx].Flags.isByVal(); 17050b57cec5SDimitry Andric 17060b57cec5SDimitry Andric SmallVector<EVT, 16> VTs; 17070b57cec5SDimitry Andric SmallVector<uint64_t, 16> Offsets; 170881ad6265SDimitry Andric 170981ad6265SDimitry Andric assert((!IsByVal || Args[i].IndirectType) && 171081ad6265SDimitry Andric "byval arg must have indirect type"); 171181ad6265SDimitry Andric Type *ETy = (IsByVal ? Args[i].IndirectType : Ty); 1712bdd1243dSDimitry Andric ComputePTXValueVTs(*this, DL, ETy, VTs, &Offsets, IsByVal ? 0 : VAOffset); 171381ad6265SDimitry Andric 171481ad6265SDimitry Andric Align ArgAlign; 171581ad6265SDimitry Andric if (IsByVal) { 171681ad6265SDimitry Andric // The ByValAlign in the Outs[OIdx].Flags is always set at this point, 171781ad6265SDimitry Andric // so we don't need to worry whether it's naturally aligned or not. 171881ad6265SDimitry Andric // See TargetLowering::LowerCallTo(). 1719bdd1243dSDimitry Andric Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign(); 1720bdd1243dSDimitry Andric ArgAlign = getFunctionByValParamAlign(CB->getCalledFunction(), ETy, 1721bdd1243dSDimitry Andric InitialAlign, DL); 1722bdd1243dSDimitry Andric if (IsVAArg) 1723bdd1243dSDimitry Andric VAOffset = alignTo(VAOffset, ArgAlign); 172481ad6265SDimitry Andric } else { 17257a6dacacSDimitry Andric ArgAlign = getArgumentAlignment(CB, Ty, ParamCount + 1, DL); 172681ad6265SDimitry Andric } 172781ad6265SDimitry Andric 172881ad6265SDimitry Andric unsigned TypeSize = 172981ad6265SDimitry Andric (IsByVal ? 
Outs[OIdx].Flags.getByValSize() : DL.getTypeAllocSize(Ty)); 17300b57cec5SDimitry Andric SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 173181ad6265SDimitry Andric 17320b57cec5SDimitry Andric bool NeedAlign; // Does argument declaration specify alignment? 173306c3fb27SDimitry Andric bool PassAsArray = IsByVal || IsTypePassedAsArray(Ty); 1734bdd1243dSDimitry Andric if (IsVAArg) { 1735bdd1243dSDimitry Andric if (ParamCount == FirstVAArg) { 1736bdd1243dSDimitry Andric SDValue DeclareParamOps[] = { 1737bdd1243dSDimitry Andric Chain, DAG.getConstant(STI.getMaxRequiredAlignment(), dl, MVT::i32), 1738bdd1243dSDimitry Andric DAG.getConstant(ParamCount, dl, MVT::i32), 173906c3fb27SDimitry Andric DAG.getConstant(1, dl, MVT::i32), InGlue}; 1740bdd1243dSDimitry Andric VADeclareParam = Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, 1741bdd1243dSDimitry Andric DeclareParamVTs, DeclareParamOps); 1742bdd1243dSDimitry Andric } 174306c3fb27SDimitry Andric NeedAlign = PassAsArray; 174406c3fb27SDimitry Andric } else if (PassAsArray) { 17450b57cec5SDimitry Andric // declare .param .align <align> .b8 .param<n>[<size>]; 17460b57cec5SDimitry Andric SDValue DeclareParamOps[] = { 17475ffd83dbSDimitry Andric Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32), 174881ad6265SDimitry Andric DAG.getConstant(ParamCount, dl, MVT::i32), 174906c3fb27SDimitry Andric DAG.getConstant(TypeSize, dl, MVT::i32), InGlue}; 17500b57cec5SDimitry Andric Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, 17510b57cec5SDimitry Andric DeclareParamOps); 17520b57cec5SDimitry Andric NeedAlign = true; 17530b57cec5SDimitry Andric } else { 17540b57cec5SDimitry Andric // declare .param .b<size> .param<n>; 1755fcaf7f86SDimitry Andric if (VT.isInteger() || VT.isFloatingPoint()) { 17560b57cec5SDimitry Andric // PTX ABI requires integral types to be at least 32 bits in 17570b57cec5SDimitry Andric // size. 
FP16 is loaded/stored using i16, so it's handled 17580b57cec5SDimitry Andric // here as well. 1759fcaf7f86SDimitry Andric TypeSize = promoteScalarArgumentSize(TypeSize * 8) / 8; 17600b57cec5SDimitry Andric } 17610b57cec5SDimitry Andric SDValue DeclareScalarParamOps[] = { 176281ad6265SDimitry Andric Chain, DAG.getConstant(ParamCount, dl, MVT::i32), 176381ad6265SDimitry Andric DAG.getConstant(TypeSize * 8, dl, MVT::i32), 176406c3fb27SDimitry Andric DAG.getConstant(0, dl, MVT::i32), InGlue}; 17650b57cec5SDimitry Andric Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, 17660b57cec5SDimitry Andric DeclareScalarParamOps); 17670b57cec5SDimitry Andric NeedAlign = false; 17680b57cec5SDimitry Andric } 176906c3fb27SDimitry Andric InGlue = Chain.getValue(1); 17700b57cec5SDimitry Andric 17710b57cec5SDimitry Andric // PTX Interoperability Guide 3.3(A): [Integer] Values shorter 17720b57cec5SDimitry Andric // than 32-bits are sign extended or zero extended, depending on 17730b57cec5SDimitry Andric // whether they are signed or unsigned types. This case applies 17740b57cec5SDimitry Andric // only to scalar parameters and not to aggregate values. 
17750b57cec5SDimitry Andric bool ExtendIntegerParam = 17760b57cec5SDimitry Andric Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32; 17770b57cec5SDimitry Andric 1778bdd1243dSDimitry Andric auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign, IsVAArg); 17790b57cec5SDimitry Andric SmallVector<SDValue, 6> StoreOperands; 17800b57cec5SDimitry Andric for (unsigned j = 0, je = VTs.size(); j != je; ++j) { 178181ad6265SDimitry Andric EVT EltVT = VTs[j]; 178281ad6265SDimitry Andric int CurOffset = Offsets[j]; 178381ad6265SDimitry Andric MaybeAlign PartAlign; 178481ad6265SDimitry Andric if (NeedAlign) 178581ad6265SDimitry Andric PartAlign = commonAlignment(ArgAlign, CurOffset); 178681ad6265SDimitry Andric 17870b57cec5SDimitry Andric SDValue StVal = OutVals[OIdx]; 1788fcaf7f86SDimitry Andric 1789fcaf7f86SDimitry Andric MVT PromotedVT; 1790fcaf7f86SDimitry Andric if (PromoteScalarIntegerPTX(EltVT, &PromotedVT)) { 1791fcaf7f86SDimitry Andric EltVT = EVT(PromotedVT); 1792fcaf7f86SDimitry Andric } 1793fcaf7f86SDimitry Andric if (PromoteScalarIntegerPTX(StVal.getValueType(), &PromotedVT)) { 1794fcaf7f86SDimitry Andric llvm::ISD::NodeType Ext = 1795fcaf7f86SDimitry Andric Outs[OIdx].Flags.isSExt() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 1796fcaf7f86SDimitry Andric StVal = DAG.getNode(Ext, dl, PromotedVT, StVal); 1797fcaf7f86SDimitry Andric } 1798fcaf7f86SDimitry Andric 179981ad6265SDimitry Andric if (IsByVal) { 180081ad6265SDimitry Andric auto PtrVT = getPointerTy(DL); 180181ad6265SDimitry Andric SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StVal, 180281ad6265SDimitry Andric DAG.getConstant(CurOffset, dl, PtrVT)); 180381ad6265SDimitry Andric StVal = DAG.getLoad(EltVT, dl, TempChain, srcAddr, MachinePointerInfo(), 180481ad6265SDimitry Andric PartAlign); 180581ad6265SDimitry Andric } else if (ExtendIntegerParam) { 18060b57cec5SDimitry Andric assert(VTs.size() == 1 && "Scalar can't have multiple parts."); 18070b57cec5SDimitry Andric // zext/sext to i32 18080b57cec5SDimitry Andric StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND 18090b57cec5SDimitry Andric : ISD::ZERO_EXTEND, 18100b57cec5SDimitry Andric dl, MVT::i32, StVal); 181181ad6265SDimitry Andric } 181281ad6265SDimitry Andric 181381ad6265SDimitry Andric if (!ExtendIntegerParam && EltVT.getSizeInBits() < 16) { 18140b57cec5SDimitry Andric // Use 16-bit registers for small stores as it's the 18150b57cec5SDimitry Andric // smallest general purpose register size supported by NVPTX. 18160b57cec5SDimitry Andric StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal); 18170b57cec5SDimitry Andric } 18180b57cec5SDimitry Andric 18190fca6ea1SDimitry Andric // If we have a PVF_SCALAR entry, it may not be sufficiently aligned for a 18200fca6ea1SDimitry Andric // scalar store. In such cases, fall back to byte stores. 
18210fca6ea1SDimitry Andric if (VectorInfo[j] == PVF_SCALAR && !IsVAArg && PartAlign.has_value() && 18220fca6ea1SDimitry Andric PartAlign.value() < 18230fca6ea1SDimitry Andric DL.getABITypeAlign(EltVT.getTypeForEVT(*DAG.getContext()))) { 18240fca6ea1SDimitry Andric assert(StoreOperands.empty() && "Unfinished preceeding store."); 18250fca6ea1SDimitry Andric Chain = LowerUnalignedStoreParam( 18260fca6ea1SDimitry Andric DAG, Chain, IsByVal ? CurOffset + VAOffset : CurOffset, EltVT, 18270fca6ea1SDimitry Andric StVal, InGlue, ParamCount, dl); 18280fca6ea1SDimitry Andric 18290fca6ea1SDimitry Andric // LowerUnalignedStoreParam took care of inserting the necessary nodes 18300fca6ea1SDimitry Andric // into the SDAG, so just move on to the next element. 18310fca6ea1SDimitry Andric if (!IsByVal) 18320fca6ea1SDimitry Andric ++OIdx; 18330fca6ea1SDimitry Andric continue; 18340fca6ea1SDimitry Andric } 18350fca6ea1SDimitry Andric 18360fca6ea1SDimitry Andric // New store. 18370fca6ea1SDimitry Andric if (VectorInfo[j] & PVF_FIRST) { 18380fca6ea1SDimitry Andric assert(StoreOperands.empty() && "Unfinished preceding store."); 18390fca6ea1SDimitry Andric StoreOperands.push_back(Chain); 18400fca6ea1SDimitry Andric StoreOperands.push_back( 18410fca6ea1SDimitry Andric DAG.getConstant(IsVAArg ? FirstVAArg : ParamCount, dl, MVT::i32)); 18420fca6ea1SDimitry Andric 18430fca6ea1SDimitry Andric StoreOperands.push_back(DAG.getConstant( 18440fca6ea1SDimitry Andric IsByVal ? CurOffset + VAOffset : (IsVAArg ? VAOffset : CurOffset), 18450fca6ea1SDimitry Andric dl, MVT::i32)); 18460fca6ea1SDimitry Andric } 18470fca6ea1SDimitry Andric 18480b57cec5SDimitry Andric // Record the value to store. 
18490b57cec5SDimitry Andric StoreOperands.push_back(StVal); 18500b57cec5SDimitry Andric 18510b57cec5SDimitry Andric if (VectorInfo[j] & PVF_LAST) { 18520b57cec5SDimitry Andric unsigned NumElts = StoreOperands.size() - 3; 18530b57cec5SDimitry Andric NVPTXISD::NodeType Op; 18540b57cec5SDimitry Andric switch (NumElts) { 18550b57cec5SDimitry Andric case 1: 18560b57cec5SDimitry Andric Op = NVPTXISD::StoreParam; 18570b57cec5SDimitry Andric break; 18580b57cec5SDimitry Andric case 2: 18590b57cec5SDimitry Andric Op = NVPTXISD::StoreParamV2; 18600b57cec5SDimitry Andric break; 18610b57cec5SDimitry Andric case 4: 18620b57cec5SDimitry Andric Op = NVPTXISD::StoreParamV4; 18630b57cec5SDimitry Andric break; 18640b57cec5SDimitry Andric default: 18650b57cec5SDimitry Andric llvm_unreachable("Invalid vector info."); 18660b57cec5SDimitry Andric } 18670b57cec5SDimitry Andric 186806c3fb27SDimitry Andric StoreOperands.push_back(InGlue); 18690b57cec5SDimitry Andric 18700b57cec5SDimitry Andric // Adjust type of the store op if we've extended the scalar 18710b57cec5SDimitry Andric // return value. 187281ad6265SDimitry Andric EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT; 18730b57cec5SDimitry Andric 18740b57cec5SDimitry Andric Chain = DAG.getMemIntrinsicNode( 18750b57cec5SDimitry Andric Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands, 187681ad6265SDimitry Andric TheStoreType, MachinePointerInfo(), PartAlign, 18770b57cec5SDimitry Andric MachineMemOperand::MOStore); 187806c3fb27SDimitry Andric InGlue = Chain.getValue(1); 18790b57cec5SDimitry Andric 18800b57cec5SDimitry Andric // Cleanup. 18810b57cec5SDimitry Andric StoreOperands.clear(); 1882bdd1243dSDimitry Andric 1883bdd1243dSDimitry Andric // TODO: We may need to support vector types that can be passed 1884bdd1243dSDimitry Andric // as scalars in variadic arguments. 
1885bdd1243dSDimitry Andric if (!IsByVal && IsVAArg) { 1886bdd1243dSDimitry Andric assert(NumElts == 1 && 1887bdd1243dSDimitry Andric "Vectorization is expected to be disabled for variadics."); 1888bdd1243dSDimitry Andric VAOffset += DL.getTypeAllocSize( 1889bdd1243dSDimitry Andric TheStoreType.getTypeForEVT(*DAG.getContext())); 1890bdd1243dSDimitry Andric } 18910b57cec5SDimitry Andric } 189281ad6265SDimitry Andric if (!IsByVal) 18930b57cec5SDimitry Andric ++OIdx; 18940b57cec5SDimitry Andric } 18950b57cec5SDimitry Andric assert(StoreOperands.empty() && "Unfinished parameter store."); 189681ad6265SDimitry Andric if (!IsByVal && VTs.size() > 0) 18970b57cec5SDimitry Andric --OIdx; 189881ad6265SDimitry Andric ++ParamCount; 1899bdd1243dSDimitry Andric if (IsByVal && IsVAArg) 1900bdd1243dSDimitry Andric VAOffset += TypeSize; 19010b57cec5SDimitry Andric } 19020b57cec5SDimitry Andric 19030b57cec5SDimitry Andric GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode()); 1904bdd1243dSDimitry Andric MaybeAlign retAlignment = std::nullopt; 19050b57cec5SDimitry Andric 19060b57cec5SDimitry Andric // Handle Result 19070b57cec5SDimitry Andric if (Ins.size() > 0) { 19080b57cec5SDimitry Andric SmallVector<EVT, 16> resvtparts; 19090b57cec5SDimitry Andric ComputeValueVTs(*this, DL, RetTy, resvtparts); 19100b57cec5SDimitry Andric 19110b57cec5SDimitry Andric // Declare 191206c3fb27SDimitry Andric // .param .align N .b8 retval0[<size-in-bytes>], or 19130b57cec5SDimitry Andric // .param .b<size-in-bits> retval0 19140b57cec5SDimitry Andric unsigned resultsz = DL.getTypeAllocSizeInBits(RetTy); 191506c3fb27SDimitry Andric if (!IsTypePassedAsArray(RetTy)) { 1916fcaf7f86SDimitry Andric resultsz = promoteScalarArgumentSize(resultsz); 19170b57cec5SDimitry Andric SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); 19180b57cec5SDimitry Andric SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), 19190b57cec5SDimitry Andric DAG.getConstant(resultsz, dl, 
MVT::i32), 192006c3fb27SDimitry Andric DAG.getConstant(0, dl, MVT::i32), InGlue }; 19210b57cec5SDimitry Andric Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, 19220b57cec5SDimitry Andric DeclareRetOps); 192306c3fb27SDimitry Andric InGlue = Chain.getValue(1); 19240b57cec5SDimitry Andric } else { 19257a6dacacSDimitry Andric retAlignment = getArgumentAlignment(CB, RetTy, 0, DL); 19265ffd83dbSDimitry Andric assert(retAlignment && "retAlignment is guaranteed to be set"); 19270b57cec5SDimitry Andric SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); 19285ffd83dbSDimitry Andric SDValue DeclareRetOps[] = { 19295ffd83dbSDimitry Andric Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32), 19300b57cec5SDimitry Andric DAG.getConstant(resultsz / 8, dl, MVT::i32), 193106c3fb27SDimitry Andric DAG.getConstant(0, dl, MVT::i32), InGlue}; 19320b57cec5SDimitry Andric Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, 19330b57cec5SDimitry Andric DeclareRetOps); 193406c3fb27SDimitry Andric InGlue = Chain.getValue(1); 19350b57cec5SDimitry Andric } 19360b57cec5SDimitry Andric } 19370b57cec5SDimitry Andric 1938bdd1243dSDimitry Andric bool HasVAArgs = CLI.IsVarArg && (CLI.Args.size() > CLI.NumFixedArgs); 1939bdd1243dSDimitry Andric // Set the size of the vararg param byte array if the callee is a variadic 1940bdd1243dSDimitry Andric // function and the variadic part is not empty. 
1941bdd1243dSDimitry Andric if (HasVAArgs) { 1942bdd1243dSDimitry Andric SDValue DeclareParamOps[] = { 1943bdd1243dSDimitry Andric VADeclareParam.getOperand(0), VADeclareParam.getOperand(1), 1944bdd1243dSDimitry Andric VADeclareParam.getOperand(2), DAG.getConstant(VAOffset, dl, MVT::i32), 1945bdd1243dSDimitry Andric VADeclareParam.getOperand(4)}; 1946bdd1243dSDimitry Andric DAG.MorphNodeTo(VADeclareParam.getNode(), VADeclareParam.getOpcode(), 1947bdd1243dSDimitry Andric VADeclareParam->getVTList(), DeclareParamOps); 1948bdd1243dSDimitry Andric } 1949bdd1243dSDimitry Andric 19500b57cec5SDimitry Andric // Both indirect calls and libcalls have nullptr Func. In order to distinguish 19510b57cec5SDimitry Andric // between them we must rely on the call site value which is valid for 19520b57cec5SDimitry Andric // indirect calls but is always null for libcalls. 19535ffd83dbSDimitry Andric bool isIndirectCall = !Func && CB; 19540b57cec5SDimitry Andric 19550b57cec5SDimitry Andric if (isa<ExternalSymbolSDNode>(Callee)) { 19560b57cec5SDimitry Andric Function* CalleeFunc = nullptr; 19570b57cec5SDimitry Andric 19580b57cec5SDimitry Andric // Try to find the callee in the current module. 19590b57cec5SDimitry Andric Callee = DAG.getSymbolFunctionGlobalAddress(Callee, &CalleeFunc); 19600b57cec5SDimitry Andric assert(CalleeFunc != nullptr && "Libcall callee must be set."); 19610b57cec5SDimitry Andric 19620b57cec5SDimitry Andric // Set the "libcall callee" attribute to indicate that the function 19630b57cec5SDimitry Andric // must always have a declaration. 
19640b57cec5SDimitry Andric CalleeFunc->addFnAttr("nvptx-libcall-callee", "true"); 19650b57cec5SDimitry Andric } 19660b57cec5SDimitry Andric 19670b57cec5SDimitry Andric if (isIndirectCall) { 19680b57cec5SDimitry Andric // This is indirect function call case : PTX requires a prototype of the 19690b57cec5SDimitry Andric // form 19700b57cec5SDimitry Andric // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _); 19710b57cec5SDimitry Andric // to be emitted, and the label has to used as the last arg of call 19720b57cec5SDimitry Andric // instruction. 19730b57cec5SDimitry Andric // The prototype is embedded in a string and put as the operand for a 19740b57cec5SDimitry Andric // CallPrototype SDNode which will print out to the value of the string. 19750b57cec5SDimitry Andric SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1976bdd1243dSDimitry Andric std::string Proto = getPrototype( 1977bdd1243dSDimitry Andric DL, RetTy, Args, Outs, retAlignment, 1978bdd1243dSDimitry Andric HasVAArgs 1979bdd1243dSDimitry Andric ? 
std::optional<std::pair<unsigned, const APInt &>>(std::make_pair( 1980297eecfbSDimitry Andric CLI.NumFixedArgs, VADeclareParam->getConstantOperandAPInt(1))) 1981bdd1243dSDimitry Andric : std::nullopt, 1982bdd1243dSDimitry Andric *CB, UniqueCallSite); 1983bdd1243dSDimitry Andric const char *ProtoStr = nvTM->getStrPool().save(Proto).data(); 19840b57cec5SDimitry Andric SDValue ProtoOps[] = { 1985bdd1243dSDimitry Andric Chain, 1986bdd1243dSDimitry Andric DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), 198706c3fb27SDimitry Andric InGlue, 19880b57cec5SDimitry Andric }; 19890b57cec5SDimitry Andric Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps); 199006c3fb27SDimitry Andric InGlue = Chain.getValue(1); 19910b57cec5SDimitry Andric } 19920b57cec5SDimitry Andric // Op to just print "call" 19930b57cec5SDimitry Andric SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); 19940b57cec5SDimitry Andric SDValue PrintCallOps[] = { 199506c3fb27SDimitry Andric Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InGlue 19960b57cec5SDimitry Andric }; 19970b57cec5SDimitry Andric // We model convergent calls as separate opcodes. 19980b57cec5SDimitry Andric unsigned Opcode = isIndirectCall ? NVPTXISD::PrintCall : NVPTXISD::PrintCallUni; 19990b57cec5SDimitry Andric if (CLI.IsConvergent) 20000b57cec5SDimitry Andric Opcode = Opcode == NVPTXISD::PrintCallUni ? 
NVPTXISD::PrintConvergentCallUni 20010b57cec5SDimitry Andric : NVPTXISD::PrintConvergentCall; 20020b57cec5SDimitry Andric Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps); 200306c3fb27SDimitry Andric InGlue = Chain.getValue(1); 20040b57cec5SDimitry Andric 20050b57cec5SDimitry Andric // Ops to print out the function name 20060b57cec5SDimitry Andric SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue); 200706c3fb27SDimitry Andric SDValue CallVoidOps[] = { Chain, Callee, InGlue }; 20080b57cec5SDimitry Andric Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps); 200906c3fb27SDimitry Andric InGlue = Chain.getValue(1); 20100b57cec5SDimitry Andric 20110b57cec5SDimitry Andric // Ops to print out the param list 20120b57cec5SDimitry Andric SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue); 201306c3fb27SDimitry Andric SDValue CallArgBeginOps[] = { Chain, InGlue }; 20140b57cec5SDimitry Andric Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs, 20150b57cec5SDimitry Andric CallArgBeginOps); 201606c3fb27SDimitry Andric InGlue = Chain.getValue(1); 20170b57cec5SDimitry Andric 2018bdd1243dSDimitry Andric for (unsigned i = 0, e = std::min(CLI.NumFixedArgs + 1, ParamCount); i != e; 2019bdd1243dSDimitry Andric ++i) { 20200b57cec5SDimitry Andric unsigned opcode; 20210b57cec5SDimitry Andric if (i == (e - 1)) 20220b57cec5SDimitry Andric opcode = NVPTXISD::LastCallArg; 20230b57cec5SDimitry Andric else 20240b57cec5SDimitry Andric opcode = NVPTXISD::CallArg; 20250b57cec5SDimitry Andric SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); 20260b57cec5SDimitry Andric SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), 202706c3fb27SDimitry Andric DAG.getConstant(i, dl, MVT::i32), InGlue }; 20280b57cec5SDimitry Andric Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps); 202906c3fb27SDimitry Andric InGlue = Chain.getValue(1); 20300b57cec5SDimitry Andric } 20310b57cec5SDimitry Andric SDVTList CallArgEndVTs = 
DAG.getVTList(MVT::Other, MVT::Glue); 20320b57cec5SDimitry Andric SDValue CallArgEndOps[] = { Chain, 20330b57cec5SDimitry Andric DAG.getConstant(isIndirectCall ? 0 : 1, dl, MVT::i32), 203406c3fb27SDimitry Andric InGlue }; 20350b57cec5SDimitry Andric Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps); 203606c3fb27SDimitry Andric InGlue = Chain.getValue(1); 20370b57cec5SDimitry Andric 20380b57cec5SDimitry Andric if (isIndirectCall) { 20390b57cec5SDimitry Andric SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); 2040e8d8bef9SDimitry Andric SDValue PrototypeOps[] = { 204106c3fb27SDimitry Andric Chain, DAG.getConstant(UniqueCallSite, dl, MVT::i32), InGlue}; 20420b57cec5SDimitry Andric Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps); 204306c3fb27SDimitry Andric InGlue = Chain.getValue(1); 20440b57cec5SDimitry Andric } 20450b57cec5SDimitry Andric 20460b57cec5SDimitry Andric SmallVector<SDValue, 16> ProxyRegOps; 2047bdd1243dSDimitry Andric SmallVector<std::optional<MVT>, 16> ProxyRegTruncates; 20480fca6ea1SDimitry Andric // An item of the vector is filled if the element does not need a ProxyReg 20490fca6ea1SDimitry Andric // operation on it and should be added to InVals as is. ProxyRegOps and 20500fca6ea1SDimitry Andric // ProxyRegTruncates contain empty/none items at the same index. 20510fca6ea1SDimitry Andric SmallVector<SDValue, 16> RetElts; 20520fca6ea1SDimitry Andric // A temporary ProxyReg operations inserted in `LowerUnalignedLoadRetParam()` 20530fca6ea1SDimitry Andric // to use the values of `LoadParam`s and to be replaced later then 20540fca6ea1SDimitry Andric // `CALLSEQ_END` is added. 
20550fca6ea1SDimitry Andric SmallVector<SDValue, 16> TempProxyRegOps; 20560b57cec5SDimitry Andric 20570b57cec5SDimitry Andric // Generate loads from param memory/moves from registers for result 20580b57cec5SDimitry Andric if (Ins.size() > 0) { 20590b57cec5SDimitry Andric SmallVector<EVT, 16> VTs; 20600b57cec5SDimitry Andric SmallVector<uint64_t, 16> Offsets; 20610b57cec5SDimitry Andric ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0); 20620b57cec5SDimitry Andric assert(VTs.size() == Ins.size() && "Bad value decomposition"); 20630b57cec5SDimitry Andric 20647a6dacacSDimitry Andric Align RetAlign = getArgumentAlignment(CB, RetTy, 0, DL); 20650b57cec5SDimitry Andric auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign); 20660b57cec5SDimitry Andric 20670b57cec5SDimitry Andric SmallVector<EVT, 6> LoadVTs; 20680b57cec5SDimitry Andric int VecIdx = -1; // Index of the first element of the vector. 20690b57cec5SDimitry Andric 20700b57cec5SDimitry Andric // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than 20710b57cec5SDimitry Andric // 32-bits are sign extended or zero extended, depending on whether 20720b57cec5SDimitry Andric // they are signed or unsigned types. 
20730b57cec5SDimitry Andric bool ExtendIntegerRetVal = 20740b57cec5SDimitry Andric RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32; 20750b57cec5SDimitry Andric 20760b57cec5SDimitry Andric for (unsigned i = 0, e = VTs.size(); i != e; ++i) { 20770b57cec5SDimitry Andric bool needTruncate = false; 20780b57cec5SDimitry Andric EVT TheLoadType = VTs[i]; 20790b57cec5SDimitry Andric EVT EltType = Ins[i].VT; 20805ffd83dbSDimitry Andric Align EltAlign = commonAlignment(RetAlign, Offsets[i]); 2081fcaf7f86SDimitry Andric MVT PromotedVT; 2082fcaf7f86SDimitry Andric 2083fcaf7f86SDimitry Andric if (PromoteScalarIntegerPTX(TheLoadType, &PromotedVT)) { 2084fcaf7f86SDimitry Andric TheLoadType = EVT(PromotedVT); 2085fcaf7f86SDimitry Andric EltType = EVT(PromotedVT); 2086fcaf7f86SDimitry Andric needTruncate = true; 2087fcaf7f86SDimitry Andric } 2088fcaf7f86SDimitry Andric 20890b57cec5SDimitry Andric if (ExtendIntegerRetVal) { 20900b57cec5SDimitry Andric TheLoadType = MVT::i32; 20910b57cec5SDimitry Andric EltType = MVT::i32; 20920b57cec5SDimitry Andric needTruncate = true; 20930b57cec5SDimitry Andric } else if (TheLoadType.getSizeInBits() < 16) { 20940b57cec5SDimitry Andric if (VTs[i].isInteger()) 20950b57cec5SDimitry Andric needTruncate = true; 20960b57cec5SDimitry Andric EltType = MVT::i16; 20970b57cec5SDimitry Andric } 20980b57cec5SDimitry Andric 20990fca6ea1SDimitry Andric // If we have a PVF_SCALAR entry, it may not be sufficiently aligned for a 21000fca6ea1SDimitry Andric // scalar load. In such cases, fall back to byte loads. 
21010fca6ea1SDimitry Andric if (VectorInfo[i] == PVF_SCALAR && RetTy->isAggregateType() && 21020fca6ea1SDimitry Andric EltAlign < DL.getABITypeAlign( 21030fca6ea1SDimitry Andric TheLoadType.getTypeForEVT(*DAG.getContext()))) { 21040fca6ea1SDimitry Andric assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list."); 21050fca6ea1SDimitry Andric SDValue Ret = LowerUnalignedLoadRetParam( 21060fca6ea1SDimitry Andric DAG, Chain, Offsets[i], TheLoadType, InGlue, TempProxyRegOps, dl); 21070fca6ea1SDimitry Andric ProxyRegOps.push_back(SDValue()); 21080fca6ea1SDimitry Andric ProxyRegTruncates.push_back(std::optional<MVT>()); 21090fca6ea1SDimitry Andric RetElts.resize(i); 21100fca6ea1SDimitry Andric RetElts.push_back(Ret); 21110fca6ea1SDimitry Andric 21120fca6ea1SDimitry Andric continue; 21130fca6ea1SDimitry Andric } 21140fca6ea1SDimitry Andric 21150b57cec5SDimitry Andric // Record index of the very first element of the vector. 21160b57cec5SDimitry Andric if (VectorInfo[i] & PVF_FIRST) { 21170b57cec5SDimitry Andric assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list."); 21180b57cec5SDimitry Andric VecIdx = i; 21190b57cec5SDimitry Andric } 21200b57cec5SDimitry Andric 21210b57cec5SDimitry Andric LoadVTs.push_back(EltType); 21220b57cec5SDimitry Andric 21230b57cec5SDimitry Andric if (VectorInfo[i] & PVF_LAST) { 21240b57cec5SDimitry Andric unsigned NumElts = LoadVTs.size(); 21250b57cec5SDimitry Andric LoadVTs.push_back(MVT::Other); 21260b57cec5SDimitry Andric LoadVTs.push_back(MVT::Glue); 21270b57cec5SDimitry Andric NVPTXISD::NodeType Op; 21280b57cec5SDimitry Andric switch (NumElts) { 21290b57cec5SDimitry Andric case 1: 21300b57cec5SDimitry Andric Op = NVPTXISD::LoadParam; 21310b57cec5SDimitry Andric break; 21320b57cec5SDimitry Andric case 2: 21330b57cec5SDimitry Andric Op = NVPTXISD::LoadParamV2; 21340b57cec5SDimitry Andric break; 21350b57cec5SDimitry Andric case 4: 21360b57cec5SDimitry Andric Op = NVPTXISD::LoadParamV4; 21370b57cec5SDimitry Andric break; 
21380b57cec5SDimitry Andric default: 21390b57cec5SDimitry Andric llvm_unreachable("Invalid vector info."); 21400b57cec5SDimitry Andric } 21410b57cec5SDimitry Andric 21420b57cec5SDimitry Andric SDValue LoadOperands[] = { 21430b57cec5SDimitry Andric Chain, DAG.getConstant(1, dl, MVT::i32), 214406c3fb27SDimitry Andric DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InGlue}; 21450b57cec5SDimitry Andric SDValue RetVal = DAG.getMemIntrinsicNode( 21460b57cec5SDimitry Andric Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType, 21470b57cec5SDimitry Andric MachinePointerInfo(), EltAlign, 21480b57cec5SDimitry Andric MachineMemOperand::MOLoad); 21490b57cec5SDimitry Andric 21500b57cec5SDimitry Andric for (unsigned j = 0; j < NumElts; ++j) { 21510b57cec5SDimitry Andric ProxyRegOps.push_back(RetVal.getValue(j)); 21520b57cec5SDimitry Andric 21530b57cec5SDimitry Andric if (needTruncate) 2154bdd1243dSDimitry Andric ProxyRegTruncates.push_back(std::optional<MVT>(Ins[VecIdx + j].VT)); 21550b57cec5SDimitry Andric else 2156bdd1243dSDimitry Andric ProxyRegTruncates.push_back(std::optional<MVT>()); 21570b57cec5SDimitry Andric } 21580b57cec5SDimitry Andric 21590b57cec5SDimitry Andric Chain = RetVal.getValue(NumElts); 216006c3fb27SDimitry Andric InGlue = RetVal.getValue(NumElts + 1); 21610b57cec5SDimitry Andric 21620b57cec5SDimitry Andric // Cleanup 21630b57cec5SDimitry Andric VecIdx = -1; 21640b57cec5SDimitry Andric LoadVTs.clear(); 21650b57cec5SDimitry Andric } 21660b57cec5SDimitry Andric } 21670b57cec5SDimitry Andric } 21680b57cec5SDimitry Andric 2169bdd1243dSDimitry Andric Chain = 217006c3fb27SDimitry Andric DAG.getCALLSEQ_END(Chain, UniqueCallSite, UniqueCallSite + 1, InGlue, dl); 217106c3fb27SDimitry Andric InGlue = Chain.getValue(1); 21720b57cec5SDimitry Andric 21730b57cec5SDimitry Andric // Append ProxyReg instructions to the chain to make sure that `callseq_end` 21740b57cec5SDimitry Andric // will not get lost. 
Otherwise, during libcalls expansion, the nodes can become 21750b57cec5SDimitry Andric // dangling. 21760b57cec5SDimitry Andric for (unsigned i = 0; i < ProxyRegOps.size(); ++i) { 21770fca6ea1SDimitry Andric if (i < RetElts.size() && RetElts[i]) { 21780fca6ea1SDimitry Andric InVals.push_back(RetElts[i]); 21790fca6ea1SDimitry Andric continue; 21800fca6ea1SDimitry Andric } 21810fca6ea1SDimitry Andric 21820b57cec5SDimitry Andric SDValue Ret = DAG.getNode( 21830b57cec5SDimitry Andric NVPTXISD::ProxyReg, dl, 21840b57cec5SDimitry Andric DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other, MVT::Glue), 218506c3fb27SDimitry Andric { Chain, ProxyRegOps[i], InGlue } 21860b57cec5SDimitry Andric ); 21870b57cec5SDimitry Andric 21880b57cec5SDimitry Andric Chain = Ret.getValue(1); 218906c3fb27SDimitry Andric InGlue = Ret.getValue(2); 21900b57cec5SDimitry Andric 219181ad6265SDimitry Andric if (ProxyRegTruncates[i]) { 2192bdd1243dSDimitry Andric Ret = DAG.getNode(ISD::TRUNCATE, dl, *ProxyRegTruncates[i], Ret); 21930b57cec5SDimitry Andric } 21940b57cec5SDimitry Andric 21950b57cec5SDimitry Andric InVals.push_back(Ret); 21960b57cec5SDimitry Andric } 21970b57cec5SDimitry Andric 21980fca6ea1SDimitry Andric for (SDValue &T : TempProxyRegOps) { 21990fca6ea1SDimitry Andric SDValue Repl = DAG.getNode( 22000fca6ea1SDimitry Andric NVPTXISD::ProxyReg, dl, 22010fca6ea1SDimitry Andric DAG.getVTList(T.getSimpleValueType(), MVT::Other, MVT::Glue), 22020fca6ea1SDimitry Andric {Chain, T.getOperand(0), InGlue}); 22030fca6ea1SDimitry Andric DAG.ReplaceAllUsesWith(T, Repl); 22040fca6ea1SDimitry Andric DAG.RemoveDeadNode(T.getNode()); 22050fca6ea1SDimitry Andric 22060fca6ea1SDimitry Andric Chain = Repl.getValue(1); 22070fca6ea1SDimitry Andric InGlue = Repl.getValue(2); 22080fca6ea1SDimitry Andric } 22090fca6ea1SDimitry Andric 22100b57cec5SDimitry Andric // set isTailCall to false for now, until we figure out how to express 22110b57cec5SDimitry Andric // tail call optimization in PTX 
22120b57cec5SDimitry Andric isTailCall = false; 22130b57cec5SDimitry Andric return Chain; 22140b57cec5SDimitry Andric } 22150b57cec5SDimitry Andric 22165f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 22175f757f3fSDimitry Andric SelectionDAG &DAG) const { 22180fca6ea1SDimitry Andric 22190fca6ea1SDimitry Andric if (STI.getPTXVersion() < 73 || STI.getSmVersion() < 52) { 22205f757f3fSDimitry Andric const Function &Fn = DAG.getMachineFunction().getFunction(); 22215f757f3fSDimitry Andric 22225f757f3fSDimitry Andric DiagnosticInfoUnsupported NoDynamicAlloca( 22230fca6ea1SDimitry Andric Fn, 22240fca6ea1SDimitry Andric "Support for dynamic alloca introduced in PTX ISA version 7.3 and " 22250fca6ea1SDimitry Andric "requires target sm_52.", 22265f757f3fSDimitry Andric SDLoc(Op).getDebugLoc()); 22275f757f3fSDimitry Andric DAG.getContext()->diagnose(NoDynamicAlloca); 22280fca6ea1SDimitry Andric auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), 22290fca6ea1SDimitry Andric Op.getOperand(0)}; 22305f757f3fSDimitry Andric return DAG.getMergeValues(Ops, SDLoc()); 22315f757f3fSDimitry Andric } 22325f757f3fSDimitry Andric 22330fca6ea1SDimitry Andric SDValue Chain = Op.getOperand(0); 22340fca6ea1SDimitry Andric SDValue Size = Op.getOperand(1); 22350fca6ea1SDimitry Andric uint64_t Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); 22360fca6ea1SDimitry Andric SDLoc DL(Op.getNode()); 22370fca6ea1SDimitry Andric 22380fca6ea1SDimitry Andric // The size for ptx alloca instruction is 64-bit for m64 and 32-bit for m32. 
22390fca6ea1SDimitry Andric if (nvTM->is64Bit()) 22400fca6ea1SDimitry Andric Size = DAG.getZExtOrTrunc(Size, DL, MVT::i64); 22410fca6ea1SDimitry Andric else 22420fca6ea1SDimitry Andric Size = DAG.getZExtOrTrunc(Size, DL, MVT::i32); 22430fca6ea1SDimitry Andric 22440fca6ea1SDimitry Andric SDValue AllocOps[] = {Chain, Size, 22450fca6ea1SDimitry Andric DAG.getTargetConstant(Align, DL, MVT::i32)}; 22460fca6ea1SDimitry Andric SDValue Alloca = DAG.getNode(NVPTXISD::DYNAMIC_STACKALLOC, DL, 22470fca6ea1SDimitry Andric nvTM->is64Bit() ? MVT::i64 : MVT::i32, AllocOps); 22480fca6ea1SDimitry Andric 22490fca6ea1SDimitry Andric SDValue MergeOps[] = {Alloca, Chain}; 22500fca6ea1SDimitry Andric return DAG.getMergeValues(MergeOps, DL); 22510fca6ea1SDimitry Andric } 22520fca6ea1SDimitry Andric 22530b57cec5SDimitry Andric // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() 22540b57cec5SDimitry Andric // (see LegalizeDAG.cpp). This is slow and uses local memory. 22550b57cec5SDimitry Andric // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 22560b57cec5SDimitry Andric SDValue 22570b57cec5SDimitry Andric NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { 22580b57cec5SDimitry Andric SDNode *Node = Op.getNode(); 22590b57cec5SDimitry Andric SDLoc dl(Node); 22600b57cec5SDimitry Andric SmallVector<SDValue, 8> Ops; 22610b57cec5SDimitry Andric unsigned NumOperands = Node->getNumOperands(); 22620b57cec5SDimitry Andric for (unsigned i = 0; i < NumOperands; ++i) { 22630b57cec5SDimitry Andric SDValue SubOp = Node->getOperand(i); 22640b57cec5SDimitry Andric EVT VVT = SubOp.getNode()->getValueType(0); 22650b57cec5SDimitry Andric EVT EltVT = VVT.getVectorElementType(); 22660b57cec5SDimitry Andric unsigned NumSubElem = VVT.getVectorNumElements(); 22670b57cec5SDimitry Andric for (unsigned j = 0; j < NumSubElem; ++j) { 22680b57cec5SDimitry Andric Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, 
22690b57cec5SDimitry Andric DAG.getIntPtrConstant(j, dl))); 22700b57cec5SDimitry Andric } 22710b57cec5SDimitry Andric } 22720b57cec5SDimitry Andric return DAG.getBuildVector(Node->getValueType(0), dl, Ops); 22730b57cec5SDimitry Andric } 22740b57cec5SDimitry Andric 22755f757f3fSDimitry Andric // We can init constant f16x2/v2i16/v4i8 with a single .b32 move. Normally it 22760b57cec5SDimitry Andric // would get lowered as two constant loads and vector-packing move. 22770b57cec5SDimitry Andric // Instead we want just a constant move: 22785f757f3fSDimitry Andric // mov.b32 %r2, 0x40003C00 22790b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, 22800b57cec5SDimitry Andric SelectionDAG &DAG) const { 22815f757f3fSDimitry Andric EVT VT = Op->getValueType(0); 22825f757f3fSDimitry Andric if (!(Isv2x16VT(VT) || VT == MVT::v4i8)) 22830b57cec5SDimitry Andric return Op; 22840b57cec5SDimitry Andric 22855f757f3fSDimitry Andric SDLoc DL(Op); 22865f757f3fSDimitry Andric 22875f757f3fSDimitry Andric if (!llvm::all_of(Op->ops(), [](SDValue Operand) { 22885f757f3fSDimitry Andric return Operand->isUndef() || isa<ConstantSDNode>(Operand) || 22895f757f3fSDimitry Andric isa<ConstantFPSDNode>(Operand); 22905f757f3fSDimitry Andric })) { 22915f757f3fSDimitry Andric // Lower non-const v4i8 vector as byte-wise constructed i32, which allows us 22925f757f3fSDimitry Andric // to optimize calculation of constant parts. 
22935f757f3fSDimitry Andric if (VT == MVT::v4i8) { 22945f757f3fSDimitry Andric SDValue C8 = DAG.getConstant(8, DL, MVT::i32); 22955f757f3fSDimitry Andric SDValue E01 = DAG.getNode( 22965f757f3fSDimitry Andric NVPTXISD::BFI, DL, MVT::i32, 22975f757f3fSDimitry Andric DAG.getAnyExtOrTrunc(Op->getOperand(1), DL, MVT::i32), 22985f757f3fSDimitry Andric DAG.getAnyExtOrTrunc(Op->getOperand(0), DL, MVT::i32), C8, C8); 22995f757f3fSDimitry Andric SDValue E012 = 23005f757f3fSDimitry Andric DAG.getNode(NVPTXISD::BFI, DL, MVT::i32, 23015f757f3fSDimitry Andric DAG.getAnyExtOrTrunc(Op->getOperand(2), DL, MVT::i32), 23025f757f3fSDimitry Andric E01, DAG.getConstant(16, DL, MVT::i32), C8); 23035f757f3fSDimitry Andric SDValue E0123 = 23045f757f3fSDimitry Andric DAG.getNode(NVPTXISD::BFI, DL, MVT::i32, 23055f757f3fSDimitry Andric DAG.getAnyExtOrTrunc(Op->getOperand(3), DL, MVT::i32), 23065f757f3fSDimitry Andric E012, DAG.getConstant(24, DL, MVT::i32), C8); 23075f757f3fSDimitry Andric return DAG.getNode(ISD::BITCAST, DL, VT, E0123); 23085f757f3fSDimitry Andric } 23095f757f3fSDimitry Andric return Op; 23105f757f3fSDimitry Andric } 23115f757f3fSDimitry Andric 23125f757f3fSDimitry Andric // Get value or the Nth operand as an APInt(32). Undef values treated as 0. 
23135f757f3fSDimitry Andric auto GetOperand = [](SDValue Op, int N) -> APInt { 23145f757f3fSDimitry Andric const SDValue &Operand = Op->getOperand(N); 23155f757f3fSDimitry Andric EVT VT = Op->getValueType(0); 23165f757f3fSDimitry Andric if (Operand->isUndef()) 23175f757f3fSDimitry Andric return APInt(32, 0); 23185f757f3fSDimitry Andric APInt Value; 23195f757f3fSDimitry Andric if (VT == MVT::v2f16 || VT == MVT::v2bf16) 23205f757f3fSDimitry Andric Value = cast<ConstantFPSDNode>(Operand)->getValueAPF().bitcastToAPInt(); 23215f757f3fSDimitry Andric else if (VT == MVT::v2i16 || VT == MVT::v4i8) 2322297eecfbSDimitry Andric Value = Operand->getAsAPIntVal(); 23235f757f3fSDimitry Andric else 23245f757f3fSDimitry Andric llvm_unreachable("Unsupported type"); 23255f757f3fSDimitry Andric // i8 values are carried around as i16, so we need to zero out upper bits, 23265f757f3fSDimitry Andric // so they do not get in the way of combining individual byte values 23275f757f3fSDimitry Andric if (VT == MVT::v4i8) 23285f757f3fSDimitry Andric Value = Value.trunc(8); 23295f757f3fSDimitry Andric return Value.zext(32); 23305f757f3fSDimitry Andric }; 23315f757f3fSDimitry Andric APInt Value; 23325f757f3fSDimitry Andric if (Isv2x16VT(VT)) { 23335f757f3fSDimitry Andric Value = GetOperand(Op, 0) | GetOperand(Op, 1).shl(16); 23345f757f3fSDimitry Andric } else if (VT == MVT::v4i8) { 23355f757f3fSDimitry Andric Value = GetOperand(Op, 0) | GetOperand(Op, 1).shl(8) | 23365f757f3fSDimitry Andric GetOperand(Op, 2).shl(16) | GetOperand(Op, 3).shl(24); 23375f757f3fSDimitry Andric } else { 23385f757f3fSDimitry Andric llvm_unreachable("Unsupported type"); 23395f757f3fSDimitry Andric } 23405f757f3fSDimitry Andric SDValue Const = DAG.getConstant(Value, SDLoc(Op), MVT::i32); 234106c3fb27SDimitry Andric return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op->getValueType(0), Const); 23420b57cec5SDimitry Andric } 23430b57cec5SDimitry Andric 23440b57cec5SDimitry Andric SDValue 
NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, 23450b57cec5SDimitry Andric SelectionDAG &DAG) const { 23460b57cec5SDimitry Andric SDValue Index = Op->getOperand(1); 23475f757f3fSDimitry Andric SDValue Vector = Op->getOperand(0); 23485f757f3fSDimitry Andric SDLoc DL(Op); 23495f757f3fSDimitry Andric EVT VectorVT = Vector.getValueType(); 23505f757f3fSDimitry Andric 23515f757f3fSDimitry Andric if (VectorVT == MVT::v4i8) { 23525f757f3fSDimitry Andric SDValue BFE = 23535f757f3fSDimitry Andric DAG.getNode(NVPTXISD::BFE, DL, MVT::i32, 23545f757f3fSDimitry Andric {Vector, 23555f757f3fSDimitry Andric DAG.getNode(ISD::MUL, DL, MVT::i32, 23565f757f3fSDimitry Andric DAG.getZExtOrTrunc(Index, DL, MVT::i32), 23575f757f3fSDimitry Andric DAG.getConstant(8, DL, MVT::i32)), 23585f757f3fSDimitry Andric DAG.getConstant(8, DL, MVT::i32)}); 23595f757f3fSDimitry Andric return DAG.getAnyExtOrTrunc(BFE, DL, Op->getValueType(0)); 23605f757f3fSDimitry Andric } 23615f757f3fSDimitry Andric 23620b57cec5SDimitry Andric // Constant index will be matched by tablegen. 23630b57cec5SDimitry Andric if (isa<ConstantSDNode>(Index.getNode())) 23640b57cec5SDimitry Andric return Op; 23650b57cec5SDimitry Andric 23660b57cec5SDimitry Andric // Extract individual elements and select one of them. 
23675f757f3fSDimitry Andric assert(Isv2x16VT(VectorVT) && "Unexpected vector type."); 23680b57cec5SDimitry Andric EVT EltVT = VectorVT.getVectorElementType(); 23690b57cec5SDimitry Andric 23700b57cec5SDimitry Andric SDLoc dl(Op.getNode()); 23710b57cec5SDimitry Andric SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector, 23720b57cec5SDimitry Andric DAG.getIntPtrConstant(0, dl)); 23730b57cec5SDimitry Andric SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector, 23740b57cec5SDimitry Andric DAG.getIntPtrConstant(1, dl)); 23750b57cec5SDimitry Andric return DAG.getSelectCC(dl, Index, DAG.getIntPtrConstant(0, dl), E0, E1, 23760b57cec5SDimitry Andric ISD::CondCode::SETEQ); 23770b57cec5SDimitry Andric } 23780b57cec5SDimitry Andric 23795f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, 23805f757f3fSDimitry Andric SelectionDAG &DAG) const { 23815f757f3fSDimitry Andric SDValue Vector = Op->getOperand(0); 23825f757f3fSDimitry Andric EVT VectorVT = Vector.getValueType(); 23835f757f3fSDimitry Andric 23845f757f3fSDimitry Andric if (VectorVT != MVT::v4i8) 23855f757f3fSDimitry Andric return Op; 23865f757f3fSDimitry Andric SDLoc DL(Op); 23875f757f3fSDimitry Andric SDValue Value = Op->getOperand(1); 23885f757f3fSDimitry Andric if (Value->isUndef()) 23895f757f3fSDimitry Andric return Vector; 23905f757f3fSDimitry Andric 23915f757f3fSDimitry Andric SDValue Index = Op->getOperand(2); 23925f757f3fSDimitry Andric 23935f757f3fSDimitry Andric SDValue BFI = 23945f757f3fSDimitry Andric DAG.getNode(NVPTXISD::BFI, DL, MVT::i32, 23955f757f3fSDimitry Andric {DAG.getZExtOrTrunc(Value, DL, MVT::i32), Vector, 23965f757f3fSDimitry Andric DAG.getNode(ISD::MUL, DL, MVT::i32, 23975f757f3fSDimitry Andric DAG.getZExtOrTrunc(Index, DL, MVT::i32), 23985f757f3fSDimitry Andric DAG.getConstant(8, DL, MVT::i32)), 23995f757f3fSDimitry Andric DAG.getConstant(8, DL, MVT::i32)}); 24005f757f3fSDimitry Andric return DAG.getNode(ISD::BITCAST, DL, 
Op->getValueType(0), BFI); 24015f757f3fSDimitry Andric } 24025f757f3fSDimitry Andric 24035f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 24045f757f3fSDimitry Andric SelectionDAG &DAG) const { 24055f757f3fSDimitry Andric SDValue V1 = Op.getOperand(0); 24065f757f3fSDimitry Andric EVT VectorVT = V1.getValueType(); 24075f757f3fSDimitry Andric if (VectorVT != MVT::v4i8 || Op.getValueType() != MVT::v4i8) 24085f757f3fSDimitry Andric return Op; 24095f757f3fSDimitry Andric 24105f757f3fSDimitry Andric // Lower shuffle to PRMT instruction. 24115f757f3fSDimitry Andric const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 24125f757f3fSDimitry Andric SDValue V2 = Op.getOperand(1); 24135f757f3fSDimitry Andric uint32_t Selector = 0; 24147a6dacacSDimitry Andric for (auto I : llvm::enumerate(SVN->getMask())) { 24157a6dacacSDimitry Andric if (I.value() != -1) // -1 is a placeholder for undef. 24165f757f3fSDimitry Andric Selector |= (I.value() << (I.index() * 4)); 24177a6dacacSDimitry Andric } 24185f757f3fSDimitry Andric 24195f757f3fSDimitry Andric SDLoc DL(Op); 24205f757f3fSDimitry Andric return DAG.getNode(NVPTXISD::PRMT, DL, MVT::v4i8, V1, V2, 24215f757f3fSDimitry Andric DAG.getConstant(Selector, DL, MVT::i32), 24225f757f3fSDimitry Andric DAG.getConstant(NVPTX::PTXPrmtMode::NONE, DL, MVT::i32)); 24235f757f3fSDimitry Andric } 24240b57cec5SDimitry Andric /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which 24250b57cec5SDimitry Andric /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift 24260b57cec5SDimitry Andric /// amount, or 24270b57cec5SDimitry Andric /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift 24280b57cec5SDimitry Andric /// amount. 
24290b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op, 24300b57cec5SDimitry Andric SelectionDAG &DAG) const { 24310b57cec5SDimitry Andric assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 24320b57cec5SDimitry Andric assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 24330b57cec5SDimitry Andric 24340b57cec5SDimitry Andric EVT VT = Op.getValueType(); 24350b57cec5SDimitry Andric unsigned VTBits = VT.getSizeInBits(); 24360b57cec5SDimitry Andric SDLoc dl(Op); 24370b57cec5SDimitry Andric SDValue ShOpLo = Op.getOperand(0); 24380b57cec5SDimitry Andric SDValue ShOpHi = Op.getOperand(1); 24390b57cec5SDimitry Andric SDValue ShAmt = Op.getOperand(2); 24400b57cec5SDimitry Andric unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 24410b57cec5SDimitry Andric 24420b57cec5SDimitry Andric if (VTBits == 32 && STI.getSmVersion() >= 35) { 24430b57cec5SDimitry Andric // For 32bit and sm35, we can use the funnel shift 'shf' instruction. 
24440b57cec5SDimitry Andric // {dHi, dLo} = {aHi, aLo} >> Amt 24450b57cec5SDimitry Andric // dHi = aHi >> Amt 24460b57cec5SDimitry Andric // dLo = shf.r.clamp aLo, aHi, Amt 24470b57cec5SDimitry Andric 24480b57cec5SDimitry Andric SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 24490b57cec5SDimitry Andric SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi, 24500b57cec5SDimitry Andric ShAmt); 24510b57cec5SDimitry Andric 24520b57cec5SDimitry Andric SDValue Ops[2] = { Lo, Hi }; 24530b57cec5SDimitry Andric return DAG.getMergeValues(Ops, dl); 24540b57cec5SDimitry Andric } 24550b57cec5SDimitry Andric else { 24560b57cec5SDimitry Andric // {dHi, dLo} = {aHi, aLo} >> Amt 24570b57cec5SDimitry Andric // - if (Amt>=size) then 24580b57cec5SDimitry Andric // dLo = aHi >> (Amt-size) 24590b57cec5SDimitry Andric // dHi = aHi >> Amt (this is either all 0 or all 1) 24600b57cec5SDimitry Andric // else 24610b57cec5SDimitry Andric // dLo = (aLo >>logic Amt) | (aHi << (size-Amt)) 24620b57cec5SDimitry Andric // dHi = aHi >> Amt 24630b57cec5SDimitry Andric 24640b57cec5SDimitry Andric SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 24650b57cec5SDimitry Andric DAG.getConstant(VTBits, dl, MVT::i32), 24660b57cec5SDimitry Andric ShAmt); 24670b57cec5SDimitry Andric SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 24680b57cec5SDimitry Andric SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 24690b57cec5SDimitry Andric DAG.getConstant(VTBits, dl, MVT::i32)); 24700b57cec5SDimitry Andric SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 24710b57cec5SDimitry Andric SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 24720b57cec5SDimitry Andric SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 24730b57cec5SDimitry Andric 24740b57cec5SDimitry Andric SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, 24750b57cec5SDimitry Andric DAG.getConstant(VTBits, dl, MVT::i32), 24760b57cec5SDimitry Andric ISD::SETGE); 
24770b57cec5SDimitry Andric SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 24780b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); 24790b57cec5SDimitry Andric 24800b57cec5SDimitry Andric SDValue Ops[2] = { Lo, Hi }; 24810b57cec5SDimitry Andric return DAG.getMergeValues(Ops, dl); 24820b57cec5SDimitry Andric } 24830b57cec5SDimitry Andric } 24840b57cec5SDimitry Andric 24850b57cec5SDimitry Andric /// LowerShiftLeftParts - Lower SHL_PARTS, which 24860b57cec5SDimitry Andric /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift 24870b57cec5SDimitry Andric /// amount, or 24880b57cec5SDimitry Andric /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift 24890b57cec5SDimitry Andric /// amount. 24900b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op, 24910b57cec5SDimitry Andric SelectionDAG &DAG) const { 24920b57cec5SDimitry Andric assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 24930b57cec5SDimitry Andric assert(Op.getOpcode() == ISD::SHL_PARTS); 24940b57cec5SDimitry Andric 24950b57cec5SDimitry Andric EVT VT = Op.getValueType(); 24960b57cec5SDimitry Andric unsigned VTBits = VT.getSizeInBits(); 24970b57cec5SDimitry Andric SDLoc dl(Op); 24980b57cec5SDimitry Andric SDValue ShOpLo = Op.getOperand(0); 24990b57cec5SDimitry Andric SDValue ShOpHi = Op.getOperand(1); 25000b57cec5SDimitry Andric SDValue ShAmt = Op.getOperand(2); 25010b57cec5SDimitry Andric 25020b57cec5SDimitry Andric if (VTBits == 32 && STI.getSmVersion() >= 35) { 25030b57cec5SDimitry Andric // For 32bit and sm35, we can use the funnel shift 'shf' instruction. 
25040b57cec5SDimitry Andric // {dHi, dLo} = {aHi, aLo} << Amt 25050b57cec5SDimitry Andric // dHi = shf.l.clamp aLo, aHi, Amt 25060b57cec5SDimitry Andric // dLo = aLo << Amt 25070b57cec5SDimitry Andric 25080b57cec5SDimitry Andric SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi, 25090b57cec5SDimitry Andric ShAmt); 25100b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 25110b57cec5SDimitry Andric 25120b57cec5SDimitry Andric SDValue Ops[2] = { Lo, Hi }; 25130b57cec5SDimitry Andric return DAG.getMergeValues(Ops, dl); 25140b57cec5SDimitry Andric } 25150b57cec5SDimitry Andric else { 25160b57cec5SDimitry Andric // {dHi, dLo} = {aHi, aLo} << Amt 25170b57cec5SDimitry Andric // - if (Amt>=size) then 25180b57cec5SDimitry Andric // dLo = aLo << Amt (all 0) 25190b57cec5SDimitry Andric // dLo = aLo << (Amt-size) 25200b57cec5SDimitry Andric // else 25210b57cec5SDimitry Andric // dLo = aLo << Amt 25220b57cec5SDimitry Andric // dHi = (aHi << Amt) | (aLo >> (size-Amt)) 25230b57cec5SDimitry Andric 25240b57cec5SDimitry Andric SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 25250b57cec5SDimitry Andric DAG.getConstant(VTBits, dl, MVT::i32), 25260b57cec5SDimitry Andric ShAmt); 25270b57cec5SDimitry Andric SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 25280b57cec5SDimitry Andric SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 25290b57cec5SDimitry Andric DAG.getConstant(VTBits, dl, MVT::i32)); 25300b57cec5SDimitry Andric SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 25310b57cec5SDimitry Andric SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 25320b57cec5SDimitry Andric SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 25330b57cec5SDimitry Andric 25340b57cec5SDimitry Andric SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, 25350b57cec5SDimitry Andric DAG.getConstant(VTBits, dl, MVT::i32), 25360b57cec5SDimitry Andric ISD::SETGE); 25370b57cec5SDimitry 
Andric SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 25380b57cec5SDimitry Andric SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); 25390b57cec5SDimitry Andric 25400b57cec5SDimitry Andric SDValue Ops[2] = { Lo, Hi }; 25410b57cec5SDimitry Andric return DAG.getMergeValues(Ops, dl); 25420b57cec5SDimitry Andric } 25430b57cec5SDimitry Andric } 25440b57cec5SDimitry Andric 25450b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const { 25460b57cec5SDimitry Andric EVT VT = Op.getValueType(); 25470b57cec5SDimitry Andric 25480b57cec5SDimitry Andric if (VT == MVT::f32) 25490b57cec5SDimitry Andric return LowerFROUND32(Op, DAG); 25500b57cec5SDimitry Andric 25510b57cec5SDimitry Andric if (VT == MVT::f64) 25520b57cec5SDimitry Andric return LowerFROUND64(Op, DAG); 25530b57cec5SDimitry Andric 25540b57cec5SDimitry Andric llvm_unreachable("unhandled type"); 25550b57cec5SDimitry Andric } 25560b57cec5SDimitry Andric 25570b57cec5SDimitry Andric // This is the the rounding method used in CUDA libdevice in C like code: 25580b57cec5SDimitry Andric // float roundf(float A) 25590b57cec5SDimitry Andric // { 25600b57cec5SDimitry Andric // float RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f)); 25610b57cec5SDimitry Andric // RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA; 25620b57cec5SDimitry Andric // return abs(A) < 0.5 ? (float)(int)A : RoundedA; 25630b57cec5SDimitry Andric // } 25640b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerFROUND32(SDValue Op, 25650b57cec5SDimitry Andric SelectionDAG &DAG) const { 25660b57cec5SDimitry Andric SDLoc SL(Op); 25670b57cec5SDimitry Andric SDValue A = Op.getOperand(0); 25680b57cec5SDimitry Andric EVT VT = Op.getValueType(); 25690b57cec5SDimitry Andric 25700b57cec5SDimitry Andric SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A); 25710b57cec5SDimitry Andric 25720b57cec5SDimitry Andric // RoundedA = (float) (int) ( A > 0 ? 
(A + 0.5f) : (A - 0.5f)) 25730b57cec5SDimitry Andric SDValue Bitcast = DAG.getNode(ISD::BITCAST, SL, MVT::i32, A); 25740b57cec5SDimitry Andric const int SignBitMask = 0x80000000; 25750b57cec5SDimitry Andric SDValue Sign = DAG.getNode(ISD::AND, SL, MVT::i32, Bitcast, 25760b57cec5SDimitry Andric DAG.getConstant(SignBitMask, SL, MVT::i32)); 25770b57cec5SDimitry Andric const int PointFiveInBits = 0x3F000000; 25780b57cec5SDimitry Andric SDValue PointFiveWithSignRaw = 25790b57cec5SDimitry Andric DAG.getNode(ISD::OR, SL, MVT::i32, Sign, 25800b57cec5SDimitry Andric DAG.getConstant(PointFiveInBits, SL, MVT::i32)); 25810b57cec5SDimitry Andric SDValue PointFiveWithSign = 25820b57cec5SDimitry Andric DAG.getNode(ISD::BITCAST, SL, VT, PointFiveWithSignRaw); 25830b57cec5SDimitry Andric SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, A, PointFiveWithSign); 25840b57cec5SDimitry Andric SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA); 25850b57cec5SDimitry Andric 25860b57cec5SDimitry Andric // RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA; 25870b57cec5SDimitry Andric EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); 25880b57cec5SDimitry Andric SDValue IsLarge = 25890b57cec5SDimitry Andric DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 23.0), SL, VT), 25900b57cec5SDimitry Andric ISD::SETOGT); 25910b57cec5SDimitry Andric RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA); 25920b57cec5SDimitry Andric 25930b57cec5SDimitry Andric // return abs(A) < 0.5 ? 
(float)(int)A : RoundedA; 25940b57cec5SDimitry Andric SDValue IsSmall =DAG.getSetCC(SL, SetCCVT, AbsA, 25950b57cec5SDimitry Andric DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT); 25960b57cec5SDimitry Andric SDValue RoundedAForSmallA = DAG.getNode(ISD::FTRUNC, SL, VT, A); 25970b57cec5SDimitry Andric return DAG.getNode(ISD::SELECT, SL, VT, IsSmall, RoundedAForSmallA, RoundedA); 25980b57cec5SDimitry Andric } 25990b57cec5SDimitry Andric 26000b57cec5SDimitry Andric // The implementation of round(double) is similar to that of round(float) in 26010b57cec5SDimitry Andric // that they both separate the value range into three regions and use a method 26020b57cec5SDimitry Andric // specific to the region to round the values. However, round(double) first 26030b57cec5SDimitry Andric // calculates the round of the absolute value and then adds the sign back while 26040b57cec5SDimitry Andric // round(float) directly rounds the value with sign. 26050b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerFROUND64(SDValue Op, 26060b57cec5SDimitry Andric SelectionDAG &DAG) const { 26070b57cec5SDimitry Andric SDLoc SL(Op); 26080b57cec5SDimitry Andric SDValue A = Op.getOperand(0); 26090b57cec5SDimitry Andric EVT VT = Op.getValueType(); 26100b57cec5SDimitry Andric 26110b57cec5SDimitry Andric SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A); 26120b57cec5SDimitry Andric 26130b57cec5SDimitry Andric // double RoundedA = (double) (int) (abs(A) + 0.5f); 26140b57cec5SDimitry Andric SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, AbsA, 26150b57cec5SDimitry Andric DAG.getConstantFP(0.5, SL, VT)); 26160b57cec5SDimitry Andric SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA); 26170b57cec5SDimitry Andric 26180b57cec5SDimitry Andric // RoundedA = abs(A) < 0.5 ? 
(double)0 : RoundedA; 26190b57cec5SDimitry Andric EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); 26200b57cec5SDimitry Andric SDValue IsSmall =DAG.getSetCC(SL, SetCCVT, AbsA, 26210b57cec5SDimitry Andric DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT); 26220b57cec5SDimitry Andric RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsSmall, 26230b57cec5SDimitry Andric DAG.getConstantFP(0, SL, VT), 26240b57cec5SDimitry Andric RoundedA); 26250b57cec5SDimitry Andric 26260b57cec5SDimitry Andric // Add sign to rounded_A 26270b57cec5SDimitry Andric RoundedA = DAG.getNode(ISD::FCOPYSIGN, SL, VT, RoundedA, A); 26280b57cec5SDimitry Andric DAG.getNode(ISD::FTRUNC, SL, VT, A); 26290b57cec5SDimitry Andric 26300b57cec5SDimitry Andric // RoundedA = abs(A) > 0x1.0p52 ? A : RoundedA; 26310b57cec5SDimitry Andric SDValue IsLarge = 26320b57cec5SDimitry Andric DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 52.0), SL, VT), 26330b57cec5SDimitry Andric ISD::SETOGT); 26340b57cec5SDimitry Andric return DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA); 26350b57cec5SDimitry Andric } 26360b57cec5SDimitry Andric 26375f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerINT_TO_FP(SDValue Op, 26385f757f3fSDimitry Andric SelectionDAG &DAG) const { 26395f757f3fSDimitry Andric assert(STI.getSmVersion() < 90 || STI.getPTXVersion() < 78); 26400b57cec5SDimitry Andric 26415f757f3fSDimitry Andric if (Op.getValueType() == MVT::bf16) { 26425f757f3fSDimitry Andric SDLoc Loc(Op); 26435f757f3fSDimitry Andric return DAG.getNode( 26445f757f3fSDimitry Andric ISD::FP_ROUND, Loc, MVT::bf16, 26455f757f3fSDimitry Andric DAG.getNode(Op.getOpcode(), Loc, MVT::f32, Op.getOperand(0)), 26465f757f3fSDimitry Andric DAG.getIntPtrConstant(0, Loc)); 26475f757f3fSDimitry Andric } 26485f757f3fSDimitry Andric 26495f757f3fSDimitry Andric // Everything else is considered legal. 
26505f757f3fSDimitry Andric return Op; 26515f757f3fSDimitry Andric } 26525f757f3fSDimitry Andric 26535f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerFP_TO_INT(SDValue Op, 26545f757f3fSDimitry Andric SelectionDAG &DAG) const { 26555f757f3fSDimitry Andric assert(STI.getSmVersion() < 90 || STI.getPTXVersion() < 78); 26565f757f3fSDimitry Andric 26575f757f3fSDimitry Andric if (Op.getOperand(0).getValueType() == MVT::bf16) { 26585f757f3fSDimitry Andric SDLoc Loc(Op); 26595f757f3fSDimitry Andric return DAG.getNode( 26605f757f3fSDimitry Andric Op.getOpcode(), Loc, Op.getValueType(), 26615f757f3fSDimitry Andric DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f32, Op.getOperand(0))); 26625f757f3fSDimitry Andric } 26635f757f3fSDimitry Andric 26645f757f3fSDimitry Andric // Everything else is considered legal. 26655f757f3fSDimitry Andric return Op; 26665f757f3fSDimitry Andric } 26675f757f3fSDimitry Andric 26680fca6ea1SDimitry Andric SDValue NVPTXTargetLowering::LowerFP_ROUND(SDValue Op, 26690fca6ea1SDimitry Andric SelectionDAG &DAG) const { 26700fca6ea1SDimitry Andric EVT NarrowVT = Op.getValueType(); 26710fca6ea1SDimitry Andric SDValue Wide = Op.getOperand(0); 26720fca6ea1SDimitry Andric EVT WideVT = Wide.getValueType(); 26730fca6ea1SDimitry Andric if (NarrowVT.getScalarType() == MVT::bf16) { 26740fca6ea1SDimitry Andric const TargetLowering *TLI = STI.getTargetLowering(); 26750fca6ea1SDimitry Andric if (STI.getSmVersion() < 80 || STI.getPTXVersion() < 70) { 26760fca6ea1SDimitry Andric return TLI->expandFP_ROUND(Op.getNode(), DAG); 26770fca6ea1SDimitry Andric } 26780fca6ea1SDimitry Andric if (STI.getSmVersion() < 90 || STI.getPTXVersion() < 78) { 26790fca6ea1SDimitry Andric // This combination was the first to support f32 -> bf16. 
26800fca6ea1SDimitry Andric if (STI.getSmVersion() >= 80 && STI.getPTXVersion() >= 70) { 26810fca6ea1SDimitry Andric if (WideVT.getScalarType() == MVT::f32) { 26820fca6ea1SDimitry Andric return Op; 26830fca6ea1SDimitry Andric } 26840fca6ea1SDimitry Andric if (WideVT.getScalarType() == MVT::f64) { 26850fca6ea1SDimitry Andric SDLoc Loc(Op); 26860fca6ea1SDimitry Andric // Round-inexact-to-odd f64 to f32, then do the final rounding using 26870fca6ea1SDimitry Andric // the hardware f32 -> bf16 instruction. 26880fca6ea1SDimitry Andric SDValue rod = TLI->expandRoundInexactToOdd( 26890fca6ea1SDimitry Andric WideVT.isVector() ? WideVT.changeVectorElementType(MVT::f32) 26900fca6ea1SDimitry Andric : MVT::f32, 26910fca6ea1SDimitry Andric Wide, Loc, DAG); 26920fca6ea1SDimitry Andric return DAG.getFPExtendOrRound(rod, Loc, NarrowVT); 26930fca6ea1SDimitry Andric } 26940fca6ea1SDimitry Andric } 26950fca6ea1SDimitry Andric return TLI->expandFP_ROUND(Op.getNode(), DAG); 26960fca6ea1SDimitry Andric } 26970fca6ea1SDimitry Andric } 26980fca6ea1SDimitry Andric 26990fca6ea1SDimitry Andric // Everything else is considered legal. 
27000fca6ea1SDimitry Andric return Op; 27010fca6ea1SDimitry Andric } 27020fca6ea1SDimitry Andric 27030fca6ea1SDimitry Andric SDValue NVPTXTargetLowering::LowerFP_EXTEND(SDValue Op, 27040fca6ea1SDimitry Andric SelectionDAG &DAG) const { 27050fca6ea1SDimitry Andric SDValue Narrow = Op.getOperand(0); 27060fca6ea1SDimitry Andric EVT NarrowVT = Narrow.getValueType(); 27070fca6ea1SDimitry Andric EVT WideVT = Op.getValueType(); 27080fca6ea1SDimitry Andric if (NarrowVT.getScalarType() == MVT::bf16) { 27090fca6ea1SDimitry Andric if (WideVT.getScalarType() == MVT::f32 && 27100fca6ea1SDimitry Andric (STI.getSmVersion() < 80 || STI.getPTXVersion() < 71)) { 27110fca6ea1SDimitry Andric SDLoc Loc(Op); 27120fca6ea1SDimitry Andric return DAG.getNode(ISD::BF16_TO_FP, Loc, WideVT, Narrow); 27130fca6ea1SDimitry Andric } 27140fca6ea1SDimitry Andric if (WideVT.getScalarType() == MVT::f64 && 27150fca6ea1SDimitry Andric (STI.getSmVersion() < 90 || STI.getPTXVersion() < 78)) { 27160fca6ea1SDimitry Andric EVT F32 = NarrowVT.isVector() ? NarrowVT.changeVectorElementType(MVT::f32) 27170fca6ea1SDimitry Andric : MVT::f32; 27180fca6ea1SDimitry Andric SDLoc Loc(Op); 27190fca6ea1SDimitry Andric if (STI.getSmVersion() >= 80 && STI.getPTXVersion() >= 71) { 27200fca6ea1SDimitry Andric Op = DAG.getNode(ISD::FP_EXTEND, Loc, F32, Narrow); 27210fca6ea1SDimitry Andric } else { 27220fca6ea1SDimitry Andric Op = DAG.getNode(ISD::BF16_TO_FP, Loc, F32, Narrow); 27230fca6ea1SDimitry Andric } 27240fca6ea1SDimitry Andric return DAG.getNode(ISD::FP_EXTEND, Loc, WideVT, Op); 27250fca6ea1SDimitry Andric } 27260fca6ea1SDimitry Andric } 27270fca6ea1SDimitry Andric 27280fca6ea1SDimitry Andric // Everything else is considered legal. 
27290fca6ea1SDimitry Andric return Op; 27300fca6ea1SDimitry Andric } 27310fca6ea1SDimitry Andric 27325f757f3fSDimitry Andric static SDValue LowerVectorArith(SDValue Op, SelectionDAG &DAG) { 27335f757f3fSDimitry Andric SDLoc DL(Op); 27345f757f3fSDimitry Andric if (Op.getValueType() != MVT::v2i16) 27355f757f3fSDimitry Andric return Op; 27365f757f3fSDimitry Andric EVT EltVT = Op.getValueType().getVectorElementType(); 27375f757f3fSDimitry Andric SmallVector<SDValue> VecElements; 27385f757f3fSDimitry Andric for (int I = 0, E = Op.getValueType().getVectorNumElements(); I < E; I++) { 27395f757f3fSDimitry Andric SmallVector<SDValue> ScalarArgs; 27405f757f3fSDimitry Andric llvm::transform(Op->ops(), std::back_inserter(ScalarArgs), 27415f757f3fSDimitry Andric [&](const SDUse &O) { 27425f757f3fSDimitry Andric return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, 27435f757f3fSDimitry Andric O.get(), DAG.getIntPtrConstant(I, DL)); 27445f757f3fSDimitry Andric }); 27455f757f3fSDimitry Andric VecElements.push_back(DAG.getNode(Op.getOpcode(), DL, EltVT, ScalarArgs)); 27465f757f3fSDimitry Andric } 27475f757f3fSDimitry Andric SDValue V = 27485f757f3fSDimitry Andric DAG.getNode(ISD::BUILD_VECTOR, DL, Op.getValueType(), VecElements); 27495f757f3fSDimitry Andric return V; 27505f757f3fSDimitry Andric } 27510b57cec5SDimitry Andric 27520b57cec5SDimitry Andric SDValue 27530b57cec5SDimitry Andric NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 27540b57cec5SDimitry Andric switch (Op.getOpcode()) { 27550b57cec5SDimitry Andric case ISD::RETURNADDR: 27560b57cec5SDimitry Andric return SDValue(); 27570b57cec5SDimitry Andric case ISD::FRAMEADDR: 27580b57cec5SDimitry Andric return SDValue(); 27590b57cec5SDimitry Andric case ISD::GlobalAddress: 27600b57cec5SDimitry Andric return LowerGlobalAddress(Op, DAG); 27610b57cec5SDimitry Andric case ISD::INTRINSIC_W_CHAIN: 27620b57cec5SDimitry Andric return Op; 27630b57cec5SDimitry Andric case ISD::BUILD_VECTOR: 27640b57cec5SDimitry 
Andric return LowerBUILD_VECTOR(Op, DAG); 27650b57cec5SDimitry Andric case ISD::EXTRACT_SUBVECTOR: 27660b57cec5SDimitry Andric return Op; 27670b57cec5SDimitry Andric case ISD::EXTRACT_VECTOR_ELT: 27680b57cec5SDimitry Andric return LowerEXTRACT_VECTOR_ELT(Op, DAG); 27695f757f3fSDimitry Andric case ISD::INSERT_VECTOR_ELT: 27705f757f3fSDimitry Andric return LowerINSERT_VECTOR_ELT(Op, DAG); 27715f757f3fSDimitry Andric case ISD::VECTOR_SHUFFLE: 27725f757f3fSDimitry Andric return LowerVECTOR_SHUFFLE(Op, DAG); 27730b57cec5SDimitry Andric case ISD::CONCAT_VECTORS: 27740b57cec5SDimitry Andric return LowerCONCAT_VECTORS(Op, DAG); 27750b57cec5SDimitry Andric case ISD::STORE: 27760b57cec5SDimitry Andric return LowerSTORE(Op, DAG); 27770b57cec5SDimitry Andric case ISD::LOAD: 27780b57cec5SDimitry Andric return LowerLOAD(Op, DAG); 27790b57cec5SDimitry Andric case ISD::SHL_PARTS: 27800b57cec5SDimitry Andric return LowerShiftLeftParts(Op, DAG); 27810b57cec5SDimitry Andric case ISD::SRA_PARTS: 27820b57cec5SDimitry Andric case ISD::SRL_PARTS: 27830b57cec5SDimitry Andric return LowerShiftRightParts(Op, DAG); 27840b57cec5SDimitry Andric case ISD::SELECT: 27850b57cec5SDimitry Andric return LowerSelect(Op, DAG); 27860b57cec5SDimitry Andric case ISD::FROUND: 27870b57cec5SDimitry Andric return LowerFROUND(Op, DAG); 27885f757f3fSDimitry Andric case ISD::SINT_TO_FP: 27895f757f3fSDimitry Andric case ISD::UINT_TO_FP: 27905f757f3fSDimitry Andric return LowerINT_TO_FP(Op, DAG); 27915f757f3fSDimitry Andric case ISD::FP_TO_SINT: 27925f757f3fSDimitry Andric case ISD::FP_TO_UINT: 27935f757f3fSDimitry Andric return LowerFP_TO_INT(Op, DAG); 27940fca6ea1SDimitry Andric case ISD::FP_ROUND: 27950fca6ea1SDimitry Andric return LowerFP_ROUND(Op, DAG); 27960fca6ea1SDimitry Andric case ISD::FP_EXTEND: 27970fca6ea1SDimitry Andric return LowerFP_EXTEND(Op, DAG); 2798bdd1243dSDimitry Andric case ISD::VAARG: 2799bdd1243dSDimitry Andric return LowerVAARG(Op, DAG); 2800bdd1243dSDimitry Andric case ISD::VASTART: 
2801bdd1243dSDimitry Andric return LowerVASTART(Op, DAG); 28025f757f3fSDimitry Andric case ISD::ABS: 28035f757f3fSDimitry Andric case ISD::SMIN: 28045f757f3fSDimitry Andric case ISD::SMAX: 28055f757f3fSDimitry Andric case ISD::UMIN: 28065f757f3fSDimitry Andric case ISD::UMAX: 28075f757f3fSDimitry Andric case ISD::ADD: 28085f757f3fSDimitry Andric case ISD::SUB: 28095f757f3fSDimitry Andric case ISD::MUL: 28105f757f3fSDimitry Andric case ISD::SHL: 28115f757f3fSDimitry Andric case ISD::SREM: 28125f757f3fSDimitry Andric case ISD::UREM: 28135f757f3fSDimitry Andric return LowerVectorArith(Op, DAG); 28145f757f3fSDimitry Andric case ISD::DYNAMIC_STACKALLOC: 28155f757f3fSDimitry Andric return LowerDYNAMIC_STACKALLOC(Op, DAG); 28160fca6ea1SDimitry Andric case ISD::CopyToReg: 28170fca6ea1SDimitry Andric return LowerCopyToReg_128(Op, DAG); 28180b57cec5SDimitry Andric default: 28190b57cec5SDimitry Andric llvm_unreachable("Custom lowering not defined for operation"); 28200b57cec5SDimitry Andric } 28210b57cec5SDimitry Andric } 28220b57cec5SDimitry Andric 2823bdd1243dSDimitry Andric // This function is almost a copy of SelectionDAG::expandVAArg(). 2824bdd1243dSDimitry Andric // The only diff is that this one produces loads from local address space. 
2825bdd1243dSDimitry Andric SDValue NVPTXTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { 2826bdd1243dSDimitry Andric const TargetLowering *TLI = STI.getTargetLowering(); 2827bdd1243dSDimitry Andric SDLoc DL(Op); 2828bdd1243dSDimitry Andric 2829bdd1243dSDimitry Andric SDNode *Node = Op.getNode(); 2830bdd1243dSDimitry Andric const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); 2831bdd1243dSDimitry Andric EVT VT = Node->getValueType(0); 2832bdd1243dSDimitry Andric auto *Ty = VT.getTypeForEVT(*DAG.getContext()); 2833bdd1243dSDimitry Andric SDValue Tmp1 = Node->getOperand(0); 2834bdd1243dSDimitry Andric SDValue Tmp2 = Node->getOperand(1); 2835bdd1243dSDimitry Andric const MaybeAlign MA(Node->getConstantOperandVal(3)); 2836bdd1243dSDimitry Andric 2837bdd1243dSDimitry Andric SDValue VAListLoad = DAG.getLoad(TLI->getPointerTy(DAG.getDataLayout()), DL, 2838bdd1243dSDimitry Andric Tmp1, Tmp2, MachinePointerInfo(V)); 2839bdd1243dSDimitry Andric SDValue VAList = VAListLoad; 2840bdd1243dSDimitry Andric 2841bdd1243dSDimitry Andric if (MA && *MA > TLI->getMinStackArgumentAlignment()) { 2842bdd1243dSDimitry Andric VAList = DAG.getNode( 2843bdd1243dSDimitry Andric ISD::ADD, DL, VAList.getValueType(), VAList, 2844bdd1243dSDimitry Andric DAG.getConstant(MA->value() - 1, DL, VAList.getValueType())); 2845bdd1243dSDimitry Andric 2846bdd1243dSDimitry Andric VAList = DAG.getNode( 2847bdd1243dSDimitry Andric ISD::AND, DL, VAList.getValueType(), VAList, 2848bdd1243dSDimitry Andric DAG.getConstant(-(int64_t)MA->value(), DL, VAList.getValueType())); 2849bdd1243dSDimitry Andric } 2850bdd1243dSDimitry Andric 2851bdd1243dSDimitry Andric // Increment the pointer, VAList, to the next vaarg 2852bdd1243dSDimitry Andric Tmp1 = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList, 2853bdd1243dSDimitry Andric DAG.getConstant(DAG.getDataLayout().getTypeAllocSize(Ty), 2854bdd1243dSDimitry Andric DL, VAList.getValueType())); 2855bdd1243dSDimitry Andric 
2856bdd1243dSDimitry Andric // Store the incremented VAList to the legalized pointer 2857bdd1243dSDimitry Andric Tmp1 = DAG.getStore(VAListLoad.getValue(1), DL, Tmp1, Tmp2, 2858bdd1243dSDimitry Andric MachinePointerInfo(V)); 2859bdd1243dSDimitry Andric 2860bdd1243dSDimitry Andric const Value *SrcV = 2861bdd1243dSDimitry Andric Constant::getNullValue(PointerType::get(Ty, ADDRESS_SPACE_LOCAL)); 2862bdd1243dSDimitry Andric 2863bdd1243dSDimitry Andric // Load the actual argument out of the pointer VAList 2864bdd1243dSDimitry Andric return DAG.getLoad(VT, DL, Tmp1, VAList, MachinePointerInfo(SrcV)); 2865bdd1243dSDimitry Andric } 2866bdd1243dSDimitry Andric 2867bdd1243dSDimitry Andric SDValue NVPTXTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { 2868bdd1243dSDimitry Andric const TargetLowering *TLI = STI.getTargetLowering(); 2869bdd1243dSDimitry Andric SDLoc DL(Op); 2870bdd1243dSDimitry Andric EVT PtrVT = TLI->getPointerTy(DAG.getDataLayout()); 2871bdd1243dSDimitry Andric 2872bdd1243dSDimitry Andric // Store the address of unsized array <function>_vararg[] in the ap object. 
2873bdd1243dSDimitry Andric SDValue Arg = getParamSymbol(DAG, /* vararg */ -1, PtrVT); 2874bdd1243dSDimitry Andric SDValue VAReg = DAG.getNode(NVPTXISD::Wrapper, DL, PtrVT, Arg); 2875bdd1243dSDimitry Andric 2876bdd1243dSDimitry Andric const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2877bdd1243dSDimitry Andric return DAG.getStore(Op.getOperand(0), DL, VAReg, Op.getOperand(1), 2878bdd1243dSDimitry Andric MachinePointerInfo(SV)); 2879bdd1243dSDimitry Andric } 2880bdd1243dSDimitry Andric 28810b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const { 28820b57cec5SDimitry Andric SDValue Op0 = Op->getOperand(0); 28830b57cec5SDimitry Andric SDValue Op1 = Op->getOperand(1); 28840b57cec5SDimitry Andric SDValue Op2 = Op->getOperand(2); 28850b57cec5SDimitry Andric SDLoc DL(Op.getNode()); 28860b57cec5SDimitry Andric 28870b57cec5SDimitry Andric assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1"); 28880b57cec5SDimitry Andric 28890b57cec5SDimitry Andric Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1); 28900b57cec5SDimitry Andric Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2); 28910b57cec5SDimitry Andric SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2); 28920b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select); 28930b57cec5SDimitry Andric 28940b57cec5SDimitry Andric return Trunc; 28950b57cec5SDimitry Andric } 28960b57cec5SDimitry Andric 28970b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 28980b57cec5SDimitry Andric if (Op.getValueType() == MVT::i1) 28990b57cec5SDimitry Andric return LowerLOADi1(Op, DAG); 29000b57cec5SDimitry Andric 29015f757f3fSDimitry Andric // v2f16/v2bf16/v2i16/v4i8 are legal, so we can't rely on legalizer to handle 29025f757f3fSDimitry Andric // unaligned loads and have to handle it here. 
29035f757f3fSDimitry Andric EVT VT = Op.getValueType(); 29045f757f3fSDimitry Andric if (Isv2x16VT(VT) || VT == MVT::v4i8) { 29050b57cec5SDimitry Andric LoadSDNode *Load = cast<LoadSDNode>(Op); 29060b57cec5SDimitry Andric EVT MemVT = Load->getMemoryVT(); 29078bcb0991SDimitry Andric if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 29088bcb0991SDimitry Andric MemVT, *Load->getMemOperand())) { 29090b57cec5SDimitry Andric SDValue Ops[2]; 29100b57cec5SDimitry Andric std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG); 29110b57cec5SDimitry Andric return DAG.getMergeValues(Ops, SDLoc(Op)); 29120b57cec5SDimitry Andric } 29130b57cec5SDimitry Andric } 29140b57cec5SDimitry Andric 29150b57cec5SDimitry Andric return SDValue(); 29160b57cec5SDimitry Andric } 29170b57cec5SDimitry Andric 29180b57cec5SDimitry Andric // v = ld i1* addr 29190b57cec5SDimitry Andric // => 29200b57cec5SDimitry Andric // v1 = ld i8* addr (-> i16) 29210b57cec5SDimitry Andric // v = trunc i16 to i1 29220b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { 29230b57cec5SDimitry Andric SDNode *Node = Op.getNode(); 29240b57cec5SDimitry Andric LoadSDNode *LD = cast<LoadSDNode>(Node); 29250b57cec5SDimitry Andric SDLoc dl(Node); 29260b57cec5SDimitry Andric assert(LD->getExtensionType() == ISD::NON_EXTLOAD); 29270b57cec5SDimitry Andric assert(Node->getValueType(0) == MVT::i1 && 29280b57cec5SDimitry Andric "Custom lowering for i1 load only"); 29290fca6ea1SDimitry Andric SDValue newLD = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i16, LD->getChain(), 29300fca6ea1SDimitry Andric LD->getBasePtr(), LD->getPointerInfo(), 29310fca6ea1SDimitry Andric MVT::i8, LD->getAlign(), 29320b57cec5SDimitry Andric LD->getMemOperand()->getFlags()); 29330b57cec5SDimitry Andric SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); 29340b57cec5SDimitry Andric // The legalizer (the caller) is expecting two values from the legalized 29350b57cec5SDimitry 
Andric // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() 29360b57cec5SDimitry Andric // in LegalizeDAG.cpp which also uses MergeValues. 29370b57cec5SDimitry Andric SDValue Ops[] = { result, LD->getChain() }; 29380b57cec5SDimitry Andric return DAG.getMergeValues(Ops, dl); 29390b57cec5SDimitry Andric } 29400b57cec5SDimitry Andric 29410b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 29420b57cec5SDimitry Andric StoreSDNode *Store = cast<StoreSDNode>(Op); 29430b57cec5SDimitry Andric EVT VT = Store->getMemoryVT(); 29440b57cec5SDimitry Andric 29450b57cec5SDimitry Andric if (VT == MVT::i1) 29460b57cec5SDimitry Andric return LowerSTOREi1(Op, DAG); 29470b57cec5SDimitry Andric 29480b57cec5SDimitry Andric // v2f16 is legal, so we can't rely on legalizer to handle unaligned 29490b57cec5SDimitry Andric // stores and have to handle it here. 29505f757f3fSDimitry Andric if ((Isv2x16VT(VT) || VT == MVT::v4i8) && 29518bcb0991SDimitry Andric !allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 29528bcb0991SDimitry Andric VT, *Store->getMemOperand())) 29530b57cec5SDimitry Andric return expandUnalignedStore(Store, DAG); 29540b57cec5SDimitry Andric 29555f757f3fSDimitry Andric // v2f16, v2bf16 and v2i16 don't need special handling. 
29565f757f3fSDimitry Andric if (Isv2x16VT(VT) || VT == MVT::v4i8) 295706c3fb27SDimitry Andric return SDValue(); 295806c3fb27SDimitry Andric 29590b57cec5SDimitry Andric if (VT.isVector()) 29600b57cec5SDimitry Andric return LowerSTOREVector(Op, DAG); 29610b57cec5SDimitry Andric 29620b57cec5SDimitry Andric return SDValue(); 29630b57cec5SDimitry Andric } 29640b57cec5SDimitry Andric 29650b57cec5SDimitry Andric SDValue 29660b57cec5SDimitry Andric NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { 29670b57cec5SDimitry Andric SDNode *N = Op.getNode(); 29680b57cec5SDimitry Andric SDValue Val = N->getOperand(1); 29690b57cec5SDimitry Andric SDLoc DL(N); 29700b57cec5SDimitry Andric EVT ValVT = Val.getValueType(); 29710b57cec5SDimitry Andric 29720b57cec5SDimitry Andric if (ValVT.isVector()) { 29730b57cec5SDimitry Andric // We only handle "native" vector sizes for now, e.g. <4 x double> is not 29740b57cec5SDimitry Andric // legal. We can (and should) split that into 2 stores of <2 x double> here 29750b57cec5SDimitry Andric // but I'm leaving that as a TODO for now. 
29760b57cec5SDimitry Andric if (!ValVT.isSimple()) 29770b57cec5SDimitry Andric return SDValue(); 29780b57cec5SDimitry Andric switch (ValVT.getSimpleVT().SimpleTy) { 29790b57cec5SDimitry Andric default: 29800b57cec5SDimitry Andric return SDValue(); 29810b57cec5SDimitry Andric case MVT::v2i8: 29820b57cec5SDimitry Andric case MVT::v2i16: 29830b57cec5SDimitry Andric case MVT::v2i32: 29840b57cec5SDimitry Andric case MVT::v2i64: 29850b57cec5SDimitry Andric case MVT::v2f16: 2986bdd1243dSDimitry Andric case MVT::v2bf16: 29870b57cec5SDimitry Andric case MVT::v2f32: 29880b57cec5SDimitry Andric case MVT::v2f64: 29890b57cec5SDimitry Andric case MVT::v4i8: 29900b57cec5SDimitry Andric case MVT::v4i16: 29910b57cec5SDimitry Andric case MVT::v4i32: 29920b57cec5SDimitry Andric case MVT::v4f16: 2993bdd1243dSDimitry Andric case MVT::v4bf16: 29940b57cec5SDimitry Andric case MVT::v4f32: 29950b57cec5SDimitry Andric case MVT::v8f16: // <4 x f16x2> 2996bdd1243dSDimitry Andric case MVT::v8bf16: // <4 x bf16x2> 29975f757f3fSDimitry Andric case MVT::v8i16: // <4 x i16x2> 29980b57cec5SDimitry Andric // This is a "native" vector type 29990b57cec5SDimitry Andric break; 30000b57cec5SDimitry Andric } 30010b57cec5SDimitry Andric 30020b57cec5SDimitry Andric MemSDNode *MemSD = cast<MemSDNode>(N); 30030b57cec5SDimitry Andric const DataLayout &TD = DAG.getDataLayout(); 30040b57cec5SDimitry Andric 30055ffd83dbSDimitry Andric Align Alignment = MemSD->getAlign(); 30065ffd83dbSDimitry Andric Align PrefAlign = 30075ffd83dbSDimitry Andric TD.getPrefTypeAlign(ValVT.getTypeForEVT(*DAG.getContext())); 30085ffd83dbSDimitry Andric if (Alignment < PrefAlign) { 30090b57cec5SDimitry Andric // This store is not sufficiently aligned, so bail out and let this vector 30100b57cec5SDimitry Andric // store be scalarized. Note that we may still be able to emit smaller 30110b57cec5SDimitry Andric // vector stores. 
For example, if we are storing a <4 x float> with an 30120b57cec5SDimitry Andric // alignment of 8, this check will fail but the legalizer will try again 30130b57cec5SDimitry Andric // with 2 x <2 x float>, which will succeed with an alignment of 8. 30140b57cec5SDimitry Andric return SDValue(); 30150b57cec5SDimitry Andric } 30160b57cec5SDimitry Andric 30170b57cec5SDimitry Andric unsigned Opcode = 0; 30180b57cec5SDimitry Andric EVT EltVT = ValVT.getVectorElementType(); 30190b57cec5SDimitry Andric unsigned NumElts = ValVT.getVectorNumElements(); 30200b57cec5SDimitry Andric 30210b57cec5SDimitry Andric // Since StoreV2 is a target node, we cannot rely on DAG type legalization. 30220b57cec5SDimitry Andric // Therefore, we must ensure the type is legal. For i1 and i8, we set the 30230b57cec5SDimitry Andric // stored type to i16 and propagate the "real" type as the memory type. 30240b57cec5SDimitry Andric bool NeedExt = false; 30250b57cec5SDimitry Andric if (EltVT.getSizeInBits() < 16) 30260b57cec5SDimitry Andric NeedExt = true; 30270b57cec5SDimitry Andric 30280b57cec5SDimitry Andric bool StoreF16x2 = false; 30290b57cec5SDimitry Andric switch (NumElts) { 30300b57cec5SDimitry Andric default: 30310b57cec5SDimitry Andric return SDValue(); 30320b57cec5SDimitry Andric case 2: 30330b57cec5SDimitry Andric Opcode = NVPTXISD::StoreV2; 30340b57cec5SDimitry Andric break; 30350b57cec5SDimitry Andric case 4: 30360b57cec5SDimitry Andric Opcode = NVPTXISD::StoreV4; 30370b57cec5SDimitry Andric break; 30380b57cec5SDimitry Andric case 8: 30390b57cec5SDimitry Andric // v8f16 is a special case. PTX doesn't have st.v8.f16 30400b57cec5SDimitry Andric // instruction. Instead, we split the vector into v2f16 chunks and 30410b57cec5SDimitry Andric // store them with st.v4.b32. 
30425f757f3fSDimitry Andric assert(Is16bitsType(EltVT.getSimpleVT()) && "Wrong type for the vector."); 30430b57cec5SDimitry Andric Opcode = NVPTXISD::StoreV4; 30440b57cec5SDimitry Andric StoreF16x2 = true; 30450b57cec5SDimitry Andric break; 30460b57cec5SDimitry Andric } 30470b57cec5SDimitry Andric 30480b57cec5SDimitry Andric SmallVector<SDValue, 8> Ops; 30490b57cec5SDimitry Andric 30500b57cec5SDimitry Andric // First is the chain 30510b57cec5SDimitry Andric Ops.push_back(N->getOperand(0)); 30520b57cec5SDimitry Andric 30530b57cec5SDimitry Andric if (StoreF16x2) { 30540b57cec5SDimitry Andric // Combine f16,f16 -> v2f16 30550b57cec5SDimitry Andric NumElts /= 2; 30560b57cec5SDimitry Andric for (unsigned i = 0; i < NumElts; ++i) { 305706c3fb27SDimitry Andric SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, 30580b57cec5SDimitry Andric DAG.getIntPtrConstant(i * 2, DL)); 305906c3fb27SDimitry Andric SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, 30600b57cec5SDimitry Andric DAG.getIntPtrConstant(i * 2 + 1, DL)); 306106c3fb27SDimitry Andric EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, 2); 306206c3fb27SDimitry Andric SDValue V2 = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, E0, E1); 30630b57cec5SDimitry Andric Ops.push_back(V2); 30640b57cec5SDimitry Andric } 30650b57cec5SDimitry Andric } else { 30660b57cec5SDimitry Andric // Then the split values 30670b57cec5SDimitry Andric for (unsigned i = 0; i < NumElts; ++i) { 30680b57cec5SDimitry Andric SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, 30690b57cec5SDimitry Andric DAG.getIntPtrConstant(i, DL)); 30700b57cec5SDimitry Andric if (NeedExt) 30710b57cec5SDimitry Andric ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); 30720b57cec5SDimitry Andric Ops.push_back(ExtVal); 30730b57cec5SDimitry Andric } 30740b57cec5SDimitry Andric } 30750b57cec5SDimitry Andric 30760b57cec5SDimitry Andric // Then any remaining arguments 30770b57cec5SDimitry Andric 
Ops.append(N->op_begin() + 2, N->op_end()); 30780b57cec5SDimitry Andric 30790b57cec5SDimitry Andric SDValue NewSt = 30800b57cec5SDimitry Andric DAG.getMemIntrinsicNode(Opcode, DL, DAG.getVTList(MVT::Other), Ops, 30810b57cec5SDimitry Andric MemSD->getMemoryVT(), MemSD->getMemOperand()); 30820b57cec5SDimitry Andric 30830b57cec5SDimitry Andric // return DCI.CombineTo(N, NewSt, true); 30840b57cec5SDimitry Andric return NewSt; 30850b57cec5SDimitry Andric } 30860b57cec5SDimitry Andric 30870b57cec5SDimitry Andric return SDValue(); 30880b57cec5SDimitry Andric } 30890b57cec5SDimitry Andric 30900b57cec5SDimitry Andric // st i1 v, addr 30910b57cec5SDimitry Andric // => 30920b57cec5SDimitry Andric // v1 = zxt v to i16 30930b57cec5SDimitry Andric // st.u8 i16, addr 30940b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { 30950b57cec5SDimitry Andric SDNode *Node = Op.getNode(); 30960b57cec5SDimitry Andric SDLoc dl(Node); 30970b57cec5SDimitry Andric StoreSDNode *ST = cast<StoreSDNode>(Node); 30980b57cec5SDimitry Andric SDValue Tmp1 = ST->getChain(); 30990b57cec5SDimitry Andric SDValue Tmp2 = ST->getBasePtr(); 31000b57cec5SDimitry Andric SDValue Tmp3 = ST->getValue(); 31010b57cec5SDimitry Andric assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only"); 31020b57cec5SDimitry Andric Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3); 31030b57cec5SDimitry Andric SDValue Result = 31040b57cec5SDimitry Andric DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8, 310581ad6265SDimitry Andric ST->getAlign(), ST->getMemOperand()->getFlags()); 31060b57cec5SDimitry Andric return Result; 31070b57cec5SDimitry Andric } 31080b57cec5SDimitry Andric 31090fca6ea1SDimitry Andric SDValue NVPTXTargetLowering::LowerCopyToReg_128(SDValue Op, 31100fca6ea1SDimitry Andric SelectionDAG &DAG) const { 31110fca6ea1SDimitry Andric // Change the CopyToReg to take in two 64-bit operands instead of a 128-bit 
31120fca6ea1SDimitry Andric // operand so that it can pass the legalization. 31130fca6ea1SDimitry Andric 31140fca6ea1SDimitry Andric assert(Op.getOperand(1).getValueType() == MVT::i128 && 31150fca6ea1SDimitry Andric "Custom lowering for 128-bit CopyToReg only"); 31160fca6ea1SDimitry Andric 31170fca6ea1SDimitry Andric SDNode *Node = Op.getNode(); 31180fca6ea1SDimitry Andric SDLoc DL(Node); 31190fca6ea1SDimitry Andric 31200fca6ea1SDimitry Andric SDValue Cast = DAG.getBitcast(MVT::v2i64, Op->getOperand(2)); 31210fca6ea1SDimitry Andric SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Cast, 31220fca6ea1SDimitry Andric DAG.getIntPtrConstant(0, DL)); 31230fca6ea1SDimitry Andric SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Cast, 31240fca6ea1SDimitry Andric DAG.getIntPtrConstant(1, DL)); 31250fca6ea1SDimitry Andric 31260fca6ea1SDimitry Andric SmallVector<SDValue, 5> NewOps(Op->getNumOperands() + 1); 31270fca6ea1SDimitry Andric SmallVector<EVT, 3> ResultsType(Node->values()); 31280fca6ea1SDimitry Andric 31290fca6ea1SDimitry Andric NewOps[0] = Op->getOperand(0); // Chain 31300fca6ea1SDimitry Andric NewOps[1] = Op->getOperand(1); // Dst Reg 31310fca6ea1SDimitry Andric NewOps[2] = Lo; // Lower 64-bit 31320fca6ea1SDimitry Andric NewOps[3] = Hi; // Higher 64-bit 31330fca6ea1SDimitry Andric if (Op.getNumOperands() == 4) 31340fca6ea1SDimitry Andric NewOps[4] = Op->getOperand(3); // Glue if exists 31350fca6ea1SDimitry Andric 31360fca6ea1SDimitry Andric return DAG.getNode(ISD::CopyToReg, DL, ResultsType, NewOps); 31370fca6ea1SDimitry Andric } 31380fca6ea1SDimitry Andric 31390fca6ea1SDimitry Andric unsigned NVPTXTargetLowering::getNumRegisters( 31400fca6ea1SDimitry Andric LLVMContext &Context, EVT VT, 31410fca6ea1SDimitry Andric std::optional<MVT> RegisterVT = std::nullopt) const { 31420fca6ea1SDimitry Andric if (VT == MVT::i128 && RegisterVT == MVT::i128) 31430fca6ea1SDimitry Andric return 1; 31440fca6ea1SDimitry Andric return 
TargetLoweringBase::getNumRegisters(Context, VT, RegisterVT); 31450fca6ea1SDimitry Andric } 31460fca6ea1SDimitry Andric 31470fca6ea1SDimitry Andric bool NVPTXTargetLowering::splitValueIntoRegisterParts( 31480fca6ea1SDimitry Andric SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, 31490fca6ea1SDimitry Andric unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { 31500fca6ea1SDimitry Andric if (Val.getValueType() == MVT::i128 && NumParts == 1) { 31510fca6ea1SDimitry Andric Parts[0] = Val; 31520fca6ea1SDimitry Andric return true; 31530fca6ea1SDimitry Andric } 31540fca6ea1SDimitry Andric return false; 31550fca6ea1SDimitry Andric } 31560fca6ea1SDimitry Andric 3157bdd1243dSDimitry Andric // This creates target external symbol for a function parameter. 3158bdd1243dSDimitry Andric // Name of the symbol is composed from its index and the function name. 3159bdd1243dSDimitry Andric // Negative index corresponds to special parameter (unsized array) used for 3160bdd1243dSDimitry Andric // passing variable arguments. 
3161bdd1243dSDimitry Andric SDValue NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, 3162bdd1243dSDimitry Andric EVT v) const { 316306c3fb27SDimitry Andric StringRef SavedStr = nvTM->getStrPool().save( 316406c3fb27SDimitry Andric getParamName(&DAG.getMachineFunction().getFunction(), idx)); 3165bdd1243dSDimitry Andric return DAG.getTargetExternalSymbol(SavedStr.data(), v); 31660b57cec5SDimitry Andric } 31670b57cec5SDimitry Andric 31680b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerFormalArguments( 31690b57cec5SDimitry Andric SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 31700b57cec5SDimitry Andric const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 31710b57cec5SDimitry Andric SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 31720b57cec5SDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 31730b57cec5SDimitry Andric const DataLayout &DL = DAG.getDataLayout(); 31740b57cec5SDimitry Andric auto PtrVT = getPointerTy(DAG.getDataLayout()); 31750b57cec5SDimitry Andric 31760b57cec5SDimitry Andric const Function *F = &MF.getFunction(); 31770b57cec5SDimitry Andric const AttributeList &PAL = F->getAttributes(); 31780b57cec5SDimitry Andric const TargetLowering *TLI = STI.getTargetLowering(); 31790b57cec5SDimitry Andric 31800b57cec5SDimitry Andric SDValue Root = DAG.getRoot(); 31810b57cec5SDimitry Andric std::vector<SDValue> OutChains; 31820b57cec5SDimitry Andric 31830b57cec5SDimitry Andric bool isABI = (STI.getSmVersion() >= 20); 31840b57cec5SDimitry Andric assert(isABI && "Non-ABI compilation is not supported"); 31850b57cec5SDimitry Andric if (!isABI) 31860b57cec5SDimitry Andric return Chain; 31870b57cec5SDimitry Andric 31880b57cec5SDimitry Andric std::vector<Type *> argTypes; 31890b57cec5SDimitry Andric std::vector<const Argument *> theArgs; 31900b57cec5SDimitry Andric for (const Argument &I : F->args()) { 31910b57cec5SDimitry Andric theArgs.push_back(&I); 31920b57cec5SDimitry Andric 
argTypes.push_back(I.getType()); 31930b57cec5SDimitry Andric } 31940b57cec5SDimitry Andric // argTypes.size() (or theArgs.size()) and Ins.size() need not match. 31950b57cec5SDimitry Andric // Ins.size() will be larger 31960b57cec5SDimitry Andric // * if there is an aggregate argument with multiple fields (each field 31970b57cec5SDimitry Andric // showing up separately in Ins) 31980b57cec5SDimitry Andric // * if there is a vector argument with more than typical vector-length 31990b57cec5SDimitry Andric // elements (generally if more than 4) where each vector element is 32000b57cec5SDimitry Andric // individually present in Ins. 32010b57cec5SDimitry Andric // So a different index should be used for indexing into Ins. 32020b57cec5SDimitry Andric // See similar issue in LowerCall. 32030b57cec5SDimitry Andric unsigned InsIdx = 0; 32040b57cec5SDimitry Andric 32050fca6ea1SDimitry Andric for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++InsIdx) { 32060b57cec5SDimitry Andric Type *Ty = argTypes[i]; 32070b57cec5SDimitry Andric 32080b57cec5SDimitry Andric if (theArgs[i]->use_empty()) { 32090b57cec5SDimitry Andric // argument is dead 321006c3fb27SDimitry Andric if (IsTypePassedAsArray(Ty) && !Ty->isVectorTy()) { 32110b57cec5SDimitry Andric SmallVector<EVT, 16> vtparts; 32120b57cec5SDimitry Andric 32130b57cec5SDimitry Andric ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts); 321406c3fb27SDimitry Andric if (vtparts.empty()) 321506c3fb27SDimitry Andric report_fatal_error("Empty parameter types are not supported"); 321606c3fb27SDimitry Andric 32170b57cec5SDimitry Andric for (unsigned parti = 0, parte = vtparts.size(); parti != parte; 32180b57cec5SDimitry Andric ++parti) { 32190b57cec5SDimitry Andric InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); 32200b57cec5SDimitry Andric ++InsIdx; 32210b57cec5SDimitry Andric } 32220b57cec5SDimitry Andric if (vtparts.size() > 0) 32230b57cec5SDimitry Andric --InsIdx; 32240b57cec5SDimitry Andric continue; 
32250b57cec5SDimitry Andric } 32260b57cec5SDimitry Andric if (Ty->isVectorTy()) { 32270b57cec5SDimitry Andric EVT ObjectVT = getValueType(DL, Ty); 32280b57cec5SDimitry Andric unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); 32290b57cec5SDimitry Andric for (unsigned parti = 0; parti < NumRegs; ++parti) { 32300b57cec5SDimitry Andric InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); 32310b57cec5SDimitry Andric ++InsIdx; 32320b57cec5SDimitry Andric } 32330b57cec5SDimitry Andric if (NumRegs > 0) 32340b57cec5SDimitry Andric --InsIdx; 32350b57cec5SDimitry Andric continue; 32360b57cec5SDimitry Andric } 32370b57cec5SDimitry Andric InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); 32380b57cec5SDimitry Andric continue; 32390b57cec5SDimitry Andric } 32400b57cec5SDimitry Andric 32410fca6ea1SDimitry Andric // In the following cases, assign a node order of "i+1" 32420b57cec5SDimitry Andric // to newly created nodes. The SDNodes for params have to 32430b57cec5SDimitry Andric // appear in the same order as their order of appearance 32440fca6ea1SDimitry Andric // in the original function. "i+1" holds that order. 
3245349cc55cSDimitry Andric if (!PAL.hasParamAttr(i, Attribute::ByVal)) { 32460b57cec5SDimitry Andric bool aggregateIsPacked = false; 32470b57cec5SDimitry Andric if (StructType *STy = dyn_cast<StructType>(Ty)) 32480b57cec5SDimitry Andric aggregateIsPacked = STy->isPacked(); 32490b57cec5SDimitry Andric 32500b57cec5SDimitry Andric SmallVector<EVT, 16> VTs; 32510b57cec5SDimitry Andric SmallVector<uint64_t, 16> Offsets; 32520b57cec5SDimitry Andric ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0); 325306c3fb27SDimitry Andric if (VTs.empty()) 325406c3fb27SDimitry Andric report_fatal_error("Empty parameter types are not supported"); 325506c3fb27SDimitry Andric 32560fca6ea1SDimitry Andric Align ArgAlign = getFunctionArgumentAlignment( 32570fca6ea1SDimitry Andric F, Ty, i + AttributeList::FirstArgIndex, DL); 32580fca6ea1SDimitry Andric auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign); 32590b57cec5SDimitry Andric 32600fca6ea1SDimitry Andric SDValue Arg = getParamSymbol(DAG, i, PtrVT); 32610b57cec5SDimitry Andric int VecIdx = -1; // Index of the first element of the current vector. 32620b57cec5SDimitry Andric for (unsigned parti = 0, parte = VTs.size(); parti != parte; ++parti) { 32630b57cec5SDimitry Andric if (VectorInfo[parti] & PVF_FIRST) { 32640b57cec5SDimitry Andric assert(VecIdx == -1 && "Orphaned vector."); 32650b57cec5SDimitry Andric VecIdx = parti; 32660b57cec5SDimitry Andric } 32670b57cec5SDimitry Andric 32680b57cec5SDimitry Andric // That's the last element of this store op. 32690b57cec5SDimitry Andric if (VectorInfo[parti] & PVF_LAST) { 32700b57cec5SDimitry Andric unsigned NumElts = parti - VecIdx + 1; 32710b57cec5SDimitry Andric EVT EltVT = VTs[parti]; 32720b57cec5SDimitry Andric // i1 is loaded/stored as i8. 
32730b57cec5SDimitry Andric EVT LoadVT = EltVT; 32740b57cec5SDimitry Andric if (EltVT == MVT::i1) 32750b57cec5SDimitry Andric LoadVT = MVT::i8; 32765f757f3fSDimitry Andric else if (Isv2x16VT(EltVT) || EltVT == MVT::v4i8) 32770b57cec5SDimitry Andric // getLoad needs a vector type, but it can't handle 327806c3fb27SDimitry Andric // vectors which contain v2f16 or v2bf16 elements. So we must load 32790b57cec5SDimitry Andric // using i32 here and then bitcast back. 32800b57cec5SDimitry Andric LoadVT = MVT::i32; 32810b57cec5SDimitry Andric 32820b57cec5SDimitry Andric EVT VecVT = EVT::getVectorVT(F->getContext(), LoadVT, NumElts); 32830b57cec5SDimitry Andric SDValue VecAddr = 32840b57cec5SDimitry Andric DAG.getNode(ISD::ADD, dl, PtrVT, Arg, 32850b57cec5SDimitry Andric DAG.getConstant(Offsets[VecIdx], dl, PtrVT)); 32860b57cec5SDimitry Andric Value *srcValue = Constant::getNullValue(PointerType::get( 32870b57cec5SDimitry Andric EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM)); 32880fca6ea1SDimitry Andric 32890fca6ea1SDimitry Andric const MaybeAlign PartAlign = [&]() -> MaybeAlign { 32900fca6ea1SDimitry Andric if (aggregateIsPacked) 32910fca6ea1SDimitry Andric return Align(1); 32920fca6ea1SDimitry Andric if (NumElts != 1) 32930fca6ea1SDimitry Andric return std::nullopt; 32940fca6ea1SDimitry Andric Align PartAlign = 32950fca6ea1SDimitry Andric DL.getABITypeAlign(EltVT.getTypeForEVT(F->getContext())); 32960fca6ea1SDimitry Andric return commonAlignment(PartAlign, Offsets[parti]); 32970fca6ea1SDimitry Andric }(); 3298bdd1243dSDimitry Andric SDValue P = DAG.getLoad(VecVT, dl, Root, VecAddr, 32990fca6ea1SDimitry Andric MachinePointerInfo(srcValue), PartAlign, 33000b57cec5SDimitry Andric MachineMemOperand::MODereferenceable | 33010b57cec5SDimitry Andric MachineMemOperand::MOInvariant); 33020b57cec5SDimitry Andric if (P.getNode()) 33030fca6ea1SDimitry Andric P.getNode()->setIROrder(i + 1); 33040b57cec5SDimitry Andric for (unsigned j = 0; j < NumElts; ++j) { 
33050b57cec5SDimitry Andric SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LoadVT, P, 33060b57cec5SDimitry Andric DAG.getIntPtrConstant(j, dl)); 33070b57cec5SDimitry Andric // We've loaded i1 as an i8 and now must truncate it back to i1 33080b57cec5SDimitry Andric if (EltVT == MVT::i1) 33090b57cec5SDimitry Andric Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Elt); 33100b57cec5SDimitry Andric // v2f16 was loaded as an i32. Now we must bitcast it back. 33115f757f3fSDimitry Andric else if (EltVT != LoadVT) 331206c3fb27SDimitry Andric Elt = DAG.getNode(ISD::BITCAST, dl, EltVT, Elt); 3313fcaf7f86SDimitry Andric 3314fcaf7f86SDimitry Andric // If a promoted integer type is used, truncate down to the original 3315fcaf7f86SDimitry Andric MVT PromotedVT; 3316fcaf7f86SDimitry Andric if (PromoteScalarIntegerPTX(EltVT, &PromotedVT)) { 3317fcaf7f86SDimitry Andric Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); 3318fcaf7f86SDimitry Andric } 3319fcaf7f86SDimitry Andric 33200b57cec5SDimitry Andric // Extend the element if necessary (e.g. an i8 is loaded 33210b57cec5SDimitry Andric // into an i16 register) 33220b57cec5SDimitry Andric if (Ins[InsIdx].VT.isInteger() && 3323e8d8bef9SDimitry Andric Ins[InsIdx].VT.getFixedSizeInBits() > 3324e8d8bef9SDimitry Andric LoadVT.getFixedSizeInBits()) { 33250b57cec5SDimitry Andric unsigned Extend = Ins[InsIdx].Flags.isSExt() ? ISD::SIGN_EXTEND 33260b57cec5SDimitry Andric : ISD::ZERO_EXTEND; 33270b57cec5SDimitry Andric Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt); 33280b57cec5SDimitry Andric } 33290b57cec5SDimitry Andric InVals.push_back(Elt); 33300b57cec5SDimitry Andric } 33310b57cec5SDimitry Andric 33320b57cec5SDimitry Andric // Reset vector tracking state. 
33330b57cec5SDimitry Andric VecIdx = -1; 33340b57cec5SDimitry Andric } 33350b57cec5SDimitry Andric ++InsIdx; 33360b57cec5SDimitry Andric } 33370b57cec5SDimitry Andric if (VTs.size() > 0) 33380b57cec5SDimitry Andric --InsIdx; 33390b57cec5SDimitry Andric continue; 33400b57cec5SDimitry Andric } 33410b57cec5SDimitry Andric 33420b57cec5SDimitry Andric // Param has ByVal attribute 33430b57cec5SDimitry Andric // Return MoveParam(param symbol). 33440b57cec5SDimitry Andric // Ideally, the param symbol can be returned directly, 33450b57cec5SDimitry Andric // but when SDNode builder decides to use it in a CopyToReg(), 33460b57cec5SDimitry Andric // machine instruction fails because TargetExternalSymbol 33470b57cec5SDimitry Andric // (not lowered) is target dependent, and CopyToReg assumes 33480b57cec5SDimitry Andric // the source is lowered. 33490b57cec5SDimitry Andric EVT ObjectVT = getValueType(DL, Ty); 33500b57cec5SDimitry Andric assert(ObjectVT == Ins[InsIdx].VT && 33510b57cec5SDimitry Andric "Ins type did not match function type"); 33520fca6ea1SDimitry Andric SDValue Arg = getParamSymbol(DAG, i, PtrVT); 33530b57cec5SDimitry Andric SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); 33540b57cec5SDimitry Andric if (p.getNode()) 33550fca6ea1SDimitry Andric p.getNode()->setIROrder(i + 1); 33560b57cec5SDimitry Andric InVals.push_back(p); 33570b57cec5SDimitry Andric } 33580b57cec5SDimitry Andric 33590b57cec5SDimitry Andric if (!OutChains.empty()) 33600b57cec5SDimitry Andric DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains)); 33610b57cec5SDimitry Andric 33620b57cec5SDimitry Andric return Chain; 33630b57cec5SDimitry Andric } 33640b57cec5SDimitry Andric 33650fca6ea1SDimitry Andric // Use byte-store when the param adress of the return value is unaligned. 33660fca6ea1SDimitry Andric // This may happen when the return value is a field of a packed structure. 
// Emits one StoreRetval node per byte of \p RetVal and returns the updated
// chain.
//   \p Chain       chain the first byte store is attached to; each further
//                  store is chained to the previous one.
//   \p Offset      offset of the value's first byte; byte i is stored at
//                  Offset + i.
//   \p ElementType type of \p RetVal; its bit size divided by 8 gives the
//                  number of byte stores.
static SDValue LowerUnalignedStoreRet(SelectionDAG &DAG, SDValue Chain,
                                      uint64_t Offset, EVT ElementType,
                                      SDValue RetVal, const SDLoc &dl) {
  // Bit logic only works on integer types
  // NOTE(review): adjustElementType is defined elsewhere; presumably it
  // rewrites ElementType to an integer type and returns true when it did so.
  if (adjustElementType(ElementType))
    RetVal = DAG.getNode(ISD::BITCAST, dl, ElementType, RetVal);

  // Store each byte
  for (unsigned i = 0, n = ElementType.getSizeInBits() / 8; i < n; i++) {
    // Shift the byte to the last byte position
    SDValue ShiftVal = DAG.getNode(ISD::SRL, dl, ElementType, RetVal,
                                   DAG.getConstant(i * 8, dl, MVT::i32));
    SDValue StoreOperands[] = {Chain, DAG.getConstant(Offset + i, dl, MVT::i32),
                               ShiftVal};
    // Trunc store only the last byte by using
    //     st.param.b8
    // The register type can be larger than b8.
    Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
                                    DAG.getVTList(MVT::Other), StoreOperands,
                                    MVT::i8, MachinePointerInfo(), std::nullopt,
                                    MachineMemOperand::MOStore);
  }
  return Chain;
}

// Lower a function return.
//
// The return type is decomposed into value types and offsets, each outgoing
// value in \p OutVals is widened as required, and the values are written with
// StoreRetval / StoreRetvalV2 / StoreRetvalV4 nodes (or with per-byte stores
// for under-aligned aggregate fields). The result is a RET_GLUE node on the
// final chain. \p CallConv and \p isVarArg are not consulted here.
SDValue
NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &dl, SelectionDAG &DAG) const {
  const MachineFunction &MF = DAG.getMachineFunction();
  const Function &F = MF.getFunction();
  Type *RetTy = MF.getFunction().getReturnType();

  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;

  const DataLayout &DL = DAG.getDataLayout();
  SmallVector<SDValue, 16> PromotedOutVals;
  SmallVector<EVT, 16> VTs;
  SmallVector<uint64_t, 16> Offsets;
  ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets);
  assert(VTs.size() == OutVals.size() && "Bad return value decomposition");

  // First pass: wherever PromoteScalarIntegerPTX reports a promotion, replace
  // the entry in VTs by the promoted type and sign-/zero-extend the outgoing
  // value (per its SExt flag) to the promoted type. PromotedOutVals ends up
  // with exactly one entry per element of OutVals.
  for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
    SDValue PromotedOutVal = OutVals[i];
    MVT PromotedVT;
    if (PromoteScalarIntegerPTX(VTs[i], &PromotedVT)) {
      VTs[i] = EVT(PromotedVT);
    }
    if (PromoteScalarIntegerPTX(PromotedOutVal.getValueType(), &PromotedVT)) {
      llvm::ISD::NodeType Ext =
          Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      PromotedOutVal = DAG.getNode(Ext, dl, PromotedVT, PromotedOutVal);
    }
    PromotedOutVals.push_back(PromotedOutVal);
  }

  // Group the (possibly promoted) pieces into scalar/vector stores. Unsized
  // return types fall back to an alignment of 1.
  auto VectorInfo = VectorizePTXValueVTs(
      VTs, Offsets,
      RetTy->isSized() ? getFunctionParamOptimizedAlign(&F, RetTy, DL)
                       : Align(1));

  // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
  // 32-bits are sign extended or zero extended, depending on whether
  // they are signed or unsigned types.
  bool ExtendIntegerRetVal =
      RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;

  // Operand list of the store currently being assembled:
  // {chain, offset, value...}. It is empty between stores.
  SmallVector<SDValue, 6> StoreOperands;
  for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
    SDValue OutVal = OutVals[i];
    SDValue RetVal = PromotedOutVals[i];

    if (ExtendIntegerRetVal) {
      RetVal = DAG.getNode(Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND
                                                  : ISD::ZERO_EXTEND,
                           dl, MVT::i32, RetVal);
    } else if (OutVal.getValueSizeInBits() < 16) {
      // Use 16-bit registers for small load-stores as it's the
      // smallest general purpose register size supported by NVPTX.
      RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal);
    }

    // If we have a PVF_SCALAR entry, it may not even be sufficiently aligned
    // for a scalar store. In such cases, fall back to byte stores.
    if (VectorInfo[i] == PVF_SCALAR && RetTy->isAggregateType()) {
      EVT ElementType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];
      Align ElementTypeAlign =
          DL.getABITypeAlign(ElementType.getTypeForEVT(RetTy->getContext()));
      // Alignment the element actually has: the aggregate's ABI alignment
      // limited by the element's offset inside it.
      Align ElementAlign =
          commonAlignment(DL.getABITypeAlign(RetTy), Offsets[i]);
      if (ElementAlign < ElementTypeAlign) {
        assert(StoreOperands.empty() && "Orphaned operand list.");
        Chain = LowerUnalignedStoreRet(DAG, Chain, Offsets[i], ElementType,
                                       RetVal, dl);

        // The call to LowerUnalignedStoreRet inserted the necessary SDAG nodes
        // into the graph, so just move on to the next element.
        continue;
      }
    }

    // New load/store. Record chain and offset operands.
    if (VectorInfo[i] & PVF_FIRST) {
      assert(StoreOperands.empty() && "Orphaned operand list.");
      StoreOperands.push_back(Chain);
      StoreOperands.push_back(DAG.getConstant(Offsets[i], dl, MVT::i32));
    }

    // Record the value to return.
    StoreOperands.push_back(RetVal);

    // That's the last element of this store op.
    if (VectorInfo[i] & PVF_LAST) {
      NVPTXISD::NodeType Op;
      // The first two operands are the chain and the offset; the rest are the
      // values being stored.
      unsigned NumElts = StoreOperands.size() - 2;
      switch (NumElts) {
      case 1:
        Op = NVPTXISD::StoreRetval;
        break;
      case 2:
        Op = NVPTXISD::StoreRetvalV2;
        break;
      case 4:
        Op = NVPTXISD::StoreRetvalV4;
        break;
      default:
        llvm_unreachable("Invalid vector info.");
      }

      // Adjust type of load/store op if we've extended the scalar
      // return value.
      EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];
      Chain = DAG.getMemIntrinsicNode(
          Op, dl, DAG.getVTList(MVT::Other), StoreOperands, TheStoreType,
          MachinePointerInfo(), Align(1), MachineMemOperand::MOStore);
      // Cleanup vector state.
      StoreOperands.clear();
    }
  }

  return DAG.getNode(NVPTXISD::RET_GLUE, dl, MVT::Other, Chain);
}

// Lower an inline-asm operand for \p Constraint. Constraints longer than one
// character are ignored here (nothing is added to \p Ops); single-character
// constraints are forwarded to the generic TargetLowering implementation.
void NVPTXTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  if (Constraint.size() > 1)
    return;
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

// Maps a texture intrinsic ID to the matching NVPTXISD opcode. Intrinsics
// without a case in the switch yield 0.
static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
  switch (Intrinsic) {
  default:
    return 0;

  case Intrinsic::nvvm_tex_1d_v4f32_s32:
    return NVPTXISD::Tex1DFloatS32;
  case Intrinsic::nvvm_tex_1d_v4f32_f32:
    return NVPTXISD::Tex1DFloatFloat;
  case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
    return NVPTXISD::Tex1DFloatFloatLevel;
  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
    return NVPTXISD::Tex1DFloatFloatGrad;
  case Intrinsic::nvvm_tex_1d_v4s32_s32:
    return NVPTXISD::Tex1DS32S32;
  case Intrinsic::nvvm_tex_1d_v4s32_f32:
    return NVPTXISD::Tex1DS32Float;
  case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
    return NVPTXISD::Tex1DS32FloatLevel;
  case
Intrinsic::nvvm_tex_1d_grad_v4s32_f32: 35430b57cec5SDimitry Andric return NVPTXISD::Tex1DS32FloatGrad; 35440b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4u32_s32: 35450b57cec5SDimitry Andric return NVPTXISD::Tex1DU32S32; 35460b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4u32_f32: 35470b57cec5SDimitry Andric return NVPTXISD::Tex1DU32Float; 35480b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_level_v4u32_f32: 35490b57cec5SDimitry Andric return NVPTXISD::Tex1DU32FloatLevel; 35500b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: 35510b57cec5SDimitry Andric return NVPTXISD::Tex1DU32FloatGrad; 35520b57cec5SDimitry Andric 35530b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4f32_s32: 35540b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayFloatS32; 35550b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4f32_f32: 35560b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayFloatFloat; 35570b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: 35580b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayFloatFloatLevel; 35590b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: 35600b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayFloatFloatGrad; 35610b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4s32_s32: 35620b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayS32S32; 35630b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4s32_f32: 35640b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayS32Float; 35650b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: 35660b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayS32FloatLevel; 35670b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: 35680b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayS32FloatGrad; 35690b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4u32_s32: 35700b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayU32S32; 35710b57cec5SDimitry Andric case 
Intrinsic::nvvm_tex_1d_array_v4u32_f32: 35720b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayU32Float; 35730b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: 35740b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayU32FloatLevel; 35750b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: 35760b57cec5SDimitry Andric return NVPTXISD::Tex1DArrayU32FloatGrad; 35770b57cec5SDimitry Andric 35780b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4f32_s32: 35790b57cec5SDimitry Andric return NVPTXISD::Tex2DFloatS32; 35800b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4f32_f32: 35810b57cec5SDimitry Andric return NVPTXISD::Tex2DFloatFloat; 35820b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_level_v4f32_f32: 35830b57cec5SDimitry Andric return NVPTXISD::Tex2DFloatFloatLevel; 35840b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: 35850b57cec5SDimitry Andric return NVPTXISD::Tex2DFloatFloatGrad; 35860b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4s32_s32: 35870b57cec5SDimitry Andric return NVPTXISD::Tex2DS32S32; 35880b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4s32_f32: 35890b57cec5SDimitry Andric return NVPTXISD::Tex2DS32Float; 35900b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_level_v4s32_f32: 35910b57cec5SDimitry Andric return NVPTXISD::Tex2DS32FloatLevel; 35920b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: 35930b57cec5SDimitry Andric return NVPTXISD::Tex2DS32FloatGrad; 35940b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4u32_s32: 35950b57cec5SDimitry Andric return NVPTXISD::Tex2DU32S32; 35960b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4u32_f32: 35970b57cec5SDimitry Andric return NVPTXISD::Tex2DU32Float; 35980b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_level_v4u32_f32: 35990b57cec5SDimitry Andric return NVPTXISD::Tex2DU32FloatLevel; 36000b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: 36010b57cec5SDimitry Andric return 
NVPTXISD::Tex2DU32FloatGrad; 36020b57cec5SDimitry Andric 36030b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4f32_s32: 36040b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayFloatS32; 36050b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4f32_f32: 36060b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayFloatFloat; 36070b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: 36080b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayFloatFloatLevel; 36090b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: 36100b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayFloatFloatGrad; 36110b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4s32_s32: 36120b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayS32S32; 36130b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4s32_f32: 36140b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayS32Float; 36150b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: 36160b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayS32FloatLevel; 36170b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: 36180b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayS32FloatGrad; 36190b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4u32_s32: 36200b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayU32S32; 36210b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4u32_f32: 36220b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayU32Float; 36230b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: 36240b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayU32FloatLevel; 36250b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: 36260b57cec5SDimitry Andric return NVPTXISD::Tex2DArrayU32FloatGrad; 36270b57cec5SDimitry Andric 36280b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4f32_s32: 36290b57cec5SDimitry Andric return NVPTXISD::Tex3DFloatS32; 36300b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4f32_f32: 
36310b57cec5SDimitry Andric return NVPTXISD::Tex3DFloatFloat; 36320b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_level_v4f32_f32: 36330b57cec5SDimitry Andric return NVPTXISD::Tex3DFloatFloatLevel; 36340b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: 36350b57cec5SDimitry Andric return NVPTXISD::Tex3DFloatFloatGrad; 36360b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4s32_s32: 36370b57cec5SDimitry Andric return NVPTXISD::Tex3DS32S32; 36380b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4s32_f32: 36390b57cec5SDimitry Andric return NVPTXISD::Tex3DS32Float; 36400b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_level_v4s32_f32: 36410b57cec5SDimitry Andric return NVPTXISD::Tex3DS32FloatLevel; 36420b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: 36430b57cec5SDimitry Andric return NVPTXISD::Tex3DS32FloatGrad; 36440b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4u32_s32: 36450b57cec5SDimitry Andric return NVPTXISD::Tex3DU32S32; 36460b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4u32_f32: 36470b57cec5SDimitry Andric return NVPTXISD::Tex3DU32Float; 36480b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_level_v4u32_f32: 36490b57cec5SDimitry Andric return NVPTXISD::Tex3DU32FloatLevel; 36500b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: 36510b57cec5SDimitry Andric return NVPTXISD::Tex3DU32FloatGrad; 36520b57cec5SDimitry Andric 36530b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_v4f32_f32: 36540b57cec5SDimitry Andric return NVPTXISD::TexCubeFloatFloat; 36550b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_level_v4f32_f32: 36560b57cec5SDimitry Andric return NVPTXISD::TexCubeFloatFloatLevel; 36570b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_v4s32_f32: 36580b57cec5SDimitry Andric return NVPTXISD::TexCubeS32Float; 36590b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_level_v4s32_f32: 36600b57cec5SDimitry Andric return NVPTXISD::TexCubeS32FloatLevel; 36610b57cec5SDimitry Andric 
case Intrinsic::nvvm_tex_cube_v4u32_f32: 36620b57cec5SDimitry Andric return NVPTXISD::TexCubeU32Float; 36630b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_level_v4u32_f32: 36640b57cec5SDimitry Andric return NVPTXISD::TexCubeU32FloatLevel; 36650b57cec5SDimitry Andric 36660b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_v4f32_f32: 36670b57cec5SDimitry Andric return NVPTXISD::TexCubeArrayFloatFloat; 36680b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: 36690b57cec5SDimitry Andric return NVPTXISD::TexCubeArrayFloatFloatLevel; 36700b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_v4s32_f32: 36710b57cec5SDimitry Andric return NVPTXISD::TexCubeArrayS32Float; 36720b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: 36730b57cec5SDimitry Andric return NVPTXISD::TexCubeArrayS32FloatLevel; 36740b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_v4u32_f32: 36750b57cec5SDimitry Andric return NVPTXISD::TexCubeArrayU32Float; 36760b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: 36770b57cec5SDimitry Andric return NVPTXISD::TexCubeArrayU32FloatLevel; 36780b57cec5SDimitry Andric 36790b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: 36800b57cec5SDimitry Andric return NVPTXISD::Tld4R2DFloatFloat; 36810b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: 36820b57cec5SDimitry Andric return NVPTXISD::Tld4G2DFloatFloat; 36830b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: 36840b57cec5SDimitry Andric return NVPTXISD::Tld4B2DFloatFloat; 36850b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: 36860b57cec5SDimitry Andric return NVPTXISD::Tld4A2DFloatFloat; 36870b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: 36880b57cec5SDimitry Andric return NVPTXISD::Tld4R2DS64Float; 36890b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: 36900b57cec5SDimitry Andric return NVPTXISD::Tld4G2DS64Float; 
36910b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: 36920b57cec5SDimitry Andric return NVPTXISD::Tld4B2DS64Float; 36930b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: 36940b57cec5SDimitry Andric return NVPTXISD::Tld4A2DS64Float; 36950b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: 36960b57cec5SDimitry Andric return NVPTXISD::Tld4R2DU64Float; 36970b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: 36980b57cec5SDimitry Andric return NVPTXISD::Tld4G2DU64Float; 36990b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: 37000b57cec5SDimitry Andric return NVPTXISD::Tld4B2DU64Float; 37010b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: 37020b57cec5SDimitry Andric return NVPTXISD::Tld4A2DU64Float; 37030b57cec5SDimitry Andric 37040b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: 37050b57cec5SDimitry Andric return NVPTXISD::TexUnified1DFloatS32; 37060b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: 37070b57cec5SDimitry Andric return NVPTXISD::TexUnified1DFloatFloat; 37080b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: 37090b57cec5SDimitry Andric return NVPTXISD::TexUnified1DFloatFloatLevel; 37100b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: 37110b57cec5SDimitry Andric return NVPTXISD::TexUnified1DFloatFloatGrad; 37120b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: 37130b57cec5SDimitry Andric return NVPTXISD::TexUnified1DS32S32; 37140b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: 37150b57cec5SDimitry Andric return NVPTXISD::TexUnified1DS32Float; 37160b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: 37170b57cec5SDimitry Andric return NVPTXISD::TexUnified1DS32FloatLevel; 37180b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: 37190b57cec5SDimitry Andric return NVPTXISD::TexUnified1DS32FloatGrad; 
37200b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: 37210b57cec5SDimitry Andric return NVPTXISD::TexUnified1DU32S32; 37220b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: 37230b57cec5SDimitry Andric return NVPTXISD::TexUnified1DU32Float; 37240b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: 37250b57cec5SDimitry Andric return NVPTXISD::TexUnified1DU32FloatLevel; 37260b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: 37270b57cec5SDimitry Andric return NVPTXISD::TexUnified1DU32FloatGrad; 37280b57cec5SDimitry Andric 37290b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: 37300b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayFloatS32; 37310b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: 37320b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayFloatFloat; 37330b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: 37340b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayFloatFloatLevel; 37350b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: 37360b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayFloatFloatGrad; 37370b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: 37380b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayS32S32; 37390b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: 37400b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayS32Float; 37410b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: 37420b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayS32FloatLevel; 37430b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: 37440b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayS32FloatGrad; 37450b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: 37460b57cec5SDimitry Andric return 
NVPTXISD::TexUnified1DArrayU32S32; 37470b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: 37480b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayU32Float; 37490b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: 37500b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayU32FloatLevel; 37510b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: 37520b57cec5SDimitry Andric return NVPTXISD::TexUnified1DArrayU32FloatGrad; 37530b57cec5SDimitry Andric 37540b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: 37550b57cec5SDimitry Andric return NVPTXISD::TexUnified2DFloatS32; 37560b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: 37570b57cec5SDimitry Andric return NVPTXISD::TexUnified2DFloatFloat; 37580b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: 37590b57cec5SDimitry Andric return NVPTXISD::TexUnified2DFloatFloatLevel; 37600b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: 37610b57cec5SDimitry Andric return NVPTXISD::TexUnified2DFloatFloatGrad; 37620b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: 37630b57cec5SDimitry Andric return NVPTXISD::TexUnified2DS32S32; 37640b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: 37650b57cec5SDimitry Andric return NVPTXISD::TexUnified2DS32Float; 37660b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: 37670b57cec5SDimitry Andric return NVPTXISD::TexUnified2DS32FloatLevel; 37680b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: 37690b57cec5SDimitry Andric return NVPTXISD::TexUnified2DS32FloatGrad; 37700b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: 37710b57cec5SDimitry Andric return NVPTXISD::TexUnified2DU32S32; 37720b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: 37730b57cec5SDimitry Andric return 
NVPTXISD::TexUnified2DU32Float; 37740b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: 37750b57cec5SDimitry Andric return NVPTXISD::TexUnified2DU32FloatLevel; 37760b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: 37770b57cec5SDimitry Andric return NVPTXISD::TexUnified2DU32FloatGrad; 37780b57cec5SDimitry Andric 37790b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: 37800b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayFloatS32; 37810b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: 37820b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayFloatFloat; 37830b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: 37840b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayFloatFloatLevel; 37850b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: 37860b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayFloatFloatGrad; 37870b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: 37880b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayS32S32; 37890b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: 37900b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayS32Float; 37910b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: 37920b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayS32FloatLevel; 37930b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: 37940b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayS32FloatGrad; 37950b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: 37960b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayU32S32; 37970b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: 37980b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayU32Float; 37990b57cec5SDimitry Andric case 
Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: 38000b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayU32FloatLevel; 38010b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: 38020b57cec5SDimitry Andric return NVPTXISD::TexUnified2DArrayU32FloatGrad; 38030b57cec5SDimitry Andric 38040b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: 38050b57cec5SDimitry Andric return NVPTXISD::TexUnified3DFloatS32; 38060b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: 38070b57cec5SDimitry Andric return NVPTXISD::TexUnified3DFloatFloat; 38080b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: 38090b57cec5SDimitry Andric return NVPTXISD::TexUnified3DFloatFloatLevel; 38100b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: 38110b57cec5SDimitry Andric return NVPTXISD::TexUnified3DFloatFloatGrad; 38120b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: 38130b57cec5SDimitry Andric return NVPTXISD::TexUnified3DS32S32; 38140b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: 38150b57cec5SDimitry Andric return NVPTXISD::TexUnified3DS32Float; 38160b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: 38170b57cec5SDimitry Andric return NVPTXISD::TexUnified3DS32FloatLevel; 38180b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: 38190b57cec5SDimitry Andric return NVPTXISD::TexUnified3DS32FloatGrad; 38200b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: 38210b57cec5SDimitry Andric return NVPTXISD::TexUnified3DU32S32; 38220b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: 38230b57cec5SDimitry Andric return NVPTXISD::TexUnified3DU32Float; 38240b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: 38250b57cec5SDimitry Andric return NVPTXISD::TexUnified3DU32FloatLevel; 38260b57cec5SDimitry Andric case 
Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: 38270b57cec5SDimitry Andric return NVPTXISD::TexUnified3DU32FloatGrad; 38280b57cec5SDimitry Andric 38290b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: 38300b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeFloatFloat; 38310b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: 38320b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeFloatFloatLevel; 38330b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: 38340b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeS32Float; 38350b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: 38360b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeS32FloatLevel; 38370b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: 38380b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeU32Float; 38390b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: 38400b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeU32FloatLevel; 38410b57cec5SDimitry Andric 38420b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: 38430b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeArrayFloatFloat; 38440b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: 38450b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel; 38460b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: 38470b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeArrayS32Float; 38480b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: 38490b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel; 38500b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: 38510b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeArrayU32Float; 38520b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: 
38530b57cec5SDimitry Andric return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel; 38540b57cec5SDimitry Andric 38557a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_grad_v4f32_f32: 38567a6dacacSDimitry Andric return NVPTXISD::TexUnifiedCubeFloatFloatGrad; 38577a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_grad_v4s32_f32: 38587a6dacacSDimitry Andric return NVPTXISD::TexUnifiedCubeS32FloatGrad; 38597a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_grad_v4u32_f32: 38607a6dacacSDimitry Andric return NVPTXISD::TexUnifiedCubeU32FloatGrad; 38617a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_grad_v4f32_f32: 38627a6dacacSDimitry Andric return NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad; 38637a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_grad_v4s32_f32: 38647a6dacacSDimitry Andric return NVPTXISD::TexUnifiedCubeArrayS32FloatGrad; 38657a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_grad_v4u32_f32: 38667a6dacacSDimitry Andric return NVPTXISD::TexUnifiedCubeArrayU32FloatGrad; 38677a6dacacSDimitry Andric 38680b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: 38690b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedR2DFloatFloat; 38700b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: 38710b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedG2DFloatFloat; 38720b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: 38730b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedB2DFloatFloat; 38740b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: 38750b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedA2DFloatFloat; 38760b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: 38770b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedR2DS64Float; 38780b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: 38790b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedG2DS64Float; 
38800b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: 38810b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedB2DS64Float; 38820b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: 38830b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedA2DS64Float; 38840b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: 38850b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedR2DU64Float; 38860b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: 38870b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedG2DU64Float; 38880b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: 38890b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedB2DU64Float; 38900b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: 38910b57cec5SDimitry Andric return NVPTXISD::Tld4UnifiedA2DU64Float; 38920b57cec5SDimitry Andric } 38930b57cec5SDimitry Andric } 38940b57cec5SDimitry Andric 38950b57cec5SDimitry Andric /* Translates an NVVM surface-load intrinsic ID (Intrinsic::nvvm_suld_*) into the matching NVPTXISD::Suld* opcode. Returns 0 for any ID that is not one of the cases listed below. The cases cover the 1d, 1d_array, 2d, 2d_array and 3d intrinsics for i8/i16/i32/i64, their v2 forms, and v4i8/v4i16/v4i32 (there is no v4i64 case), each in a _clamp, a _trap and a _zero flavour. */ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { 38960b57cec5SDimitry Andric switch (Intrinsic) { 38970b57cec5SDimitry Andric default: 38980b57cec5SDimitry Andric return 0; 38990b57cec5SDimitry Andric /* --- _clamp intrinsics --- */ case Intrinsic::nvvm_suld_1d_i8_clamp: 39000b57cec5SDimitry Andric return NVPTXISD::Suld1DI8Clamp; 39010b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i16_clamp: 39020b57cec5SDimitry Andric return NVPTXISD::Suld1DI16Clamp; 39030b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i32_clamp: 39040b57cec5SDimitry Andric return NVPTXISD::Suld1DI32Clamp; 39050b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i64_clamp: 39060b57cec5SDimitry Andric return NVPTXISD::Suld1DI64Clamp; 39070b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i8_clamp: 39080b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I8Clamp; 39090b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i16_clamp: 39100b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I16Clamp; 39110b57cec5SDimitry Andric case 
Intrinsic::nvvm_suld_1d_v2i32_clamp: 39120b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I32Clamp; 39130b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i64_clamp: 39140b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I64Clamp; 39150b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i8_clamp: 39160b57cec5SDimitry Andric return NVPTXISD::Suld1DV4I8Clamp; 39170b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i16_clamp: 39180b57cec5SDimitry Andric return NVPTXISD::Suld1DV4I16Clamp; 39190b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i32_clamp: 39200b57cec5SDimitry Andric return NVPTXISD::Suld1DV4I32Clamp; 39210b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i8_clamp: 39220b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI8Clamp; 39230b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i16_clamp: 39240b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI16Clamp; 39250b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i32_clamp: 39260b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI32Clamp; 39270b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i64_clamp: 39280b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI64Clamp; 39290b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: 39300b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I8Clamp; 39310b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: 39320b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I16Clamp; 39330b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: 39340b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I32Clamp; 39350b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: 39360b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I64Clamp; 39370b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: 39380b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV4I8Clamp; 39390b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: 
39400b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV4I16Clamp; 39410b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: 39420b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV4I32Clamp; 39430b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i8_clamp: 39440b57cec5SDimitry Andric return NVPTXISD::Suld2DI8Clamp; 39450b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i16_clamp: 39460b57cec5SDimitry Andric return NVPTXISD::Suld2DI16Clamp; 39470b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i32_clamp: 39480b57cec5SDimitry Andric return NVPTXISD::Suld2DI32Clamp; 39490b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i64_clamp: 39500b57cec5SDimitry Andric return NVPTXISD::Suld2DI64Clamp; 39510b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i8_clamp: 39520b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I8Clamp; 39530b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i16_clamp: 39540b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I16Clamp; 39550b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i32_clamp: 39560b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I32Clamp; 39570b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i64_clamp: 39580b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I64Clamp; 39590b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i8_clamp: 39600b57cec5SDimitry Andric return NVPTXISD::Suld2DV4I8Clamp; 39610b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i16_clamp: 39620b57cec5SDimitry Andric return NVPTXISD::Suld2DV4I16Clamp; 39630b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i32_clamp: 39640b57cec5SDimitry Andric return NVPTXISD::Suld2DV4I32Clamp; 39650b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i8_clamp: 39660b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI8Clamp; 39670b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i16_clamp: 39680b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI16Clamp; 39690b57cec5SDimitry Andric case 
Intrinsic::nvvm_suld_2d_array_i32_clamp: 39700b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI32Clamp; 39710b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i64_clamp: 39720b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI64Clamp; 39730b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: 39740b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I8Clamp; 39750b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: 39760b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I16Clamp; 39770b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: 39780b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I32Clamp; 39790b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: 39800b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I64Clamp; 39810b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: 39820b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV4I8Clamp; 39830b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: 39840b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV4I16Clamp; 39850b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: 39860b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV4I32Clamp; 39870b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i8_clamp: 39880b57cec5SDimitry Andric return NVPTXISD::Suld3DI8Clamp; 39890b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i16_clamp: 39900b57cec5SDimitry Andric return NVPTXISD::Suld3DI16Clamp; 39910b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i32_clamp: 39920b57cec5SDimitry Andric return NVPTXISD::Suld3DI32Clamp; 39930b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i64_clamp: 39940b57cec5SDimitry Andric return NVPTXISD::Suld3DI64Clamp; 39950b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i8_clamp: 39960b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I8Clamp; 39970b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i16_clamp: 39980b57cec5SDimitry Andric 
return NVPTXISD::Suld3DV2I16Clamp; 39990b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i32_clamp: 40000b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I32Clamp; 40010b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i64_clamp: 40020b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I64Clamp; 40030b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i8_clamp: 40040b57cec5SDimitry Andric return NVPTXISD::Suld3DV4I8Clamp; 40050b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i16_clamp: 40060b57cec5SDimitry Andric return NVPTXISD::Suld3DV4I16Clamp; 40070b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i32_clamp: 40080b57cec5SDimitry Andric return NVPTXISD::Suld3DV4I32Clamp; 40090b57cec5SDimitry Andric /* --- _trap intrinsics --- */ case Intrinsic::nvvm_suld_1d_i8_trap: 40100b57cec5SDimitry Andric return NVPTXISD::Suld1DI8Trap; 40110b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i16_trap: 40120b57cec5SDimitry Andric return NVPTXISD::Suld1DI16Trap; 40130b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i32_trap: 40140b57cec5SDimitry Andric return NVPTXISD::Suld1DI32Trap; 40150b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i64_trap: 40160b57cec5SDimitry Andric return NVPTXISD::Suld1DI64Trap; 40170b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i8_trap: 40180b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I8Trap; 40190b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i16_trap: 40200b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I16Trap; 40210b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i32_trap: 40220b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I32Trap; 40230b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i64_trap: 40240b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I64Trap; 40250b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i8_trap: 40260b57cec5SDimitry Andric return NVPTXISD::Suld1DV4I8Trap; 40270b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i16_trap: 40280b57cec5SDimitry Andric return NVPTXISD::Suld1DV4I16Trap; 
40290b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i32_trap: 40300b57cec5SDimitry Andric return NVPTXISD::Suld1DV4I32Trap; 40310b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i8_trap: 40320b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI8Trap; 40330b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i16_trap: 40340b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI16Trap; 40350b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i32_trap: 40360b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI32Trap; 40370b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i64_trap: 40380b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI64Trap; 40390b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i8_trap: 40400b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I8Trap; 40410b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i16_trap: 40420b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I16Trap; 40430b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i32_trap: 40440b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I32Trap; 40450b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i64_trap: 40460b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I64Trap; 40470b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i8_trap: 40480b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV4I8Trap; 40490b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i16_trap: 40500b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV4I16Trap; 40510b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i32_trap: 40520b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV4I32Trap; 40530b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i8_trap: 40540b57cec5SDimitry Andric return NVPTXISD::Suld2DI8Trap; 40550b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i16_trap: 40560b57cec5SDimitry Andric return NVPTXISD::Suld2DI16Trap; 40570b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i32_trap: 
40580b57cec5SDimitry Andric return NVPTXISD::Suld2DI32Trap; 40590b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i64_trap: 40600b57cec5SDimitry Andric return NVPTXISD::Suld2DI64Trap; 40610b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i8_trap: 40620b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I8Trap; 40630b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i16_trap: 40640b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I16Trap; 40650b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i32_trap: 40660b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I32Trap; 40670b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i64_trap: 40680b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I64Trap; 40690b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i8_trap: 40700b57cec5SDimitry Andric return NVPTXISD::Suld2DV4I8Trap; 40710b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i16_trap: 40720b57cec5SDimitry Andric return NVPTXISD::Suld2DV4I16Trap; 40730b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i32_trap: 40740b57cec5SDimitry Andric return NVPTXISD::Suld2DV4I32Trap; 40750b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i8_trap: 40760b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI8Trap; 40770b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i16_trap: 40780b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI16Trap; 40790b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i32_trap: 40800b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI32Trap; 40810b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i64_trap: 40820b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI64Trap; 40830b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i8_trap: 40840b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I8Trap; 40850b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i16_trap: 40860b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I16Trap; 40870b57cec5SDimitry Andric case 
Intrinsic::nvvm_suld_2d_array_v2i32_trap: 40880b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I32Trap; 40890b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i64_trap: 40900b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I64Trap; 40910b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i8_trap: 40920b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV4I8Trap; 40930b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i16_trap: 40940b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV4I16Trap; 40950b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i32_trap: 40960b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV4I32Trap; 40970b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i8_trap: 40980b57cec5SDimitry Andric return NVPTXISD::Suld3DI8Trap; 40990b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i16_trap: 41000b57cec5SDimitry Andric return NVPTXISD::Suld3DI16Trap; 41010b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i32_trap: 41020b57cec5SDimitry Andric return NVPTXISD::Suld3DI32Trap; 41030b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i64_trap: 41040b57cec5SDimitry Andric return NVPTXISD::Suld3DI64Trap; 41050b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i8_trap: 41060b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I8Trap; 41070b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i16_trap: 41080b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I16Trap; 41090b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i32_trap: 41100b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I32Trap; 41110b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i64_trap: 41120b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I64Trap; 41130b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i8_trap: 41140b57cec5SDimitry Andric return NVPTXISD::Suld3DV4I8Trap; 41150b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i16_trap: 41160b57cec5SDimitry Andric return NVPTXISD::Suld3DV4I16Trap; 41170b57cec5SDimitry Andric case 
Intrinsic::nvvm_suld_3d_v4i32_trap: 41180b57cec5SDimitry Andric return NVPTXISD::Suld3DV4I32Trap; 41190b57cec5SDimitry Andric /* --- _zero intrinsics --- */ case Intrinsic::nvvm_suld_1d_i8_zero: 41200b57cec5SDimitry Andric return NVPTXISD::Suld1DI8Zero; 41210b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i16_zero: 41220b57cec5SDimitry Andric return NVPTXISD::Suld1DI16Zero; 41230b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i32_zero: 41240b57cec5SDimitry Andric return NVPTXISD::Suld1DI32Zero; 41250b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i64_zero: 41260b57cec5SDimitry Andric return NVPTXISD::Suld1DI64Zero; 41270b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i8_zero: 41280b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I8Zero; 41290b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i16_zero: 41300b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I16Zero; 41310b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i32_zero: 41320b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I32Zero; 41330b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i64_zero: 41340b57cec5SDimitry Andric return NVPTXISD::Suld1DV2I64Zero; 41350b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i8_zero: 41360b57cec5SDimitry Andric return NVPTXISD::Suld1DV4I8Zero; 41370b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i16_zero: 41380b57cec5SDimitry Andric return NVPTXISD::Suld1DV4I16Zero; 41390b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i32_zero: 41400b57cec5SDimitry Andric return NVPTXISD::Suld1DV4I32Zero; 41410b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i8_zero: 41420b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI8Zero; 41430b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i16_zero: 41440b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI16Zero; 41450b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i32_zero: 41460b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI32Zero; 41470b57cec5SDimitry Andric case 
Intrinsic::nvvm_suld_1d_array_i64_zero: 41480b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayI64Zero; 41490b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i8_zero: 41500b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I8Zero; 41510b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i16_zero: 41520b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I16Zero; 41530b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i32_zero: 41540b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I32Zero; 41550b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i64_zero: 41560b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV2I64Zero; 41570b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i8_zero: 41580b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV4I8Zero; 41590b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i16_zero: 41600b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV4I16Zero; 41610b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i32_zero: 41620b57cec5SDimitry Andric return NVPTXISD::Suld1DArrayV4I32Zero; 41630b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i8_zero: 41640b57cec5SDimitry Andric return NVPTXISD::Suld2DI8Zero; 41650b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i16_zero: 41660b57cec5SDimitry Andric return NVPTXISD::Suld2DI16Zero; 41670b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i32_zero: 41680b57cec5SDimitry Andric return NVPTXISD::Suld2DI32Zero; 41690b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i64_zero: 41700b57cec5SDimitry Andric return NVPTXISD::Suld2DI64Zero; 41710b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i8_zero: 41720b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I8Zero; 41730b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i16_zero: 41740b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I16Zero; 41750b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i32_zero: 41760b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I32Zero; 
41770b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i64_zero: 41780b57cec5SDimitry Andric return NVPTXISD::Suld2DV2I64Zero; 41790b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i8_zero: 41800b57cec5SDimitry Andric return NVPTXISD::Suld2DV4I8Zero; 41810b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i16_zero: 41820b57cec5SDimitry Andric return NVPTXISD::Suld2DV4I16Zero; 41830b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i32_zero: 41840b57cec5SDimitry Andric return NVPTXISD::Suld2DV4I32Zero; 41850b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i8_zero: 41860b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI8Zero; 41870b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i16_zero: 41880b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI16Zero; 41890b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i32_zero: 41900b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI32Zero; 41910b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i64_zero: 41920b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayI64Zero; 41930b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i8_zero: 41940b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I8Zero; 41950b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i16_zero: 41960b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I16Zero; 41970b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i32_zero: 41980b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I32Zero; 41990b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i64_zero: 42000b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV2I64Zero; 42010b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i8_zero: 42020b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV4I8Zero; 42030b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i16_zero: 42040b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV4I16Zero; 42050b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i32_zero: 
42060b57cec5SDimitry Andric return NVPTXISD::Suld2DArrayV4I32Zero; 42070b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i8_zero: 42080b57cec5SDimitry Andric return NVPTXISD::Suld3DI8Zero; 42090b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i16_zero: 42100b57cec5SDimitry Andric return NVPTXISD::Suld3DI16Zero; 42110b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i32_zero: 42120b57cec5SDimitry Andric return NVPTXISD::Suld3DI32Zero; 42130b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i64_zero: 42140b57cec5SDimitry Andric return NVPTXISD::Suld3DI64Zero; 42150b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i8_zero: 42160b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I8Zero; 42170b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i16_zero: 42180b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I16Zero; 42190b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i32_zero: 42200b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I32Zero; 42210b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i64_zero: 42220b57cec5SDimitry Andric return NVPTXISD::Suld3DV2I64Zero; 42230b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i8_zero: 42240b57cec5SDimitry Andric return NVPTXISD::Suld3DV4I8Zero; 42250b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i16_zero: 42260b57cec5SDimitry Andric return NVPTXISD::Suld3DV4I16Zero; 42270b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i32_zero: 42280b57cec5SDimitry Andric return NVPTXISD::Suld3DV4I32Zero; 42290b57cec5SDimitry Andric } 42300b57cec5SDimitry Andric } 42310b57cec5SDimitry Andric 42320b57cec5SDimitry Andric // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as 42330b57cec5SDimitry Andric // TgtMemIntrinsic 42340b57cec5SDimitry Andric // because we need the information that is only available in the "Value" type 42350b57cec5SDimitry Andric // of destination 42360b57cec5SDimitry Andric // pointer. In particular, the address space information. 
42370b57cec5SDimitry Andric bool NVPTXTargetLowering::getTgtMemIntrinsic( 42380b57cec5SDimitry Andric IntrinsicInfo &Info, const CallInst &I, 42390b57cec5SDimitry Andric MachineFunction &MF, unsigned Intrinsic) const { 42400b57cec5SDimitry Andric switch (Intrinsic) { 42410b57cec5SDimitry Andric default: 42420b57cec5SDimitry Andric return false; 42430b57cec5SDimitry Andric case Intrinsic::nvvm_match_all_sync_i32p: 42440b57cec5SDimitry Andric case Intrinsic::nvvm_match_all_sync_i64p: 42450b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN; 42460b57cec5SDimitry Andric // memVT is bogus. These intrinsics have IntrInaccessibleMemOnly attribute 42470b57cec5SDimitry Andric // in order to model data exchange with other threads, but perform no real 42480b57cec5SDimitry Andric // memory accesses. 42490b57cec5SDimitry Andric Info.memVT = MVT::i1; 42500b57cec5SDimitry Andric 42510b57cec5SDimitry Andric // Our result depends on both our and other thread's arguments. 42520b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; 42530b57cec5SDimitry Andric return true; 42540b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col: 42550b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row: 42560b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride: 42570b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride: 42580b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col: 42590b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row: 42600b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride: 42610b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride: 42620b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col: 42630b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row: 42640b57cec5SDimitry Andric case 
Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride: 42650b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride: 42660b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col: 42670b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row: 42680b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride: 42690b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride: 42700b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col: 42710b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row: 42720b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride: 42730b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride: 42740b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col: 42750b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row: 42760b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride: 42770b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride: { 42780b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN; 42790b57cec5SDimitry Andric Info.memVT = MVT::v8f16; 42800b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0); 42810b57cec5SDimitry Andric Info.offset = 0; 42820b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad; 42838bcb0991SDimitry Andric Info.align = Align(16); 42840b57cec5SDimitry Andric return true; 42850b57cec5SDimitry Andric } 42860b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col: 42870b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col_stride: 42880b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col_stride: 42890b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col: 42900b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row: 42910b57cec5SDimitry Andric case 
Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row_stride: 42920b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row_stride: 42930b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row: 4294fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col: 4295fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col_stride: 4296fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row: 4297fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row_stride: 42980b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col: 42990b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col_stride: 43000b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col_stride: 43010b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col: 43020b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row: 43030b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row_stride: 43040b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row_stride: 4305fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row: 4306fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col: 4307fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col_stride: 4308fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row: 4309fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row_stride: { 43100b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN; 43110b57cec5SDimitry Andric Info.memVT = MVT::v2i32; 43120b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0); 43130b57cec5SDimitry Andric Info.offset = 0; 43140b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad; 43158bcb0991SDimitry Andric Info.align = Align(8); 43160b57cec5SDimitry Andric return true; 43170b57cec5SDimitry Andric } 43180b57cec5SDimitry Andric 
43190b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col: 43200b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col_stride: 43210b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col_stride: 43220b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col: 43230b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row: 43240b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row_stride: 43250b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row_stride: 43260b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row: 4327fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col: 4328fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col_stride: 4329fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row: 4330fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row_stride: 4331fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col: 4332fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col_stride: 4333fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row: 4334fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row_stride: 43350b57cec5SDimitry Andric 43360b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col: 43370b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col_stride: 43380b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col_stride: 43390b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col: 43400b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row: 43410b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row_stride: 43420b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row_stride: 4343fe6060f1SDimitry Andric case 
Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row: 4344fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col: 4345fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col_stride: 4346fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row: 4347fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row_stride: 4348fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col: 4349fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col_stride: 4350fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row: 4351349cc55cSDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row_stride: 4352349cc55cSDimitry Andric case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_b16: 4353349cc55cSDimitry Andric case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_trans_b16: { 43540b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN; 43550b57cec5SDimitry Andric Info.memVT = MVT::v4i32; 43560b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0); 43570b57cec5SDimitry Andric Info.offset = 0; 43580b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad; 43598bcb0991SDimitry Andric Info.align = Align(16); 43600b57cec5SDimitry Andric return true; 43610b57cec5SDimitry Andric } 43620b57cec5SDimitry Andric 43630b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col: 43640b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col_stride: 43650b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col_stride: 43660b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col: 43670b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row: 43680b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row_stride: 43690b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row_stride: 43700b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row: 
43710b57cec5SDimitry Andric 43720b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col: 43730b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col_stride: 43740b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col_stride: 43750b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col: 43760b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row: 43770b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row_stride: 43780b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row_stride: 43790b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row: 43800b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row: 43810b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row_stride: 43820b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col: 43830b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col_stride: 43840b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row: 43850b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row_stride: 43860b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row_stride: 43870b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row: 43880b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col: 43890b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col_stride: 43900b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col_stride: 4391349cc55cSDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col: 4392349cc55cSDimitry Andric case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_b16: 4393349cc55cSDimitry Andric case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_trans_b16: { 43940b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN; 43950b57cec5SDimitry Andric Info.memVT = MVT::i32; 43960b57cec5SDimitry Andric Info.ptrVal = 
I.getArgOperand(0); 43970b57cec5SDimitry Andric Info.offset = 0; 43980b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad; 43998bcb0991SDimitry Andric Info.align = Align(4); 44000b57cec5SDimitry Andric return true; 44010b57cec5SDimitry Andric } 44020b57cec5SDimitry Andric 44030b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col: 44040b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row: 44050b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride: 44060b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride: 44070b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col: 44080b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row: 44090b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride: 44100b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride: 44110b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col: 44120b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row: 44130b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride: 44140b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride: { 44150b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN; 44160b57cec5SDimitry Andric Info.memVT = MVT::v4f16; 44170b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0); 44180b57cec5SDimitry Andric Info.offset = 0; 44190b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad; 44208bcb0991SDimitry Andric Info.align = Align(16); 44210b57cec5SDimitry Andric return true; 44220b57cec5SDimitry Andric } 44230b57cec5SDimitry Andric 44240b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col: 44250b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row: 44260b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride: 44270b57cec5SDimitry Andric 
case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride: 44280b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col: 44290b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row: 44300b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride: 44310b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride: 44320b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col: 44330b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row: 44340b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride: 4435fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride: 4436fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col: 4437fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row: 4438fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col_stride: 4439fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row_stride: { 44400b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN; 44410b57cec5SDimitry Andric Info.memVT = MVT::v8f32; 44420b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0); 44430b57cec5SDimitry Andric Info.offset = 0; 44440b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad; 44458bcb0991SDimitry Andric Info.align = Align(16); 44460b57cec5SDimitry Andric return true; 44470b57cec5SDimitry Andric } 44480b57cec5SDimitry Andric 4449fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col: 4450fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col_stride: 4451fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row: 4452fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row_stride: 4453fe6060f1SDimitry Andric 4454fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col: 4455fe6060f1SDimitry Andric case 
Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col_stride: 4456fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row: 4457fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row_stride: 4458fe6060f1SDimitry Andric 44590b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col: 44600b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col_stride: 44610b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row: 44620b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row_stride: 44630b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col: 44640b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col_stride: 44650b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row: 44660b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row_stride: 44670b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col: 44680b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col_stride: 44690b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row: 44700b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row_stride: { 44710b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN; 44720b57cec5SDimitry Andric Info.memVT = MVT::v8i32; 44730b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0); 44740b57cec5SDimitry Andric Info.offset = 0; 44750b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad; 44768bcb0991SDimitry Andric Info.align = Align(16); 44770b57cec5SDimitry Andric return true; 44780b57cec5SDimitry Andric } 44790b57cec5SDimitry Andric 44800b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col: 44810b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col_stride: 44820b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row: 44830b57cec5SDimitry Andric case 
Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row_stride: 44840b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col: 44850b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col_stride: 44860b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row: 4487349cc55cSDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row_stride: 4488349cc55cSDimitry Andric case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_b16: 4489349cc55cSDimitry Andric case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_trans_b16: { 44900b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN; 44910b57cec5SDimitry Andric Info.memVT = MVT::v2i32; 44920b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0); 44930b57cec5SDimitry Andric Info.offset = 0; 44940b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad; 44958bcb0991SDimitry Andric Info.align = Align(8); 44960b57cec5SDimitry Andric return true; 44970b57cec5SDimitry Andric } 44980b57cec5SDimitry Andric 4499fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col: 4500fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col_stride: 4501fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row: 4502fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row_stride: 4503fe6060f1SDimitry Andric 4504fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col: 4505fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col_stride: 4506fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row: 4507fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row_stride: { 4508fe6060f1SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN; 4509fe6060f1SDimitry Andric Info.memVT = MVT::f64; 4510fe6060f1SDimitry Andric Info.ptrVal = I.getArgOperand(0); 4511fe6060f1SDimitry Andric Info.offset = 0; 4512fe6060f1SDimitry Andric Info.flags = MachineMemOperand::MOLoad; 4513fe6060f1SDimitry Andric Info.align = 
Align(8); 4514fe6060f1SDimitry Andric return true; 4515fe6060f1SDimitry Andric } 4516fe6060f1SDimitry Andric 4517fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col: 4518fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col_stride: 4519fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row: 4520fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row_stride: { 4521fe6060f1SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN; 4522fe6060f1SDimitry Andric Info.memVT = MVT::v2f64; 4523fe6060f1SDimitry Andric Info.ptrVal = I.getArgOperand(0); 4524fe6060f1SDimitry Andric Info.offset = 0; 4525fe6060f1SDimitry Andric Info.flags = MachineMemOperand::MOLoad; 4526fe6060f1SDimitry Andric Info.align = Align(16); 4527fe6060f1SDimitry Andric return true; 4528fe6060f1SDimitry Andric } 4529fe6060f1SDimitry Andric 45300b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col: 45310b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row: 45320b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride: 45330b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride: 45340b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col: 45350b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row: 45360b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride: 45370b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride: 45380b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col: 45390b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row: 45400b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride: 45410b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride: { 45420b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_VOID; 45430b57cec5SDimitry Andric Info.memVT = MVT::v4f16; 
45440b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0); 45450b57cec5SDimitry Andric Info.offset = 0; 45460b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOStore; 45478bcb0991SDimitry Andric Info.align = Align(16); 45480b57cec5SDimitry Andric return true; 45490b57cec5SDimitry Andric } 45500b57cec5SDimitry Andric 45510b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col: 45520b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row: 45530b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride: 45540b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride: 45550b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col: 45560b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row: 45570b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride: 45580b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride: 45590b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col: 45600b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row: 45610b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride: 4562fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride: 4563fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col: 4564fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row: 4565fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col_stride: 4566fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row_stride: { 45670b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_VOID; 45680b57cec5SDimitry Andric Info.memVT = MVT::v8f32; 45690b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0); 45700b57cec5SDimitry Andric Info.offset = 0; 45710b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOStore; 45728bcb0991SDimitry Andric Info.align = 
Align(16); 45730b57cec5SDimitry Andric return true; 45740b57cec5SDimitry Andric } 45750b57cec5SDimitry Andric 45760b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col: 45770b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col_stride: 45780b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row: 45790b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row_stride: 45800b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col: 45810b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col_stride: 45820b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row: 45830b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row_stride: 45840b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col: 45850b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col_stride: 45860b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row: 45870b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row_stride: { 45880b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_VOID; 45890b57cec5SDimitry Andric Info.memVT = MVT::v8i32; 45900b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0); 45910b57cec5SDimitry Andric Info.offset = 0; 45920b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOStore; 45938bcb0991SDimitry Andric Info.align = Align(16); 45940b57cec5SDimitry Andric return true; 45950b57cec5SDimitry Andric } 45960b57cec5SDimitry Andric 45970b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col: 45980b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col_stride: 45990b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row: 46000b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row_stride: 46010b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col: 46020b57cec5SDimitry Andric 
case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col_stride: 46030b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row: 46040b57cec5SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row_stride: { 46050b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_VOID; 46060b57cec5SDimitry Andric Info.memVT = MVT::v2i32; 46070b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0); 46080b57cec5SDimitry Andric Info.offset = 0; 46090b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOStore; 46108bcb0991SDimitry Andric Info.align = Align(8); 46110b57cec5SDimitry Andric return true; 46120b57cec5SDimitry Andric } 46130b57cec5SDimitry Andric 4614fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col: 4615fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col_stride: 4616fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row: 4617fe6060f1SDimitry Andric case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row_stride: { 4618fe6060f1SDimitry Andric Info.opc = ISD::INTRINSIC_VOID; 4619fe6060f1SDimitry Andric Info.memVT = MVT::v2f64; 4620fe6060f1SDimitry Andric Info.ptrVal = I.getArgOperand(0); 4621fe6060f1SDimitry Andric Info.offset = 0; 4622fe6060f1SDimitry Andric Info.flags = MachineMemOperand::MOStore; 4623fe6060f1SDimitry Andric Info.align = Align(16); 4624fe6060f1SDimitry Andric return true; 4625fe6060f1SDimitry Andric } 4626fe6060f1SDimitry Andric 46270b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_load_inc_32: 46280b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_load_dec_32: 46290b57cec5SDimitry Andric 46300b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_add_gen_f_cta: 46310b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_add_gen_f_sys: 46320b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_add_gen_i_cta: 46330b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_add_gen_i_sys: 46340b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_and_gen_i_cta: 46350b57cec5SDimitry Andric case 
Intrinsic::nvvm_atomic_and_gen_i_sys: 46360b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_cas_gen_i_cta: 46370b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_cas_gen_i_sys: 46380b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_dec_gen_i_cta: 46390b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_dec_gen_i_sys: 46400b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_inc_gen_i_cta: 46410b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_inc_gen_i_sys: 46420b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_max_gen_i_cta: 46430b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_max_gen_i_sys: 46440b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_min_gen_i_cta: 46450b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_min_gen_i_sys: 46460b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_or_gen_i_cta: 46470b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_or_gen_i_sys: 46480b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_exch_gen_i_cta: 46490b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_exch_gen_i_sys: 46500b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_xor_gen_i_cta: 46510b57cec5SDimitry Andric case Intrinsic::nvvm_atomic_xor_gen_i_sys: { 46520fca6ea1SDimitry Andric auto &DL = I.getDataLayout(); 46530b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN; 46540b57cec5SDimitry Andric Info.memVT = getValueType(DL, I.getType()); 46550b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0); 46560b57cec5SDimitry Andric Info.offset = 0; 46570b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; 46588bcb0991SDimitry Andric Info.align.reset(); 46590b57cec5SDimitry Andric return true; 46600b57cec5SDimitry Andric } 46610b57cec5SDimitry Andric 46620b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_i: 46630b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_f: 46640b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_p: { 46650fca6ea1SDimitry Andric auto &DL = I.getDataLayout(); 46660b57cec5SDimitry Andric 
Info.opc = ISD::INTRINSIC_W_CHAIN; 46670b57cec5SDimitry Andric if (Intrinsic == Intrinsic::nvvm_ldu_global_i) 46680b57cec5SDimitry Andric Info.memVT = getValueType(DL, I.getType()); 46690b57cec5SDimitry Andric else if(Intrinsic == Intrinsic::nvvm_ldu_global_p) 46700b57cec5SDimitry Andric Info.memVT = getPointerTy(DL); 46710b57cec5SDimitry Andric else 46720b57cec5SDimitry Andric Info.memVT = getValueType(DL, I.getType()); 46730b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0); 46740b57cec5SDimitry Andric Info.offset = 0; 46750b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad; 46765ffd83dbSDimitry Andric Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue(); 46770b57cec5SDimitry Andric 46780b57cec5SDimitry Andric return true; 46790b57cec5SDimitry Andric } 46800b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_i: 46810b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_f: 46820b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_p: { 46830fca6ea1SDimitry Andric auto &DL = I.getDataLayout(); 46840b57cec5SDimitry Andric 46850b57cec5SDimitry Andric Info.opc = ISD::INTRINSIC_W_CHAIN; 46860b57cec5SDimitry Andric if (Intrinsic == Intrinsic::nvvm_ldg_global_i) 46870b57cec5SDimitry Andric Info.memVT = getValueType(DL, I.getType()); 46880b57cec5SDimitry Andric else if(Intrinsic == Intrinsic::nvvm_ldg_global_p) 46890b57cec5SDimitry Andric Info.memVT = getPointerTy(DL); 46900b57cec5SDimitry Andric else 46910b57cec5SDimitry Andric Info.memVT = getValueType(DL, I.getType()); 46920b57cec5SDimitry Andric Info.ptrVal = I.getArgOperand(0); 46930b57cec5SDimitry Andric Info.offset = 0; 46940b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad; 46955ffd83dbSDimitry Andric Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue(); 46960b57cec5SDimitry Andric 46970b57cec5SDimitry Andric return true; 46980b57cec5SDimitry Andric } 46990b57cec5SDimitry Andric 47000b57cec5SDimitry Andric case 
Intrinsic::nvvm_tex_1d_v4f32_s32: 47010b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4f32_f32: 47020b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_level_v4f32_f32: 47030b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: 47040b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4f32_s32: 47050b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4f32_f32: 47060b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: 47070b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: 47080b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4f32_s32: 47090b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4f32_f32: 47100b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_level_v4f32_f32: 47110b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: 47120b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4f32_s32: 47130b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4f32_f32: 47140b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: 47150b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: 47160b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4f32_s32: 47170b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4f32_f32: 47180b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_level_v4f32_f32: 47190b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: 47200b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_v4f32_f32: 47210b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_level_v4f32_f32: 47220b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_v4f32_f32: 47230b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: 47240b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: 47250b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: 47260b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: 47270b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: 
47280b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: 47290b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: 47300b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: 47310b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: 47320b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: 47330b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: 47340b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: 47350b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: 47360b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: 47370b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: 47380b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: 47390b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: 47400b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: 47410b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: 47420b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: 47430b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: 47440b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: 47450b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: 47460b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: 47470b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: 47480b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: 47490b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: 47500b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: 47510b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: 47527a6dacacSDimitry Andric case 
Intrinsic::nvvm_tex_unified_cube_grad_v4f32_f32: 47537a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_grad_v4f32_f32: 47540b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: 47550b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: 47560b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: 47570b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: 47580b57cec5SDimitry Andric Info.opc = getOpcForTextureInstr(Intrinsic); 47590b57cec5SDimitry Andric Info.memVT = MVT::v4f32; 47600b57cec5SDimitry Andric Info.ptrVal = nullptr; 47610b57cec5SDimitry Andric Info.offset = 0; 47620b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad; 47638bcb0991SDimitry Andric Info.align = Align(16); 47640b57cec5SDimitry Andric return true; 47650b57cec5SDimitry Andric 47660b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4s32_s32: 47670b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4s32_f32: 47680b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_level_v4s32_f32: 47690b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: 47700b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4s32_s32: 47710b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4s32_f32: 47720b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: 47730b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: 47740b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4s32_s32: 47750b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4s32_f32: 47760b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_level_v4s32_f32: 47770b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: 47780b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4s32_s32: 47790b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4s32_f32: 47800b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: 47810b57cec5SDimitry Andric case 
Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: 47820b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4s32_s32: 47830b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4s32_f32: 47840b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_level_v4s32_f32: 47850b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: 47860b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_v4s32_f32: 47870b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_level_v4s32_f32: 47880b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_v4s32_f32: 47890b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: 47900b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_v4u32_f32: 47910b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_level_v4u32_f32: 47920b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_v4u32_f32: 47930b57cec5SDimitry Andric case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: 47940b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4u32_s32: 47950b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_v4u32_f32: 47960b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_level_v4u32_f32: 47970b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: 47980b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4u32_s32: 47990b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_v4u32_f32: 48000b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: 48010b57cec5SDimitry Andric case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: 48020b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4u32_s32: 48030b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_v4u32_f32: 48040b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_level_v4u32_f32: 48050b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: 48060b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4u32_s32: 48070b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_v4u32_f32: 48080b57cec5SDimitry Andric case 
Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: 48090b57cec5SDimitry Andric case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: 48100b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4u32_s32: 48110b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_v4u32_f32: 48120b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_level_v4u32_f32: 48130b57cec5SDimitry Andric case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: 48140b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: 48150b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: 48160b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: 48170b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: 48180b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: 48190b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: 48200b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: 48210b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: 48220b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: 48230b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: 48240b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: 48250b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: 48260b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: 48270b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: 48280b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: 48290b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: 48300b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: 48310b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: 48320b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: 48330b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: 48340b57cec5SDimitry Andric case 
Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: 48350b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: 48360b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: 48370b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: 48380b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: 48390b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: 48400b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: 48410b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: 48420b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: 48430b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: 48440b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: 48450b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: 48460b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: 48470b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: 48480b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: 48490b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: 48500b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: 48510b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: 48520b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: 48530b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: 48540b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: 48550b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: 48560b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: 48570b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: 48580b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: 
48590b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: 48600b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: 48610b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: 48620b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: 48630b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: 48640b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: 48650b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: 48660b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: 48670b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: 48680b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: 48690b57cec5SDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: 48707a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_grad_v4s32_f32: 48717a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_grad_v4u32_f32: 48727a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_grad_v4s32_f32: 48737a6dacacSDimitry Andric case Intrinsic::nvvm_tex_unified_cube_array_grad_v4u32_f32: 48740b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: 48750b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: 48760b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: 48770b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: 48780b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: 48790b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: 48800b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: 48810b57cec5SDimitry Andric case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: 48820b57cec5SDimitry Andric Info.opc = getOpcForTextureInstr(Intrinsic); 48830b57cec5SDimitry Andric Info.memVT = MVT::v4i32; 
48840b57cec5SDimitry Andric Info.ptrVal = nullptr; 48850b57cec5SDimitry Andric Info.offset = 0; 48860b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad; 48878bcb0991SDimitry Andric Info.align = Align(16); 48880b57cec5SDimitry Andric return true; 48890b57cec5SDimitry Andric 48900b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i8_clamp: 48910b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i8_clamp: 48920b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i8_clamp: 48930b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i8_clamp: 48940b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: 48950b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: 48960b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i8_clamp: 48970b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i8_clamp: 48980b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i8_clamp: 48990b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i8_clamp: 49000b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: 49010b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: 49020b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i8_clamp: 49030b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i8_clamp: 49040b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i8_clamp: 49050b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i8_trap: 49060b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i8_trap: 49070b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i8_trap: 49080b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i8_trap: 49090b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i8_trap: 49100b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i8_trap: 49110b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i8_trap: 49120b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i8_trap: 49130b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i8_trap: 49140b57cec5SDimitry Andric 
case Intrinsic::nvvm_suld_2d_array_i8_trap: 49150b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i8_trap: 49160b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i8_trap: 49170b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i8_trap: 49180b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i8_trap: 49190b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i8_trap: 49200b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i8_zero: 49210b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i8_zero: 49220b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i8_zero: 49230b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i8_zero: 49240b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i8_zero: 49250b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i8_zero: 49260b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i8_zero: 49270b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i8_zero: 49280b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i8_zero: 49290b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i8_zero: 49300b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i8_zero: 49310b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i8_zero: 49320b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i8_zero: 49330b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i8_zero: 49340b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i8_zero: 49350b57cec5SDimitry Andric Info.opc = getOpcForSurfaceInstr(Intrinsic); 49360b57cec5SDimitry Andric Info.memVT = MVT::i8; 49370b57cec5SDimitry Andric Info.ptrVal = nullptr; 49380b57cec5SDimitry Andric Info.offset = 0; 49390b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad; 49408bcb0991SDimitry Andric Info.align = Align(16); 49410b57cec5SDimitry Andric return true; 49420b57cec5SDimitry Andric 49430b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i16_clamp: 49440b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i16_clamp: 
49450b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i16_clamp: 49460b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i16_clamp: 49470b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: 49480b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: 49490b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i16_clamp: 49500b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i16_clamp: 49510b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i16_clamp: 49520b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i16_clamp: 49530b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: 49540b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: 49550b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i16_clamp: 49560b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i16_clamp: 49570b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i16_clamp: 49580b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i16_trap: 49590b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i16_trap: 49600b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i16_trap: 49610b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i16_trap: 49620b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i16_trap: 49630b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i16_trap: 49640b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i16_trap: 49650b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i16_trap: 49660b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i16_trap: 49670b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i16_trap: 49680b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i16_trap: 49690b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i16_trap: 49700b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i16_trap: 49710b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i16_trap: 49720b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i16_trap: 
49730b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i16_zero: 49740b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i16_zero: 49750b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i16_zero: 49760b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i16_zero: 49770b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i16_zero: 49780b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i16_zero: 49790b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i16_zero: 49800b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i16_zero: 49810b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i16_zero: 49820b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i16_zero: 49830b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i16_zero: 49840b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i16_zero: 49850b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i16_zero: 49860b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i16_zero: 49870b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i16_zero: 49880b57cec5SDimitry Andric Info.opc = getOpcForSurfaceInstr(Intrinsic); 49890b57cec5SDimitry Andric Info.memVT = MVT::i16; 49900b57cec5SDimitry Andric Info.ptrVal = nullptr; 49910b57cec5SDimitry Andric Info.offset = 0; 49920b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad; 49938bcb0991SDimitry Andric Info.align = Align(16); 49940b57cec5SDimitry Andric return true; 49950b57cec5SDimitry Andric 49960b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i32_clamp: 49970b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i32_clamp: 49980b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i32_clamp: 49990b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i32_clamp: 50000b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: 50010b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: 50020b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i32_clamp: 50030b57cec5SDimitry Andric 
case Intrinsic::nvvm_suld_2d_v2i32_clamp: 50040b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i32_clamp: 50050b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i32_clamp: 50060b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: 50070b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: 50080b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i32_clamp: 50090b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i32_clamp: 50100b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i32_clamp: 50110b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i32_trap: 50120b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i32_trap: 50130b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i32_trap: 50140b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i32_trap: 50150b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i32_trap: 50160b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v4i32_trap: 50170b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i32_trap: 50180b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i32_trap: 50190b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i32_trap: 50200b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i32_trap: 50210b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i32_trap: 50220b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i32_trap: 50230b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i32_trap: 50240b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i32_trap: 50250b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i32_trap: 50260b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i32_zero: 50270b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i32_zero: 50280b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v4i32_zero: 50290b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i32_zero: 50300b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i32_zero: 50310b57cec5SDimitry Andric case 
Intrinsic::nvvm_suld_1d_array_v4i32_zero: 50320b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i32_zero: 50330b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i32_zero: 50340b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v4i32_zero: 50350b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i32_zero: 50360b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i32_zero: 50370b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v4i32_zero: 50380b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i32_zero: 50390b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i32_zero: 50400b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v4i32_zero: 50410b57cec5SDimitry Andric Info.opc = getOpcForSurfaceInstr(Intrinsic); 50420b57cec5SDimitry Andric Info.memVT = MVT::i32; 50430b57cec5SDimitry Andric Info.ptrVal = nullptr; 50440b57cec5SDimitry Andric Info.offset = 0; 50450b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad; 50468bcb0991SDimitry Andric Info.align = Align(16); 50470b57cec5SDimitry Andric return true; 50480b57cec5SDimitry Andric 50490b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i64_clamp: 50500b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i64_clamp: 50510b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i64_clamp: 50520b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: 50530b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i64_clamp: 50540b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i64_clamp: 50550b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i64_clamp: 50560b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: 50570b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i64_clamp: 50580b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i64_clamp: 50590b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i64_trap: 50600b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i64_trap: 50610b57cec5SDimitry Andric case 
Intrinsic::nvvm_suld_1d_array_i64_trap: 50620b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i64_trap: 50630b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i64_trap: 50640b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i64_trap: 50650b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i64_trap: 50660b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i64_trap: 50670b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i64_trap: 50680b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i64_trap: 50690b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_i64_zero: 50700b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_v2i64_zero: 50710b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_i64_zero: 50720b57cec5SDimitry Andric case Intrinsic::nvvm_suld_1d_array_v2i64_zero: 50730b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_i64_zero: 50740b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_v2i64_zero: 50750b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_i64_zero: 50760b57cec5SDimitry Andric case Intrinsic::nvvm_suld_2d_array_v2i64_zero: 50770b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_i64_zero: 50780b57cec5SDimitry Andric case Intrinsic::nvvm_suld_3d_v2i64_zero: 50790b57cec5SDimitry Andric Info.opc = getOpcForSurfaceInstr(Intrinsic); 50800b57cec5SDimitry Andric Info.memVT = MVT::i64; 50810b57cec5SDimitry Andric Info.ptrVal = nullptr; 50820b57cec5SDimitry Andric Info.offset = 0; 50830b57cec5SDimitry Andric Info.flags = MachineMemOperand::MOLoad; 50848bcb0991SDimitry Andric Info.align = Align(16); 50850b57cec5SDimitry Andric return true; 50860b57cec5SDimitry Andric } 50870b57cec5SDimitry Andric return false; 50880b57cec5SDimitry Andric } 50890b57cec5SDimitry Andric 509081ad6265SDimitry Andric /// getFunctionParamOptimizedAlign - since function arguments are passed via 509181ad6265SDimitry Andric /// .param space, we may want to increase their alignment in a way that 509281ad6265SDimitry Andric /// ensures that 
we can effectively vectorize their loads & stores. We can 509381ad6265SDimitry Andric /// increase alignment only if the function has internal or has private 509481ad6265SDimitry Andric /// linkage as for other linkage types callers may already rely on default 509581ad6265SDimitry Andric /// alignment. To allow using 128-bit vectorized loads/stores, this function 509681ad6265SDimitry Andric /// ensures that alignment is 16 or greater. 509781ad6265SDimitry Andric Align NVPTXTargetLowering::getFunctionParamOptimizedAlign( 509881ad6265SDimitry Andric const Function *F, Type *ArgTy, const DataLayout &DL) const { 50990fca6ea1SDimitry Andric // Capping the alignment to 128 bytes as that is the maximum alignment 51000fca6ea1SDimitry Andric // supported by PTX. 51010fca6ea1SDimitry Andric const Align ABITypeAlign = std::min(Align(128), DL.getABITypeAlign(ArgTy)); 510281ad6265SDimitry Andric 510381ad6265SDimitry Andric // If a function has linkage different from internal or private, we 5104bdd1243dSDimitry Andric // must use default ABI alignment as external users rely on it. Same 5105bdd1243dSDimitry Andric // for a function that may be called from a function pointer. 5106bdd1243dSDimitry Andric if (!F || !F->hasLocalLinkage() || 5107bdd1243dSDimitry Andric F->hasAddressTaken(/*Users=*/nullptr, 5108bdd1243dSDimitry Andric /*IgnoreCallbackUses=*/false, 5109bdd1243dSDimitry Andric /*IgnoreAssumeLikeCalls=*/true, 5110bdd1243dSDimitry Andric /*IgnoreLLVMUsed=*/true)) 51110fca6ea1SDimitry Andric return ABITypeAlign; 511281ad6265SDimitry Andric 511381ad6265SDimitry Andric assert(!isKernelFunction(*F) && "Expect kernels to have non-local linkage"); 51140fca6ea1SDimitry Andric return std::max(Align(16), ABITypeAlign); 511581ad6265SDimitry Andric } 511681ad6265SDimitry Andric 5117bdd1243dSDimitry Andric /// Helper for computing alignment of a device function byval parameter. 
5118bdd1243dSDimitry Andric Align NVPTXTargetLowering::getFunctionByValParamAlign( 5119bdd1243dSDimitry Andric const Function *F, Type *ArgTy, Align InitialAlign, 5120bdd1243dSDimitry Andric const DataLayout &DL) const { 5121bdd1243dSDimitry Andric Align ArgAlign = InitialAlign; 5122bdd1243dSDimitry Andric // Try to increase alignment to enhance vectorization options. 5123bdd1243dSDimitry Andric if (F) 5124bdd1243dSDimitry Andric ArgAlign = std::max(ArgAlign, getFunctionParamOptimizedAlign(F, ArgTy, DL)); 5125bdd1243dSDimitry Andric 512606c3fb27SDimitry Andric // Old ptx versions have a bug. When PTX code takes address of 5127bdd1243dSDimitry Andric // byval parameter with alignment < 4, ptxas generates code to 5128bdd1243dSDimitry Andric // spill argument into memory. Alas on sm_50+ ptxas generates 5129bdd1243dSDimitry Andric // SASS code that fails with misaligned access. To work around 5130bdd1243dSDimitry Andric // the problem, make sure that we align byval parameters by at 513106c3fb27SDimitry Andric // least 4. This bug seems to be fixed at least starting from 513206c3fb27SDimitry Andric // ptxas > 9.0. 513306c3fb27SDimitry Andric // TODO: remove this after verifying the bug is not reproduced 513406c3fb27SDimitry Andric // on non-deprecated ptxas versions. 513506c3fb27SDimitry Andric if (ForceMinByValParamAlign) 5136bdd1243dSDimitry Andric ArgAlign = std::max(ArgAlign, Align(4)); 5137bdd1243dSDimitry Andric 5138bdd1243dSDimitry Andric return ArgAlign; 5139bdd1243dSDimitry Andric } 5140bdd1243dSDimitry Andric 514106c3fb27SDimitry Andric // Helper for getting a function parameter name. Name is composed from 514206c3fb27SDimitry Andric // its index and the function name. Negative index corresponds to special 514306c3fb27SDimitry Andric // parameter (unsized array) used for passing variable arguments. 
514406c3fb27SDimitry Andric std::string NVPTXTargetLowering::getParamName(const Function *F, 514506c3fb27SDimitry Andric int Idx) const { 514606c3fb27SDimitry Andric std::string ParamName; 514706c3fb27SDimitry Andric raw_string_ostream ParamStr(ParamName); 514806c3fb27SDimitry Andric 514906c3fb27SDimitry Andric ParamStr << getTargetMachine().getSymbol(F)->getName(); 515006c3fb27SDimitry Andric if (Idx < 0) 515106c3fb27SDimitry Andric ParamStr << "_vararg"; 515206c3fb27SDimitry Andric else 515306c3fb27SDimitry Andric ParamStr << "_param_" << Idx; 515406c3fb27SDimitry Andric 515506c3fb27SDimitry Andric return ParamName; 515606c3fb27SDimitry Andric } 515706c3fb27SDimitry Andric 51580b57cec5SDimitry Andric /// isLegalAddressingMode - Return true if the addressing mode represented 51590b57cec5SDimitry Andric /// by AM is legal for this target, for a load/store of the specified type. 51600b57cec5SDimitry Andric /// Used to guide target specific optimizations, like loop strength reduction 51610b57cec5SDimitry Andric /// (LoopStrengthReduce.cpp) and memory optimization for address mode 51620b57cec5SDimitry Andric /// (CodeGenPrepare.cpp) 51630b57cec5SDimitry Andric bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL, 51640b57cec5SDimitry Andric const AddrMode &AM, Type *Ty, 51650b57cec5SDimitry Andric unsigned AS, Instruction *I) const { 51660b57cec5SDimitry Andric // AddrMode - This represents an addressing mode of: 51670b57cec5SDimitry Andric // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg 51680b57cec5SDimitry Andric // 51690b57cec5SDimitry Andric // The legal address modes are 51700b57cec5SDimitry Andric // - [avar] 51710b57cec5SDimitry Andric // - [areg] 51720b57cec5SDimitry Andric // - [areg+immoff] 51730b57cec5SDimitry Andric // - [immAddr] 51740b57cec5SDimitry Andric 51750fca6ea1SDimitry Andric // immoff must fit in a signed 32-bit int 51760fca6ea1SDimitry Andric if (!APInt(64, AM.BaseOffs).isSignedIntN(32)) 51770fca6ea1SDimitry Andric return false; 
51780fca6ea1SDimitry Andric 51790fca6ea1SDimitry Andric if (AM.BaseGV) 51800b57cec5SDimitry Andric return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale; 51810b57cec5SDimitry Andric 51820b57cec5SDimitry Andric switch (AM.Scale) { 51830b57cec5SDimitry Andric case 0: // "r", "r+i" or "i" is allowed 51840b57cec5SDimitry Andric break; 51850b57cec5SDimitry Andric case 1: 51860b57cec5SDimitry Andric if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. 51870b57cec5SDimitry Andric return false; 51880b57cec5SDimitry Andric // Otherwise we have r+i. 51890b57cec5SDimitry Andric break; 51900b57cec5SDimitry Andric default: 51910b57cec5SDimitry Andric // No scale > 1 is allowed 51920b57cec5SDimitry Andric return false; 51930b57cec5SDimitry Andric } 51940b57cec5SDimitry Andric return true; 51950b57cec5SDimitry Andric } 51960b57cec5SDimitry Andric 51970b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 51980b57cec5SDimitry Andric // NVPTX Inline Assembly Support 51990b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 52000b57cec5SDimitry Andric 52010b57cec5SDimitry Andric /// getConstraintType - Given a constraint letter, return the type of 52020b57cec5SDimitry Andric /// constraint it is for this target. 
52030b57cec5SDimitry Andric NVPTXTargetLowering::ConstraintType 52040b57cec5SDimitry Andric NVPTXTargetLowering::getConstraintType(StringRef Constraint) const { 52050b57cec5SDimitry Andric if (Constraint.size() == 1) { 52060b57cec5SDimitry Andric switch (Constraint[0]) { 52070b57cec5SDimitry Andric default: 52080b57cec5SDimitry Andric break; 52090b57cec5SDimitry Andric case 'b': 52100b57cec5SDimitry Andric case 'r': 52110b57cec5SDimitry Andric case 'h': 52120b57cec5SDimitry Andric case 'c': 52130b57cec5SDimitry Andric case 'l': 52140b57cec5SDimitry Andric case 'f': 52150b57cec5SDimitry Andric case 'd': 52160fca6ea1SDimitry Andric case 'q': 52170b57cec5SDimitry Andric case '0': 52180b57cec5SDimitry Andric case 'N': 52190b57cec5SDimitry Andric return C_RegisterClass; 52200b57cec5SDimitry Andric } 52210b57cec5SDimitry Andric } 52220b57cec5SDimitry Andric return TargetLowering::getConstraintType(Constraint); 52230b57cec5SDimitry Andric } 52240b57cec5SDimitry Andric 52250b57cec5SDimitry Andric std::pair<unsigned, const TargetRegisterClass *> 52260b57cec5SDimitry Andric NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 52270b57cec5SDimitry Andric StringRef Constraint, 52280b57cec5SDimitry Andric MVT VT) const { 52290b57cec5SDimitry Andric if (Constraint.size() == 1) { 52300b57cec5SDimitry Andric switch (Constraint[0]) { 52310b57cec5SDimitry Andric case 'b': 52320b57cec5SDimitry Andric return std::make_pair(0U, &NVPTX::Int1RegsRegClass); 52330b57cec5SDimitry Andric case 'c': 52340b57cec5SDimitry Andric return std::make_pair(0U, &NVPTX::Int16RegsRegClass); 52350b57cec5SDimitry Andric case 'h': 52360b57cec5SDimitry Andric return std::make_pair(0U, &NVPTX::Int16RegsRegClass); 52370b57cec5SDimitry Andric case 'r': 52380b57cec5SDimitry Andric return std::make_pair(0U, &NVPTX::Int32RegsRegClass); 52390b57cec5SDimitry Andric case 'l': 52400b57cec5SDimitry Andric case 'N': 52410b57cec5SDimitry Andric return std::make_pair(0U, 
&NVPTX::Int64RegsRegClass); 52420fca6ea1SDimitry Andric case 'q': { 52430fca6ea1SDimitry Andric if (STI.getSmVersion() < 70) 52440fca6ea1SDimitry Andric report_fatal_error("Inline asm with 128 bit operands is only " 52450fca6ea1SDimitry Andric "supported for sm_70 and higher!"); 52460fca6ea1SDimitry Andric return std::make_pair(0U, &NVPTX::Int128RegsRegClass); 52470fca6ea1SDimitry Andric } 52480b57cec5SDimitry Andric case 'f': 52490b57cec5SDimitry Andric return std::make_pair(0U, &NVPTX::Float32RegsRegClass); 52500b57cec5SDimitry Andric case 'd': 52510b57cec5SDimitry Andric return std::make_pair(0U, &NVPTX::Float64RegsRegClass); 52520b57cec5SDimitry Andric } 52530b57cec5SDimitry Andric } 52540b57cec5SDimitry Andric return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 52550b57cec5SDimitry Andric } 52560b57cec5SDimitry Andric 52570b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 52580b57cec5SDimitry Andric // NVPTX DAG Combining 52590b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 52600b57cec5SDimitry Andric 52610b57cec5SDimitry Andric bool NVPTXTargetLowering::allowFMA(MachineFunction &MF, 52625f757f3fSDimitry Andric CodeGenOptLevel OptLevel) const { 52630b57cec5SDimitry Andric // Always honor command-line argument 52640b57cec5SDimitry Andric if (FMAContractLevelOpt.getNumOccurrences() > 0) 52650b57cec5SDimitry Andric return FMAContractLevelOpt > 0; 52660b57cec5SDimitry Andric 52670b57cec5SDimitry Andric // Do not contract if we're not optimizing the code. 52685f757f3fSDimitry Andric if (OptLevel == CodeGenOptLevel::None) 52690b57cec5SDimitry Andric return false; 52700b57cec5SDimitry Andric 52710b57cec5SDimitry Andric // Honor TargetOptions flags that explicitly say fusion is okay. 
52720b57cec5SDimitry Andric if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast) 52730b57cec5SDimitry Andric return true; 52740b57cec5SDimitry Andric 52750b57cec5SDimitry Andric return allowUnsafeFPMath(MF); 52760b57cec5SDimitry Andric } 52770b57cec5SDimitry Andric 52780b57cec5SDimitry Andric bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const { 52790b57cec5SDimitry Andric // Honor TargetOptions flags that explicitly say unsafe math is okay. 52800b57cec5SDimitry Andric if (MF.getTarget().Options.UnsafeFPMath) 52810b57cec5SDimitry Andric return true; 52820b57cec5SDimitry Andric 52830b57cec5SDimitry Andric // Allow unsafe math if unsafe-fp-math attribute explicitly says so. 52840b57cec5SDimitry Andric const Function &F = MF.getFunction(); 5285fe6060f1SDimitry Andric return F.getFnAttribute("unsafe-fp-math").getValueAsBool(); 52860b57cec5SDimitry Andric } 52870b57cec5SDimitry Andric 52880fca6ea1SDimitry Andric static bool isConstZero(const SDValue &Operand) { 52890fca6ea1SDimitry Andric const auto *Const = dyn_cast<ConstantSDNode>(Operand); 52900fca6ea1SDimitry Andric return Const && Const->getZExtValue() == 0; 52910fca6ea1SDimitry Andric } 52920fca6ea1SDimitry Andric 52930b57cec5SDimitry Andric /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with 52940b57cec5SDimitry Andric /// operands N0 and N1. This is a helper for PerformADDCombine that is 52950b57cec5SDimitry Andric /// called with the default operands, and if that fails, with commuted 52960b57cec5SDimitry Andric /// operands. 
52970fca6ea1SDimitry Andric static SDValue 52980fca6ea1SDimitry Andric PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, 52990fca6ea1SDimitry Andric TargetLowering::DAGCombinerInfo &DCI) { 53000b57cec5SDimitry Andric EVT VT = N0.getValueType(); 53010fca6ea1SDimitry Andric 53020fca6ea1SDimitry Andric // Since integer multiply-add costs the same as integer multiply 53030fca6ea1SDimitry Andric // but is more costly than integer add, do the fusion only when 53040fca6ea1SDimitry Andric // the mul is only used in the add. 53050fca6ea1SDimitry Andric // TODO: this may not be true for later architectures, consider relaxing this 53060fca6ea1SDimitry Andric if (!N0.getNode()->hasOneUse()) 53070b57cec5SDimitry Andric return SDValue(); 53080b57cec5SDimitry Andric 53090b57cec5SDimitry Andric // fold (add (mul a, b), c) -> (mad a, b, c) 53100b57cec5SDimitry Andric // 53110fca6ea1SDimitry Andric if (N0.getOpcode() == ISD::MUL) 53120fca6ea1SDimitry Andric return DCI.DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT, N0.getOperand(0), 53130fca6ea1SDimitry Andric N0.getOperand(1), N1); 53140fca6ea1SDimitry Andric 53150fca6ea1SDimitry Andric // fold (add (select cond, 0, (mul a, b)), c) 53160fca6ea1SDimitry Andric // -> (select cond, c, (mad a, b, c)) 53170fca6ea1SDimitry Andric // 53180fca6ea1SDimitry Andric if (N0.getOpcode() == ISD::SELECT) { 53190fca6ea1SDimitry Andric unsigned ZeroOpNum; 53200fca6ea1SDimitry Andric if (isConstZero(N0->getOperand(1))) 53210fca6ea1SDimitry Andric ZeroOpNum = 1; 53220fca6ea1SDimitry Andric else if (isConstZero(N0->getOperand(2))) 53230fca6ea1SDimitry Andric ZeroOpNum = 2; 53240fca6ea1SDimitry Andric else 53250b57cec5SDimitry Andric return SDValue(); 53260b57cec5SDimitry Andric 53270fca6ea1SDimitry Andric SDValue M = N0->getOperand((ZeroOpNum == 1) ? 
2 : 1); 53280fca6ea1SDimitry Andric if (M->getOpcode() != ISD::MUL || !M.getNode()->hasOneUse()) 53290fca6ea1SDimitry Andric return SDValue(); 53300fca6ea1SDimitry Andric 53310fca6ea1SDimitry Andric SDValue MAD = DCI.DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT, 53320fca6ea1SDimitry Andric M->getOperand(0), M->getOperand(1), N1); 53330fca6ea1SDimitry Andric return DCI.DAG.getSelect(SDLoc(N), VT, N0->getOperand(0), 53340fca6ea1SDimitry Andric ((ZeroOpNum == 1) ? N1 : MAD), 53350fca6ea1SDimitry Andric ((ZeroOpNum == 1) ? MAD : N1)); 53360b57cec5SDimitry Andric } 53370fca6ea1SDimitry Andric 53380fca6ea1SDimitry Andric return SDValue(); 53390fca6ea1SDimitry Andric } 53400fca6ea1SDimitry Andric 53410fca6ea1SDimitry Andric static SDValue 53420fca6ea1SDimitry Andric PerformFADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, 53430fca6ea1SDimitry Andric TargetLowering::DAGCombinerInfo &DCI, 53440fca6ea1SDimitry Andric CodeGenOptLevel OptLevel) { 53450fca6ea1SDimitry Andric EVT VT = N0.getValueType(); 53460fca6ea1SDimitry Andric if (N0.getOpcode() == ISD::FMUL) { 53470b57cec5SDimitry Andric const auto *TLI = static_cast<const NVPTXTargetLowering *>( 53480fca6ea1SDimitry Andric &DCI.DAG.getTargetLoweringInfo()); 53490fca6ea1SDimitry Andric if (!TLI->allowFMA(DCI.DAG.getMachineFunction(), OptLevel)) 53500b57cec5SDimitry Andric return SDValue(); 53510b57cec5SDimitry Andric 53520b57cec5SDimitry Andric // For floating point: 53530b57cec5SDimitry Andric // Do the fusion only when the mul has less than 5 uses and all 53540b57cec5SDimitry Andric // are add. 53550b57cec5SDimitry Andric // The heuristic is that if a use is not an add, then that use 53560b57cec5SDimitry Andric // cannot be fused into fma, therefore mul is still needed anyway. 53570b57cec5SDimitry Andric // If there are more than 4 uses, even if they are all add, fusing 53580b57cec5SDimitry Andric // them will increase register pressue. 
53590b57cec5SDimitry Andric // 53600b57cec5SDimitry Andric int numUses = 0; 53610b57cec5SDimitry Andric int nonAddCount = 0; 5362349cc55cSDimitry Andric for (const SDNode *User : N0.getNode()->uses()) { 53630b57cec5SDimitry Andric numUses++; 53640b57cec5SDimitry Andric if (User->getOpcode() != ISD::FADD) 53650b57cec5SDimitry Andric ++nonAddCount; 53660b57cec5SDimitry Andric if (numUses >= 5) 53670b57cec5SDimitry Andric return SDValue(); 53680fca6ea1SDimitry Andric } 53690b57cec5SDimitry Andric if (nonAddCount) { 53700b57cec5SDimitry Andric int orderNo = N->getIROrder(); 53710b57cec5SDimitry Andric int orderNo2 = N0.getNode()->getIROrder(); 53720b57cec5SDimitry Andric // simple heuristics here for considering potential register 53730b57cec5SDimitry Andric // pressure, the logics here is that the differnce are used 53740b57cec5SDimitry Andric // to measure the distance between def and use, the longer distance 53750b57cec5SDimitry Andric // more likely cause register pressure. 53760b57cec5SDimitry Andric if (orderNo - orderNo2 < 500) 53770b57cec5SDimitry Andric return SDValue(); 53780b57cec5SDimitry Andric 53790fca6ea1SDimitry Andric // Now, check if at least one of the FMUL's operands is live beyond the 53800fca6ea1SDimitry Andric // node N, which guarantees that the FMA will not increase register 53810fca6ea1SDimitry Andric // pressure at node N. 
53820b57cec5SDimitry Andric bool opIsLive = false; 53830b57cec5SDimitry Andric const SDNode *left = N0.getOperand(0).getNode(); 53840b57cec5SDimitry Andric const SDNode *right = N0.getOperand(1).getNode(); 53850b57cec5SDimitry Andric 53860b57cec5SDimitry Andric if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right)) 53870b57cec5SDimitry Andric opIsLive = true; 53880b57cec5SDimitry Andric 53890b57cec5SDimitry Andric if (!opIsLive) 5390349cc55cSDimitry Andric for (const SDNode *User : left->uses()) { 53910b57cec5SDimitry Andric int orderNo3 = User->getIROrder(); 53920b57cec5SDimitry Andric if (orderNo3 > orderNo) { 53930b57cec5SDimitry Andric opIsLive = true; 53940b57cec5SDimitry Andric break; 53950b57cec5SDimitry Andric } 53960b57cec5SDimitry Andric } 53970b57cec5SDimitry Andric 53980b57cec5SDimitry Andric if (!opIsLive) 5399349cc55cSDimitry Andric for (const SDNode *User : right->uses()) { 54000b57cec5SDimitry Andric int orderNo3 = User->getIROrder(); 54010b57cec5SDimitry Andric if (orderNo3 > orderNo) { 54020b57cec5SDimitry Andric opIsLive = true; 54030b57cec5SDimitry Andric break; 54040b57cec5SDimitry Andric } 54050b57cec5SDimitry Andric } 54060b57cec5SDimitry Andric 54070b57cec5SDimitry Andric if (!opIsLive) 54080b57cec5SDimitry Andric return SDValue(); 54090b57cec5SDimitry Andric } 54100b57cec5SDimitry Andric 54110fca6ea1SDimitry Andric return DCI.DAG.getNode(ISD::FMA, SDLoc(N), VT, N0.getOperand(0), 54120fca6ea1SDimitry Andric N0.getOperand(1), N1); 54130b57cec5SDimitry Andric } 54140b57cec5SDimitry Andric 54150b57cec5SDimitry Andric return SDValue(); 54160b57cec5SDimitry Andric } 54170b57cec5SDimitry Andric 54180fca6ea1SDimitry Andric static SDValue PerformStoreCombineHelper(SDNode *N, std::size_t Front, 54190fca6ea1SDimitry Andric std::size_t Back) { 54200fca6ea1SDimitry Andric if (all_of(N->ops().drop_front(Front).drop_back(Back), 54210fca6ea1SDimitry Andric [](const SDUse &U) { return U.get()->isUndef(); })) 54220fca6ea1SDimitry Andric // Operand 0 is 
the previous value in the chain. Cannot return EntryToken 54230fca6ea1SDimitry Andric // as the previous value will become unused and eliminated later. 54240fca6ea1SDimitry Andric return N->getOperand(0); 54250fca6ea1SDimitry Andric 54260fca6ea1SDimitry Andric return SDValue(); 54270fca6ea1SDimitry Andric } 54280fca6ea1SDimitry Andric 54290fca6ea1SDimitry Andric static SDValue PerformStoreParamCombine(SDNode *N) { 54300fca6ea1SDimitry Andric // Operands from the 3rd to the 2nd last one are the values to be stored. 54310fca6ea1SDimitry Andric // {Chain, ArgID, Offset, Val, Glue} 54320fca6ea1SDimitry Andric return PerformStoreCombineHelper(N, 3, 1); 54330fca6ea1SDimitry Andric } 54340fca6ea1SDimitry Andric 543581ad6265SDimitry Andric static SDValue PerformStoreRetvalCombine(SDNode *N) { 543681ad6265SDimitry Andric // Operands from the 2nd to the last one are the values to be stored 54370fca6ea1SDimitry Andric return PerformStoreCombineHelper(N, 2, 0); 543881ad6265SDimitry Andric } 543981ad6265SDimitry Andric 54400b57cec5SDimitry Andric /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. 54410b57cec5SDimitry Andric /// 54420b57cec5SDimitry Andric static SDValue PerformADDCombine(SDNode *N, 54430b57cec5SDimitry Andric TargetLowering::DAGCombinerInfo &DCI, 54440fca6ea1SDimitry Andric CodeGenOptLevel OptLevel) { 54450fca6ea1SDimitry Andric if (OptLevel == CodeGenOptLevel::None) 54460fca6ea1SDimitry Andric return SDValue(); 54470fca6ea1SDimitry Andric 54480fca6ea1SDimitry Andric SDValue N0 = N->getOperand(0); 54490fca6ea1SDimitry Andric SDValue N1 = N->getOperand(1); 54500fca6ea1SDimitry Andric 54510fca6ea1SDimitry Andric // Skip non-integer, non-scalar case 54520fca6ea1SDimitry Andric EVT VT = N0.getValueType(); 54530fca6ea1SDimitry Andric if (VT.isVector() || VT != MVT::i32) 54540fca6ea1SDimitry Andric return SDValue(); 54550fca6ea1SDimitry Andric 54560fca6ea1SDimitry Andric // First try with the default operand order. 
54570fca6ea1SDimitry Andric if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI)) 54580fca6ea1SDimitry Andric return Result; 54590fca6ea1SDimitry Andric 54600fca6ea1SDimitry Andric // If that didn't work, try again with the operands commuted. 54610fca6ea1SDimitry Andric return PerformADDCombineWithOperands(N, N1, N0, DCI); 54620fca6ea1SDimitry Andric } 54630fca6ea1SDimitry Andric 54640fca6ea1SDimitry Andric /// PerformFADDCombine - Target-specific dag combine xforms for ISD::FADD. 54650fca6ea1SDimitry Andric /// 54660fca6ea1SDimitry Andric static SDValue PerformFADDCombine(SDNode *N, 54670fca6ea1SDimitry Andric TargetLowering::DAGCombinerInfo &DCI, 54685f757f3fSDimitry Andric CodeGenOptLevel OptLevel) { 54690b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 54700b57cec5SDimitry Andric SDValue N1 = N->getOperand(1); 54710b57cec5SDimitry Andric 54720fca6ea1SDimitry Andric EVT VT = N0.getValueType(); 54730fca6ea1SDimitry Andric if (VT.isVector() || !(VT == MVT::f32 || VT == MVT::f64)) 54740fca6ea1SDimitry Andric return SDValue(); 54750fca6ea1SDimitry Andric 54760b57cec5SDimitry Andric // First try with the default operand order. 54770fca6ea1SDimitry Andric if (SDValue Result = PerformFADDCombineWithOperands(N, N0, N1, DCI, OptLevel)) 54780b57cec5SDimitry Andric return Result; 54790b57cec5SDimitry Andric 54800b57cec5SDimitry Andric // If that didn't work, try again with the operands commuted. 54810fca6ea1SDimitry Andric return PerformFADDCombineWithOperands(N, N1, N0, DCI, OptLevel); 54820b57cec5SDimitry Andric } 54830b57cec5SDimitry Andric 54840b57cec5SDimitry Andric static SDValue PerformANDCombine(SDNode *N, 54850b57cec5SDimitry Andric TargetLowering::DAGCombinerInfo &DCI) { 54860b57cec5SDimitry Andric // The type legalizer turns a vector load of i8 values into a zextload to i16 54870b57cec5SDimitry Andric // registers, optionally ANY_EXTENDs it (if target type is integer), 54880b57cec5SDimitry Andric // and ANDs off the high 8 bits. 
Since we turn this load into a 54890b57cec5SDimitry Andric // target-specific DAG node, the DAG combiner fails to eliminate these AND 54900b57cec5SDimitry Andric // nodes. Do that here. 54910b57cec5SDimitry Andric SDValue Val = N->getOperand(0); 54920b57cec5SDimitry Andric SDValue Mask = N->getOperand(1); 54930b57cec5SDimitry Andric 54940b57cec5SDimitry Andric if (isa<ConstantSDNode>(Val)) { 54950b57cec5SDimitry Andric std::swap(Val, Mask); 54960b57cec5SDimitry Andric } 54970b57cec5SDimitry Andric 54980b57cec5SDimitry Andric SDValue AExt; 54995f757f3fSDimitry Andric 55005f757f3fSDimitry Andric // Convert BFE-> truncate i16 -> and 255 55015f757f3fSDimitry Andric // To just BFE-> truncate i16, as the value already has all the bits in the 55025f757f3fSDimitry Andric // right places. 55035f757f3fSDimitry Andric if (Val.getOpcode() == ISD::TRUNCATE) { 55045f757f3fSDimitry Andric SDValue BFE = Val.getOperand(0); 55055f757f3fSDimitry Andric if (BFE.getOpcode() != NVPTXISD::BFE) 55065f757f3fSDimitry Andric return SDValue(); 55075f757f3fSDimitry Andric 55085f757f3fSDimitry Andric ConstantSDNode *BFEBits = dyn_cast<ConstantSDNode>(BFE.getOperand(0)); 55095f757f3fSDimitry Andric if (!BFEBits) 55105f757f3fSDimitry Andric return SDValue(); 55115f757f3fSDimitry Andric uint64_t BFEBitsVal = BFEBits->getZExtValue(); 55125f757f3fSDimitry Andric 55135f757f3fSDimitry Andric ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask); 55145f757f3fSDimitry Andric if (!MaskCnst) { 55155f757f3fSDimitry Andric // Not an AND with a constant 55165f757f3fSDimitry Andric return SDValue(); 55175f757f3fSDimitry Andric } 55185f757f3fSDimitry Andric uint64_t MaskVal = MaskCnst->getZExtValue(); 55195f757f3fSDimitry Andric 55205f757f3fSDimitry Andric if (MaskVal != (uint64_t(1) << BFEBitsVal) - 1) 55215f757f3fSDimitry Andric return SDValue(); 55225f757f3fSDimitry Andric // If we get here, the AND is unnecessary. 
Just replace it with the trunc 55235f757f3fSDimitry Andric DCI.CombineTo(N, Val, false); 55245f757f3fSDimitry Andric } 55250b57cec5SDimitry Andric // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and 55260b57cec5SDimitry Andric if (Val.getOpcode() == ISD::ANY_EXTEND) { 55270b57cec5SDimitry Andric AExt = Val; 55280b57cec5SDimitry Andric Val = Val->getOperand(0); 55290b57cec5SDimitry Andric } 55300b57cec5SDimitry Andric 55310b57cec5SDimitry Andric if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) { 55320b57cec5SDimitry Andric Val = Val->getOperand(0); 55330b57cec5SDimitry Andric } 55340b57cec5SDimitry Andric 55350b57cec5SDimitry Andric if (Val->getOpcode() == NVPTXISD::LoadV2 || 55360b57cec5SDimitry Andric Val->getOpcode() == NVPTXISD::LoadV4) { 55370b57cec5SDimitry Andric ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask); 55380b57cec5SDimitry Andric if (!MaskCnst) { 55390b57cec5SDimitry Andric // Not an AND with a constant 55400b57cec5SDimitry Andric return SDValue(); 55410b57cec5SDimitry Andric } 55420b57cec5SDimitry Andric 55430b57cec5SDimitry Andric uint64_t MaskVal = MaskCnst->getZExtValue(); 55440b57cec5SDimitry Andric if (MaskVal != 0xff) { 55450b57cec5SDimitry Andric // Not an AND that chops off top 8 bits 55460b57cec5SDimitry Andric return SDValue(); 55470b57cec5SDimitry Andric } 55480b57cec5SDimitry Andric 55490b57cec5SDimitry Andric MemSDNode *Mem = dyn_cast<MemSDNode>(Val); 55500b57cec5SDimitry Andric if (!Mem) { 55510b57cec5SDimitry Andric // Not a MemSDNode?!? 
55520b57cec5SDimitry Andric return SDValue(); 55530b57cec5SDimitry Andric } 55540b57cec5SDimitry Andric 55550b57cec5SDimitry Andric EVT MemVT = Mem->getMemoryVT(); 55560b57cec5SDimitry Andric if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) { 55570b57cec5SDimitry Andric // We only handle the i8 case 55580b57cec5SDimitry Andric return SDValue(); 55590b57cec5SDimitry Andric } 55600b57cec5SDimitry Andric 55617a6dacacSDimitry Andric unsigned ExtType = Val->getConstantOperandVal(Val->getNumOperands() - 1); 55620b57cec5SDimitry Andric if (ExtType == ISD::SEXTLOAD) { 55630b57cec5SDimitry Andric // If for some reason the load is a sextload, the and is needed to zero 55640b57cec5SDimitry Andric // out the high 8 bits 55650b57cec5SDimitry Andric return SDValue(); 55660b57cec5SDimitry Andric } 55670b57cec5SDimitry Andric 55680b57cec5SDimitry Andric bool AddTo = false; 55690b57cec5SDimitry Andric if (AExt.getNode() != nullptr) { 55700b57cec5SDimitry Andric // Re-insert the ext as a zext. 55710b57cec5SDimitry Andric Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), 55720b57cec5SDimitry Andric AExt.getValueType(), Val); 55730b57cec5SDimitry Andric AddTo = true; 55740b57cec5SDimitry Andric } 55750b57cec5SDimitry Andric 55760b57cec5SDimitry Andric // If we get here, the AND is unnecessary. Just replace it with the load 55770b57cec5SDimitry Andric DCI.CombineTo(N, Val, AddTo); 55780b57cec5SDimitry Andric } 55790b57cec5SDimitry Andric 55800b57cec5SDimitry Andric return SDValue(); 55810b57cec5SDimitry Andric } 55820b57cec5SDimitry Andric 55830b57cec5SDimitry Andric static SDValue PerformREMCombine(SDNode *N, 55840b57cec5SDimitry Andric TargetLowering::DAGCombinerInfo &DCI, 55855f757f3fSDimitry Andric CodeGenOptLevel OptLevel) { 55860b57cec5SDimitry Andric assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM); 55870b57cec5SDimitry Andric 55880b57cec5SDimitry Andric // Don't do anything at less than -O2. 
55895f757f3fSDimitry Andric if (OptLevel < CodeGenOptLevel::Default) 55900b57cec5SDimitry Andric return SDValue(); 55910b57cec5SDimitry Andric 55920b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 55930b57cec5SDimitry Andric SDLoc DL(N); 55940b57cec5SDimitry Andric EVT VT = N->getValueType(0); 55950b57cec5SDimitry Andric bool IsSigned = N->getOpcode() == ISD::SREM; 55960b57cec5SDimitry Andric unsigned DivOpc = IsSigned ? ISD::SDIV : ISD::UDIV; 55970b57cec5SDimitry Andric 55980b57cec5SDimitry Andric const SDValue &Num = N->getOperand(0); 55990b57cec5SDimitry Andric const SDValue &Den = N->getOperand(1); 56000b57cec5SDimitry Andric 56010b57cec5SDimitry Andric for (const SDNode *U : Num->uses()) { 56020b57cec5SDimitry Andric if (U->getOpcode() == DivOpc && U->getOperand(0) == Num && 56030b57cec5SDimitry Andric U->getOperand(1) == Den) { 56040b57cec5SDimitry Andric // Num % Den -> Num - (Num / Den) * Den 56050b57cec5SDimitry Andric return DAG.getNode(ISD::SUB, DL, VT, Num, 56060b57cec5SDimitry Andric DAG.getNode(ISD::MUL, DL, VT, 56070b57cec5SDimitry Andric DAG.getNode(DivOpc, DL, VT, Num, Den), 56080b57cec5SDimitry Andric Den)); 56090b57cec5SDimitry Andric } 56100b57cec5SDimitry Andric } 56110b57cec5SDimitry Andric return SDValue(); 56120b57cec5SDimitry Andric } 56130b57cec5SDimitry Andric 56140b57cec5SDimitry Andric enum OperandSignedness { 56150b57cec5SDimitry Andric Signed = 0, 56160b57cec5SDimitry Andric Unsigned, 56170b57cec5SDimitry Andric Unknown 56180b57cec5SDimitry Andric }; 56190b57cec5SDimitry Andric 56200b57cec5SDimitry Andric /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand 56210b57cec5SDimitry Andric /// that can be demoted to \p OptSize bits without loss of information. The 56220b57cec5SDimitry Andric /// signedness of the operand, if determinable, is placed in \p S. 
56230b57cec5SDimitry Andric static bool IsMulWideOperandDemotable(SDValue Op, 56240b57cec5SDimitry Andric unsigned OptSize, 56250b57cec5SDimitry Andric OperandSignedness &S) { 56260b57cec5SDimitry Andric S = Unknown; 56270b57cec5SDimitry Andric 56280b57cec5SDimitry Andric if (Op.getOpcode() == ISD::SIGN_EXTEND || 56290b57cec5SDimitry Andric Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { 56300b57cec5SDimitry Andric EVT OrigVT = Op.getOperand(0).getValueType(); 5631e8d8bef9SDimitry Andric if (OrigVT.getFixedSizeInBits() <= OptSize) { 56320b57cec5SDimitry Andric S = Signed; 56330b57cec5SDimitry Andric return true; 56340b57cec5SDimitry Andric } 56350b57cec5SDimitry Andric } else if (Op.getOpcode() == ISD::ZERO_EXTEND) { 56360b57cec5SDimitry Andric EVT OrigVT = Op.getOperand(0).getValueType(); 5637e8d8bef9SDimitry Andric if (OrigVT.getFixedSizeInBits() <= OptSize) { 56380b57cec5SDimitry Andric S = Unsigned; 56390b57cec5SDimitry Andric return true; 56400b57cec5SDimitry Andric } 56410b57cec5SDimitry Andric } 56420b57cec5SDimitry Andric 56430b57cec5SDimitry Andric return false; 56440b57cec5SDimitry Andric } 56450b57cec5SDimitry Andric 56460b57cec5SDimitry Andric /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can 56470b57cec5SDimitry Andric /// be demoted to \p OptSize bits without loss of information. If the operands 56480b57cec5SDimitry Andric /// contain a constant, it should appear as the RHS operand. The signedness of 56490b57cec5SDimitry Andric /// the operands is placed in \p IsSigned. 
56500b57cec5SDimitry Andric static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, 56510b57cec5SDimitry Andric unsigned OptSize, 56520b57cec5SDimitry Andric bool &IsSigned) { 56530b57cec5SDimitry Andric OperandSignedness LHSSign; 56540b57cec5SDimitry Andric 56550b57cec5SDimitry Andric // The LHS operand must be a demotable op 56560b57cec5SDimitry Andric if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign)) 56570b57cec5SDimitry Andric return false; 56580b57cec5SDimitry Andric 56590b57cec5SDimitry Andric // We should have been able to determine the signedness from the LHS 56600b57cec5SDimitry Andric if (LHSSign == Unknown) 56610b57cec5SDimitry Andric return false; 56620b57cec5SDimitry Andric 56630b57cec5SDimitry Andric IsSigned = (LHSSign == Signed); 56640b57cec5SDimitry Andric 56650b57cec5SDimitry Andric // The RHS can be a demotable op or a constant 56660b57cec5SDimitry Andric if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) { 56670b57cec5SDimitry Andric const APInt &Val = CI->getAPIntValue(); 56680b57cec5SDimitry Andric if (LHSSign == Unsigned) { 56690b57cec5SDimitry Andric return Val.isIntN(OptSize); 56700b57cec5SDimitry Andric } else { 56710b57cec5SDimitry Andric return Val.isSignedIntN(OptSize); 56720b57cec5SDimitry Andric } 56730b57cec5SDimitry Andric } else { 56740b57cec5SDimitry Andric OperandSignedness RHSSign; 56750b57cec5SDimitry Andric if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign)) 56760b57cec5SDimitry Andric return false; 56770b57cec5SDimitry Andric 56780b57cec5SDimitry Andric return LHSSign == RHSSign; 56790b57cec5SDimitry Andric } 56800b57cec5SDimitry Andric } 56810b57cec5SDimitry Andric 56820b57cec5SDimitry Andric /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply 56830b57cec5SDimitry Andric /// of M/2 bits that produces an M-bit result (i.e. mul.wide). 
This transform 56840b57cec5SDimitry Andric /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift 56850b57cec5SDimitry Andric /// amount. 56860b57cec5SDimitry Andric static SDValue TryMULWIDECombine(SDNode *N, 56870b57cec5SDimitry Andric TargetLowering::DAGCombinerInfo &DCI) { 56880b57cec5SDimitry Andric EVT MulType = N->getValueType(0); 56890b57cec5SDimitry Andric if (MulType != MVT::i32 && MulType != MVT::i64) { 56900b57cec5SDimitry Andric return SDValue(); 56910b57cec5SDimitry Andric } 56920b57cec5SDimitry Andric 56930b57cec5SDimitry Andric SDLoc DL(N); 56940b57cec5SDimitry Andric unsigned OptSize = MulType.getSizeInBits() >> 1; 56950b57cec5SDimitry Andric SDValue LHS = N->getOperand(0); 56960b57cec5SDimitry Andric SDValue RHS = N->getOperand(1); 56970b57cec5SDimitry Andric 56980b57cec5SDimitry Andric // Canonicalize the multiply so the constant (if any) is on the right 56990b57cec5SDimitry Andric if (N->getOpcode() == ISD::MUL) { 57000b57cec5SDimitry Andric if (isa<ConstantSDNode>(LHS)) { 57010b57cec5SDimitry Andric std::swap(LHS, RHS); 57020b57cec5SDimitry Andric } 57030b57cec5SDimitry Andric } 57040b57cec5SDimitry Andric 57050b57cec5SDimitry Andric // If we have a SHL, determine the actual multiply amount 57060b57cec5SDimitry Andric if (N->getOpcode() == ISD::SHL) { 57070b57cec5SDimitry Andric ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS); 57080b57cec5SDimitry Andric if (!ShlRHS) { 57090b57cec5SDimitry Andric return SDValue(); 57100b57cec5SDimitry Andric } 57110b57cec5SDimitry Andric 57120b57cec5SDimitry Andric APInt ShiftAmt = ShlRHS->getAPIntValue(); 57130b57cec5SDimitry Andric unsigned BitWidth = MulType.getSizeInBits(); 57140b57cec5SDimitry Andric if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) { 57150b57cec5SDimitry Andric APInt MulVal = APInt(BitWidth, 1) << ShiftAmt; 57160b57cec5SDimitry Andric RHS = DCI.DAG.getConstant(MulVal, DL, MulType); 57170b57cec5SDimitry Andric } else { 57180b57cec5SDimitry Andric return SDValue(); 
57190b57cec5SDimitry Andric } 57200b57cec5SDimitry Andric } 57210b57cec5SDimitry Andric 57220b57cec5SDimitry Andric bool Signed; 57230b57cec5SDimitry Andric // Verify that our operands are demotable 57240b57cec5SDimitry Andric if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) { 57250b57cec5SDimitry Andric return SDValue(); 57260b57cec5SDimitry Andric } 57270b57cec5SDimitry Andric 57280b57cec5SDimitry Andric EVT DemotedVT; 57290b57cec5SDimitry Andric if (MulType == MVT::i32) { 57300b57cec5SDimitry Andric DemotedVT = MVT::i16; 57310b57cec5SDimitry Andric } else { 57320b57cec5SDimitry Andric DemotedVT = MVT::i32; 57330b57cec5SDimitry Andric } 57340b57cec5SDimitry Andric 57350b57cec5SDimitry Andric // Truncate the operands to the correct size. Note that these are just for 57360b57cec5SDimitry Andric // type consistency and will (likely) be eliminated in later phases. 57370b57cec5SDimitry Andric SDValue TruncLHS = 57380b57cec5SDimitry Andric DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS); 57390b57cec5SDimitry Andric SDValue TruncRHS = 57400b57cec5SDimitry Andric DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS); 57410b57cec5SDimitry Andric 57420b57cec5SDimitry Andric unsigned Opc; 57430b57cec5SDimitry Andric if (Signed) { 57440b57cec5SDimitry Andric Opc = NVPTXISD::MUL_WIDE_SIGNED; 57450b57cec5SDimitry Andric } else { 57460b57cec5SDimitry Andric Opc = NVPTXISD::MUL_WIDE_UNSIGNED; 57470b57cec5SDimitry Andric } 57480b57cec5SDimitry Andric 57490b57cec5SDimitry Andric return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS); 57500b57cec5SDimitry Andric } 57510b57cec5SDimitry Andric 57520fca6ea1SDimitry Andric static bool isConstOne(const SDValue &Operand) { 57530fca6ea1SDimitry Andric const auto *Const = dyn_cast<ConstantSDNode>(Operand); 57540fca6ea1SDimitry Andric return Const && Const->getZExtValue() == 1; 57550fca6ea1SDimitry Andric } 57560fca6ea1SDimitry Andric 57570fca6ea1SDimitry Andric static SDValue matchMADConstOnePattern(SDValue Add) { 
57580fca6ea1SDimitry Andric if (Add->getOpcode() != ISD::ADD) 57590fca6ea1SDimitry Andric return SDValue(); 57600fca6ea1SDimitry Andric 57610fca6ea1SDimitry Andric if (isConstOne(Add->getOperand(0))) 57620fca6ea1SDimitry Andric return Add->getOperand(1); 57630fca6ea1SDimitry Andric 57640fca6ea1SDimitry Andric if (isConstOne(Add->getOperand(1))) 57650fca6ea1SDimitry Andric return Add->getOperand(0); 57660fca6ea1SDimitry Andric 57670fca6ea1SDimitry Andric return SDValue(); 57680fca6ea1SDimitry Andric } 57690fca6ea1SDimitry Andric 57700fca6ea1SDimitry Andric static SDValue combineMADConstOne(SDValue X, SDValue Add, EVT VT, SDLoc DL, 57710fca6ea1SDimitry Andric TargetLowering::DAGCombinerInfo &DCI) { 57720fca6ea1SDimitry Andric 57730fca6ea1SDimitry Andric if (SDValue Y = matchMADConstOnePattern(Add)) 57740fca6ea1SDimitry Andric return DCI.DAG.getNode(NVPTXISD::IMAD, DL, VT, X, Y, X); 57750fca6ea1SDimitry Andric 57760fca6ea1SDimitry Andric return SDValue(); 57770fca6ea1SDimitry Andric } 57780fca6ea1SDimitry Andric 57790fca6ea1SDimitry Andric static SDValue combineMulSelectConstOne(SDValue X, SDValue Select, EVT VT, 57800fca6ea1SDimitry Andric SDLoc DL, 57810fca6ea1SDimitry Andric TargetLowering::DAGCombinerInfo &DCI) { 57820fca6ea1SDimitry Andric if (Select->getOpcode() != ISD::SELECT) 57830fca6ea1SDimitry Andric return SDValue(); 57840fca6ea1SDimitry Andric 57850fca6ea1SDimitry Andric SDValue Cond = Select->getOperand(0); 57860fca6ea1SDimitry Andric 57870fca6ea1SDimitry Andric unsigned ConstOpNo; 57880fca6ea1SDimitry Andric if (isConstOne(Select->getOperand(1))) 57890fca6ea1SDimitry Andric ConstOpNo = 1; 57900fca6ea1SDimitry Andric else if (isConstOne(Select->getOperand(2))) 57910fca6ea1SDimitry Andric ConstOpNo = 2; 57920fca6ea1SDimitry Andric else 57930fca6ea1SDimitry Andric return SDValue(); 57940fca6ea1SDimitry Andric 57950fca6ea1SDimitry Andric SDValue Y = Select->getOperand((ConstOpNo == 1) ? 
2 : 1); 57960fca6ea1SDimitry Andric 57970fca6ea1SDimitry Andric // Do not combine if the resulting sequence is not obviously profitable. 57980fca6ea1SDimitry Andric if (!matchMADConstOnePattern(Y)) 57990fca6ea1SDimitry Andric return SDValue(); 58000fca6ea1SDimitry Andric 58010fca6ea1SDimitry Andric SDValue NewMul = DCI.DAG.getNode(ISD::MUL, DL, VT, X, Y); 58020fca6ea1SDimitry Andric 58030fca6ea1SDimitry Andric return DCI.DAG.getNode(ISD::SELECT, DL, VT, Cond, 58040fca6ea1SDimitry Andric (ConstOpNo == 1) ? X : NewMul, 58050fca6ea1SDimitry Andric (ConstOpNo == 1) ? NewMul : X); 58060fca6ea1SDimitry Andric } 58070fca6ea1SDimitry Andric 58080fca6ea1SDimitry Andric static SDValue 58090fca6ea1SDimitry Andric PerformMULCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, 58100fca6ea1SDimitry Andric TargetLowering::DAGCombinerInfo &DCI) { 58110fca6ea1SDimitry Andric 58120fca6ea1SDimitry Andric EVT VT = N0.getValueType(); 58130fca6ea1SDimitry Andric if (VT.isVector()) 58140fca6ea1SDimitry Andric return SDValue(); 58150fca6ea1SDimitry Andric 58160fca6ea1SDimitry Andric if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) 58170fca6ea1SDimitry Andric return SDValue(); 58180fca6ea1SDimitry Andric 58190fca6ea1SDimitry Andric SDLoc DL(N); 58200fca6ea1SDimitry Andric 58210fca6ea1SDimitry Andric // (mul x, (add y, 1)) -> (mad x, y, x) 58220fca6ea1SDimitry Andric if (SDValue Res = combineMADConstOne(N0, N1, VT, DL, DCI)) 58230fca6ea1SDimitry Andric return Res; 58240fca6ea1SDimitry Andric if (SDValue Res = combineMADConstOne(N1, N0, VT, DL, DCI)) 58250fca6ea1SDimitry Andric return Res; 58260fca6ea1SDimitry Andric 58270fca6ea1SDimitry Andric // (mul x, (select y, 1)) -> (select (mul x, y), x) 58280fca6ea1SDimitry Andric if (SDValue Res = combineMulSelectConstOne(N0, N1, VT, DL, DCI)) 58290fca6ea1SDimitry Andric return Res; 58300fca6ea1SDimitry Andric if (SDValue Res = combineMulSelectConstOne(N1, N0, VT, DL, DCI)) 58310fca6ea1SDimitry Andric return Res; 58320fca6ea1SDimitry 
Andric 58330fca6ea1SDimitry Andric return SDValue(); 58340fca6ea1SDimitry Andric } 58350fca6ea1SDimitry Andric 58360b57cec5SDimitry Andric /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes. 58370b57cec5SDimitry Andric static SDValue PerformMULCombine(SDNode *N, 58380b57cec5SDimitry Andric TargetLowering::DAGCombinerInfo &DCI, 58395f757f3fSDimitry Andric CodeGenOptLevel OptLevel) { 58400fca6ea1SDimitry Andric if (OptLevel == CodeGenOptLevel::None) 58410fca6ea1SDimitry Andric return SDValue(); 58420fca6ea1SDimitry Andric 58430b57cec5SDimitry Andric if (SDValue Ret = TryMULWIDECombine(N, DCI)) 58440b57cec5SDimitry Andric return Ret; 58450b57cec5SDimitry Andric 58460fca6ea1SDimitry Andric SDValue N0 = N->getOperand(0); 58470fca6ea1SDimitry Andric SDValue N1 = N->getOperand(1); 58480fca6ea1SDimitry Andric return PerformMULCombineWithOperands(N, N0, N1, DCI); 58490b57cec5SDimitry Andric } 58500b57cec5SDimitry Andric 58510b57cec5SDimitry Andric /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes. 
58520b57cec5SDimitry Andric static SDValue PerformSHLCombine(SDNode *N, 58530b57cec5SDimitry Andric TargetLowering::DAGCombinerInfo &DCI, 58545f757f3fSDimitry Andric CodeGenOptLevel OptLevel) { 58555f757f3fSDimitry Andric if (OptLevel > CodeGenOptLevel::None) { 58560b57cec5SDimitry Andric // Try mul.wide combining at OptLevel > 0 58570b57cec5SDimitry Andric if (SDValue Ret = TryMULWIDECombine(N, DCI)) 58580b57cec5SDimitry Andric return Ret; 58590b57cec5SDimitry Andric } 58600b57cec5SDimitry Andric 58610b57cec5SDimitry Andric return SDValue(); 58620b57cec5SDimitry Andric } 58630b57cec5SDimitry Andric 58640b57cec5SDimitry Andric static SDValue PerformSETCCCombine(SDNode *N, 58655f757f3fSDimitry Andric TargetLowering::DAGCombinerInfo &DCI, 58665f757f3fSDimitry Andric unsigned int SmVersion) { 58670b57cec5SDimitry Andric EVT CCType = N->getValueType(0); 58680b57cec5SDimitry Andric SDValue A = N->getOperand(0); 58690b57cec5SDimitry Andric SDValue B = N->getOperand(1); 58700b57cec5SDimitry Andric 58715f757f3fSDimitry Andric EVT AType = A.getValueType(); 58725f757f3fSDimitry Andric if (!(CCType == MVT::v2i1 && (AType == MVT::v2f16 || AType == MVT::v2bf16))) 58735f757f3fSDimitry Andric return SDValue(); 58745f757f3fSDimitry Andric 58755f757f3fSDimitry Andric if (A.getValueType() == MVT::v2bf16 && SmVersion < 90) 58760b57cec5SDimitry Andric return SDValue(); 58770b57cec5SDimitry Andric 58780b57cec5SDimitry Andric SDLoc DL(N); 58790b57cec5SDimitry Andric // setp.f16x2 returns two scalar predicates, which we need to 58800b57cec5SDimitry Andric // convert back to v2i1. The returned result will be scalarized by 58810b57cec5SDimitry Andric // the legalizer, but the comparison will remain a single vector 58820b57cec5SDimitry Andric // instruction. 58835f757f3fSDimitry Andric SDValue CCNode = DCI.DAG.getNode( 58845f757f3fSDimitry Andric A.getValueType() == MVT::v2f16 ? 
NVPTXISD::SETP_F16X2 58855f757f3fSDimitry Andric : NVPTXISD::SETP_BF16X2, 58865f757f3fSDimitry Andric DL, DCI.DAG.getVTList(MVT::i1, MVT::i1), {A, B, N->getOperand(2)}); 58870b57cec5SDimitry Andric return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, CCType, CCNode.getValue(0), 58880b57cec5SDimitry Andric CCNode.getValue(1)); 58890b57cec5SDimitry Andric } 58900b57cec5SDimitry Andric 58915f757f3fSDimitry Andric static SDValue PerformEXTRACTCombine(SDNode *N, 58925f757f3fSDimitry Andric TargetLowering::DAGCombinerInfo &DCI) { 58935f757f3fSDimitry Andric SDValue Vector = N->getOperand(0); 58945f757f3fSDimitry Andric SDLoc DL(N); 58955f757f3fSDimitry Andric EVT VectorVT = Vector.getValueType(); 58965f757f3fSDimitry Andric if (Vector->getOpcode() == ISD::LOAD && VectorVT.isSimple() && 58975f757f3fSDimitry Andric IsPTXVectorType(VectorVT.getSimpleVT())) 58985f757f3fSDimitry Andric return SDValue(); // Native vector loads already combine nicely w/ 58990fca6ea1SDimitry Andric // extract_vector_elt. 59000fca6ea1SDimitry Andric // Don't mess with singletons or v2*16, v4i8 and v8i8 types, we already 59010fca6ea1SDimitry Andric // handle them OK. 59025f757f3fSDimitry Andric if (VectorVT.getVectorNumElements() == 1 || Isv2x16VT(VectorVT) || 59030fca6ea1SDimitry Andric VectorVT == MVT::v4i8 || VectorVT == MVT::v8i8) 59040fca6ea1SDimitry Andric return SDValue(); 59050fca6ea1SDimitry Andric 59060fca6ea1SDimitry Andric // Don't mess with undef values as sra may be simplified to 0, not undef. 59070fca6ea1SDimitry Andric if (Vector->isUndef() || ISD::allOperandsUndef(Vector.getNode())) 59085f757f3fSDimitry Andric return SDValue(); 59095f757f3fSDimitry Andric 59105f757f3fSDimitry Andric uint64_t VectorBits = VectorVT.getSizeInBits(); 59115f757f3fSDimitry Andric // We only handle the types we can extract in-register. 
59125f757f3fSDimitry Andric if (!(VectorBits == 16 || VectorBits == 32 || VectorBits == 64)) 59135f757f3fSDimitry Andric return SDValue(); 59145f757f3fSDimitry Andric 59155f757f3fSDimitry Andric ConstantSDNode *Index = dyn_cast<ConstantSDNode>(N->getOperand(1)); 59165f757f3fSDimitry Andric // Index == 0 is handled by generic DAG combiner. 59175f757f3fSDimitry Andric if (!Index || Index->getZExtValue() == 0) 59185f757f3fSDimitry Andric return SDValue(); 59195f757f3fSDimitry Andric 59205f757f3fSDimitry Andric MVT IVT = MVT::getIntegerVT(VectorBits); 59215f757f3fSDimitry Andric EVT EltVT = VectorVT.getVectorElementType(); 59225f757f3fSDimitry Andric EVT EltIVT = EltVT.changeTypeToInteger(); 59235f757f3fSDimitry Andric uint64_t EltBits = EltVT.getScalarSizeInBits(); 59245f757f3fSDimitry Andric 59255f757f3fSDimitry Andric SDValue Result = DCI.DAG.getNode( 59265f757f3fSDimitry Andric ISD::TRUNCATE, DL, EltIVT, 59275f757f3fSDimitry Andric DCI.DAG.getNode( 59285f757f3fSDimitry Andric ISD::SRA, DL, IVT, DCI.DAG.getNode(ISD::BITCAST, DL, IVT, Vector), 59295f757f3fSDimitry Andric DCI.DAG.getConstant(Index->getZExtValue() * EltBits, DL, IVT))); 59305f757f3fSDimitry Andric 59315f757f3fSDimitry Andric // If element has non-integer type, bitcast it back to the expected type. 59325f757f3fSDimitry Andric if (EltVT != EltIVT) 59335f757f3fSDimitry Andric Result = DCI.DAG.getNode(ISD::BITCAST, DL, EltVT, Result); 59345f757f3fSDimitry Andric // Past legalizer, we may need to extent i8 -> i16 to match the register type. 
59355f757f3fSDimitry Andric if (EltVT != N->getValueType(0)) 59365f757f3fSDimitry Andric Result = DCI.DAG.getNode(ISD::ANY_EXTEND, DL, N->getValueType(0), Result); 59375f757f3fSDimitry Andric 59385f757f3fSDimitry Andric return Result; 59395f757f3fSDimitry Andric } 59405f757f3fSDimitry Andric 59415f757f3fSDimitry Andric static SDValue PerformVSELECTCombine(SDNode *N, 59425f757f3fSDimitry Andric TargetLowering::DAGCombinerInfo &DCI) { 59435f757f3fSDimitry Andric SDValue VA = N->getOperand(1); 59445f757f3fSDimitry Andric EVT VectorVT = VA.getValueType(); 59455f757f3fSDimitry Andric if (VectorVT != MVT::v4i8) 59465f757f3fSDimitry Andric return SDValue(); 59475f757f3fSDimitry Andric 59485f757f3fSDimitry Andric // We need to split vselect into individual per-element operations Because we 59495f757f3fSDimitry Andric // use BFE/BFI instruction for byte extraction/insertion, we do end up with 59505f757f3fSDimitry Andric // 32-bit values, so we may as well do comparison as i32 to avoid conversions 59515f757f3fSDimitry Andric // to/from i16 normally used for i8 values. 
59525f757f3fSDimitry Andric SmallVector<SDValue, 4> E; 59535f757f3fSDimitry Andric SDLoc DL(N); 59545f757f3fSDimitry Andric SDValue VCond = N->getOperand(0); 59555f757f3fSDimitry Andric SDValue VB = N->getOperand(2); 59565f757f3fSDimitry Andric for (int I = 0; I < 4; ++I) { 59575f757f3fSDimitry Andric SDValue C = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i1, VCond, 59585f757f3fSDimitry Andric DCI.DAG.getConstant(I, DL, MVT::i32)); 59595f757f3fSDimitry Andric SDValue EA = DCI.DAG.getAnyExtOrTrunc( 59605f757f3fSDimitry Andric DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, VA, 59615f757f3fSDimitry Andric DCI.DAG.getConstant(I, DL, MVT::i32)), 59625f757f3fSDimitry Andric DL, MVT::i32); 59635f757f3fSDimitry Andric SDValue EB = DCI.DAG.getAnyExtOrTrunc( 59645f757f3fSDimitry Andric DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, VB, 59655f757f3fSDimitry Andric DCI.DAG.getConstant(I, DL, MVT::i32)), 59665f757f3fSDimitry Andric DL, MVT::i32); 59675f757f3fSDimitry Andric E.push_back(DCI.DAG.getAnyExtOrTrunc( 59685f757f3fSDimitry Andric DCI.DAG.getNode(ISD::SELECT, DL, MVT::i32, C, EA, EB), DL, MVT::i8)); 59695f757f3fSDimitry Andric } 59705f757f3fSDimitry Andric return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i8, E); 59715f757f3fSDimitry Andric } 59725f757f3fSDimitry Andric 59735f757f3fSDimitry Andric static SDValue PerformLOADCombine(SDNode *N, 59745f757f3fSDimitry Andric TargetLowering::DAGCombinerInfo &DCI) { 59755f757f3fSDimitry Andric SelectionDAG &DAG = DCI.DAG; 59765f757f3fSDimitry Andric LoadSDNode *LD = cast<LoadSDNode>(N); 59775f757f3fSDimitry Andric 59785f757f3fSDimitry Andric // Lower a v16i8 load into a LoadV4 operation with i32 results instead of 59795f757f3fSDimitry Andric // letting ReplaceLoadVector split it into smaller loads during legalization. 
59805f757f3fSDimitry Andric // This is done at dag-combine1 time, so that vector operations with i8 59815f757f3fSDimitry Andric // elements can be optimised away instead of being needlessly split during 59825f757f3fSDimitry Andric // legalization, which involves storing to the stack and loading it back. 59835f757f3fSDimitry Andric EVT VT = N->getValueType(0); 59845f757f3fSDimitry Andric if (VT != MVT::v16i8) 59855f757f3fSDimitry Andric return SDValue(); 59865f757f3fSDimitry Andric 59875f757f3fSDimitry Andric SDLoc DL(N); 59885f757f3fSDimitry Andric 59895f757f3fSDimitry Andric // Create a v4i32 vector load operation, effectively <4 x v4i8>. 59905f757f3fSDimitry Andric unsigned Opc = NVPTXISD::LoadV4; 59915f757f3fSDimitry Andric EVT NewVT = MVT::v4i32; 59925f757f3fSDimitry Andric EVT EltVT = NewVT.getVectorElementType(); 59935f757f3fSDimitry Andric unsigned NumElts = NewVT.getVectorNumElements(); 59945f757f3fSDimitry Andric EVT RetVTs[] = {EltVT, EltVT, EltVT, EltVT, MVT::Other}; 59955f757f3fSDimitry Andric SDVTList RetVTList = DAG.getVTList(RetVTs); 59965f757f3fSDimitry Andric SmallVector<SDValue, 8> Ops(N->ops()); 59975f757f3fSDimitry Andric Ops.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL)); 59985f757f3fSDimitry Andric SDValue NewLoad = DAG.getMemIntrinsicNode(Opc, DL, RetVTList, Ops, NewVT, 59995f757f3fSDimitry Andric LD->getMemOperand()); 60005f757f3fSDimitry Andric SDValue NewChain = NewLoad.getValue(NumElts); 60015f757f3fSDimitry Andric 60025f757f3fSDimitry Andric // Create a vector of the same type returned by the original load. 
60035f757f3fSDimitry Andric SmallVector<SDValue, 4> Elts; 60045f757f3fSDimitry Andric for (unsigned i = 0; i < NumElts; i++) 60055f757f3fSDimitry Andric Elts.push_back(NewLoad.getValue(i)); 60065f757f3fSDimitry Andric return DCI.DAG.getMergeValues( 60075f757f3fSDimitry Andric {DCI.DAG.getBitcast(VT, DCI.DAG.getBuildVector(NewVT, DL, Elts)), 60085f757f3fSDimitry Andric NewChain}, 60095f757f3fSDimitry Andric DL); 60105f757f3fSDimitry Andric } 60115f757f3fSDimitry Andric 60120b57cec5SDimitry Andric SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, 60130b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 60145f757f3fSDimitry Andric CodeGenOptLevel OptLevel = getTargetMachine().getOptLevel(); 60150b57cec5SDimitry Andric switch (N->getOpcode()) { 60160b57cec5SDimitry Andric default: break; 60170b57cec5SDimitry Andric case ISD::ADD: 60180fca6ea1SDimitry Andric return PerformADDCombine(N, DCI, OptLevel); 60190b57cec5SDimitry Andric case ISD::FADD: 60200fca6ea1SDimitry Andric return PerformFADDCombine(N, DCI, OptLevel); 60210b57cec5SDimitry Andric case ISD::MUL: 60220b57cec5SDimitry Andric return PerformMULCombine(N, DCI, OptLevel); 60230b57cec5SDimitry Andric case ISD::SHL: 60240b57cec5SDimitry Andric return PerformSHLCombine(N, DCI, OptLevel); 60250b57cec5SDimitry Andric case ISD::AND: 60260b57cec5SDimitry Andric return PerformANDCombine(N, DCI); 60270b57cec5SDimitry Andric case ISD::UREM: 60280b57cec5SDimitry Andric case ISD::SREM: 60290b57cec5SDimitry Andric return PerformREMCombine(N, DCI, OptLevel); 60300b57cec5SDimitry Andric case ISD::SETCC: 60315f757f3fSDimitry Andric return PerformSETCCCombine(N, DCI, STI.getSmVersion()); 60325f757f3fSDimitry Andric case ISD::LOAD: 60335f757f3fSDimitry Andric return PerformLOADCombine(N, DCI); 603481ad6265SDimitry Andric case NVPTXISD::StoreRetval: 603581ad6265SDimitry Andric case NVPTXISD::StoreRetvalV2: 603681ad6265SDimitry Andric case NVPTXISD::StoreRetvalV4: 603781ad6265SDimitry Andric return 
PerformStoreRetvalCombine(N); 60380fca6ea1SDimitry Andric case NVPTXISD::StoreParam: 60390fca6ea1SDimitry Andric case NVPTXISD::StoreParamV2: 60400fca6ea1SDimitry Andric case NVPTXISD::StoreParamV4: 60410fca6ea1SDimitry Andric return PerformStoreParamCombine(N); 60425f757f3fSDimitry Andric case ISD::EXTRACT_VECTOR_ELT: 60435f757f3fSDimitry Andric return PerformEXTRACTCombine(N, DCI); 60445f757f3fSDimitry Andric case ISD::VSELECT: 60455f757f3fSDimitry Andric return PerformVSELECTCombine(N, DCI); 60460b57cec5SDimitry Andric } 60470b57cec5SDimitry Andric return SDValue(); 60480b57cec5SDimitry Andric } 60490b57cec5SDimitry Andric 60500b57cec5SDimitry Andric /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. 60510b57cec5SDimitry Andric static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, 60520b57cec5SDimitry Andric SmallVectorImpl<SDValue> &Results) { 60530b57cec5SDimitry Andric EVT ResVT = N->getValueType(0); 60540b57cec5SDimitry Andric SDLoc DL(N); 60550b57cec5SDimitry Andric 60560b57cec5SDimitry Andric assert(ResVT.isVector() && "Vector load must have vector type"); 60570b57cec5SDimitry Andric 60580b57cec5SDimitry Andric // We only handle "native" vector sizes for now, e.g. <4 x double> is not 60590b57cec5SDimitry Andric // legal. We can (and should) split that into 2 loads of <2 x double> here 60600b57cec5SDimitry Andric // but I'm leaving that as a TODO for now. 
60610b57cec5SDimitry Andric assert(ResVT.isSimple() && "Can only handle simple types"); 60620b57cec5SDimitry Andric switch (ResVT.getSimpleVT().SimpleTy) { 60630b57cec5SDimitry Andric default: 60640b57cec5SDimitry Andric return; 60650b57cec5SDimitry Andric case MVT::v2i8: 60660b57cec5SDimitry Andric case MVT::v2i16: 60670b57cec5SDimitry Andric case MVT::v2i32: 60680b57cec5SDimitry Andric case MVT::v2i64: 60690b57cec5SDimitry Andric case MVT::v2f16: 60700b57cec5SDimitry Andric case MVT::v2f32: 60710b57cec5SDimitry Andric case MVT::v2f64: 60720b57cec5SDimitry Andric case MVT::v4i8: 60730b57cec5SDimitry Andric case MVT::v4i16: 60740b57cec5SDimitry Andric case MVT::v4i32: 60750b57cec5SDimitry Andric case MVT::v4f16: 60760b57cec5SDimitry Andric case MVT::v4f32: 60770b57cec5SDimitry Andric case MVT::v8f16: // <4 x f16x2> 60785f757f3fSDimitry Andric case MVT::v8bf16: // <4 x bf16x2> 60795f757f3fSDimitry Andric case MVT::v8i16: // <4 x i16x2> 60800b57cec5SDimitry Andric // This is a "native" vector type 60810b57cec5SDimitry Andric break; 60820b57cec5SDimitry Andric } 60830b57cec5SDimitry Andric 60840b57cec5SDimitry Andric LoadSDNode *LD = cast<LoadSDNode>(N); 60850b57cec5SDimitry Andric 60865ffd83dbSDimitry Andric Align Alignment = LD->getAlign(); 60870b57cec5SDimitry Andric auto &TD = DAG.getDataLayout(); 608806c3fb27SDimitry Andric Align PrefAlign = 608906c3fb27SDimitry Andric TD.getPrefTypeAlign(LD->getMemoryVT().getTypeForEVT(*DAG.getContext())); 60905ffd83dbSDimitry Andric if (Alignment < PrefAlign) { 60910b57cec5SDimitry Andric // This load is not sufficiently aligned, so bail out and let this vector 60920b57cec5SDimitry Andric // load be scalarized. Note that we may still be able to emit smaller 60930b57cec5SDimitry Andric // vector loads. 
For example, if we are loading a <4 x float> with an 60940b57cec5SDimitry Andric // alignment of 8, this check will fail but the legalizer will try again 60950b57cec5SDimitry Andric // with 2 x <2 x float>, which will succeed with an alignment of 8. 60960b57cec5SDimitry Andric return; 60970b57cec5SDimitry Andric } 60980b57cec5SDimitry Andric 60990b57cec5SDimitry Andric EVT EltVT = ResVT.getVectorElementType(); 61000b57cec5SDimitry Andric unsigned NumElts = ResVT.getVectorNumElements(); 61010b57cec5SDimitry Andric 61020b57cec5SDimitry Andric // Since LoadV2 is a target node, we cannot rely on DAG type legalization. 61030b57cec5SDimitry Andric // Therefore, we must ensure the type is legal. For i1 and i8, we set the 61040b57cec5SDimitry Andric // loaded type to i16 and propagate the "real" type as the memory type. 61050b57cec5SDimitry Andric bool NeedTrunc = false; 61060b57cec5SDimitry Andric if (EltVT.getSizeInBits() < 16) { 61070b57cec5SDimitry Andric EltVT = MVT::i16; 61080b57cec5SDimitry Andric NeedTrunc = true; 61090b57cec5SDimitry Andric } 61100b57cec5SDimitry Andric 61110b57cec5SDimitry Andric unsigned Opcode = 0; 61120b57cec5SDimitry Andric SDVTList LdResVTs; 61135f757f3fSDimitry Andric bool Load16x2 = false; 61140b57cec5SDimitry Andric 61150b57cec5SDimitry Andric switch (NumElts) { 61160b57cec5SDimitry Andric default: 61170b57cec5SDimitry Andric return; 61180b57cec5SDimitry Andric case 2: 61190b57cec5SDimitry Andric Opcode = NVPTXISD::LoadV2; 61200b57cec5SDimitry Andric LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); 61210b57cec5SDimitry Andric break; 61220b57cec5SDimitry Andric case 4: { 61230b57cec5SDimitry Andric Opcode = NVPTXISD::LoadV4; 61240b57cec5SDimitry Andric EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; 61250b57cec5SDimitry Andric LdResVTs = DAG.getVTList(ListVTs); 61260b57cec5SDimitry Andric break; 61270b57cec5SDimitry Andric } 61280b57cec5SDimitry Andric case 8: { 61290b57cec5SDimitry Andric // v8f16 is a special case. 
PTX doesn't have ld.v8.f16 61300b57cec5SDimitry Andric // instruction. Instead, we split the vector into v2f16 chunks and 61310b57cec5SDimitry Andric // load them with ld.v4.b32. 61325f757f3fSDimitry Andric assert(Is16bitsType(EltVT.getSimpleVT()) && "Unsupported v8 vector type."); 61335f757f3fSDimitry Andric Load16x2 = true; 61340b57cec5SDimitry Andric Opcode = NVPTXISD::LoadV4; 61355f757f3fSDimitry Andric EVT VVT; 61365f757f3fSDimitry Andric switch (EltVT.getSimpleVT().SimpleTy) { 61375f757f3fSDimitry Andric case MVT::f16: 61385f757f3fSDimitry Andric VVT = MVT::v2f16; 61395f757f3fSDimitry Andric break; 61405f757f3fSDimitry Andric case MVT::bf16: 61415f757f3fSDimitry Andric VVT = MVT::v2bf16; 61425f757f3fSDimitry Andric break; 61435f757f3fSDimitry Andric case MVT::i16: 61445f757f3fSDimitry Andric VVT = MVT::v2i16; 61455f757f3fSDimitry Andric break; 61465f757f3fSDimitry Andric default: 61475f757f3fSDimitry Andric llvm_unreachable("Unsupported v8 vector type."); 61485f757f3fSDimitry Andric } 6149bdd1243dSDimitry Andric EVT ListVTs[] = {VVT, VVT, VVT, VVT, MVT::Other}; 61500b57cec5SDimitry Andric LdResVTs = DAG.getVTList(ListVTs); 61510b57cec5SDimitry Andric break; 61520b57cec5SDimitry Andric } 61530b57cec5SDimitry Andric } 61540b57cec5SDimitry Andric 61550b57cec5SDimitry Andric // Copy regular operands 61560b57cec5SDimitry Andric SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end()); 61570b57cec5SDimitry Andric 61580b57cec5SDimitry Andric // The select routine does not have access to the LoadSDNode instance, so 61590b57cec5SDimitry Andric // pass along the extension information 61600b57cec5SDimitry Andric OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL)); 61610b57cec5SDimitry Andric 61620b57cec5SDimitry Andric SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, 61630b57cec5SDimitry Andric LD->getMemoryVT(), 61640b57cec5SDimitry Andric LD->getMemOperand()); 61650b57cec5SDimitry Andric 61660b57cec5SDimitry Andric 
SmallVector<SDValue, 8> ScalarRes; 61675f757f3fSDimitry Andric if (Load16x2) { 61680b57cec5SDimitry Andric // Split v2f16 subvectors back into individual elements. 61690b57cec5SDimitry Andric NumElts /= 2; 61700b57cec5SDimitry Andric for (unsigned i = 0; i < NumElts; ++i) { 61710b57cec5SDimitry Andric SDValue SubVector = NewLD.getValue(i); 61720b57cec5SDimitry Andric SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector, 61730b57cec5SDimitry Andric DAG.getIntPtrConstant(0, DL)); 61740b57cec5SDimitry Andric SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector, 61750b57cec5SDimitry Andric DAG.getIntPtrConstant(1, DL)); 61760b57cec5SDimitry Andric ScalarRes.push_back(E0); 61770b57cec5SDimitry Andric ScalarRes.push_back(E1); 61780b57cec5SDimitry Andric } 61790b57cec5SDimitry Andric } else { 61800b57cec5SDimitry Andric for (unsigned i = 0; i < NumElts; ++i) { 61810b57cec5SDimitry Andric SDValue Res = NewLD.getValue(i); 61820b57cec5SDimitry Andric if (NeedTrunc) 61830b57cec5SDimitry Andric Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); 61840b57cec5SDimitry Andric ScalarRes.push_back(Res); 61850b57cec5SDimitry Andric } 61860b57cec5SDimitry Andric } 61870b57cec5SDimitry Andric 61880b57cec5SDimitry Andric SDValue LoadChain = NewLD.getValue(NumElts); 61890b57cec5SDimitry Andric 61900b57cec5SDimitry Andric SDValue BuildVec = DAG.getBuildVector(ResVT, DL, ScalarRes); 61910b57cec5SDimitry Andric 61920b57cec5SDimitry Andric Results.push_back(BuildVec); 61930b57cec5SDimitry Andric Results.push_back(LoadChain); 61940b57cec5SDimitry Andric } 61950b57cec5SDimitry Andric 61960b57cec5SDimitry Andric static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, 61970b57cec5SDimitry Andric SmallVectorImpl<SDValue> &Results) { 61980b57cec5SDimitry Andric SDValue Chain = N->getOperand(0); 61990b57cec5SDimitry Andric SDValue Intrin = N->getOperand(1); 62000b57cec5SDimitry Andric SDLoc DL(N); 62010b57cec5SDimitry Andric 
62020b57cec5SDimitry Andric // Get the intrinsic ID 62031db9f3b2SDimitry Andric unsigned IntrinNo = Intrin.getNode()->getAsZExtVal(); 62040b57cec5SDimitry Andric switch (IntrinNo) { 62050b57cec5SDimitry Andric default: 62060b57cec5SDimitry Andric return; 62070b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_i: 62080b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_f: 62090b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_p: 62100b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_i: 62110b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_f: 62120b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_p: { 62130b57cec5SDimitry Andric EVT ResVT = N->getValueType(0); 62140b57cec5SDimitry Andric 62150b57cec5SDimitry Andric if (ResVT.isVector()) { 62160b57cec5SDimitry Andric // Vector LDG/LDU 62170b57cec5SDimitry Andric 62180b57cec5SDimitry Andric unsigned NumElts = ResVT.getVectorNumElements(); 62190b57cec5SDimitry Andric EVT EltVT = ResVT.getVectorElementType(); 62200b57cec5SDimitry Andric 62210b57cec5SDimitry Andric // Since LDU/LDG are target nodes, we cannot rely on DAG type 62220b57cec5SDimitry Andric // legalization. 62230b57cec5SDimitry Andric // Therefore, we must ensure the type is legal. For i1 and i8, we set the 62240b57cec5SDimitry Andric // loaded type to i16 and propagate the "real" type as the memory type. 
62250b57cec5SDimitry Andric bool NeedTrunc = false; 62260b57cec5SDimitry Andric if (EltVT.getSizeInBits() < 16) { 62270b57cec5SDimitry Andric EltVT = MVT::i16; 62280b57cec5SDimitry Andric NeedTrunc = true; 62290b57cec5SDimitry Andric } 62300b57cec5SDimitry Andric 62310b57cec5SDimitry Andric unsigned Opcode = 0; 62320b57cec5SDimitry Andric SDVTList LdResVTs; 62330b57cec5SDimitry Andric 62340b57cec5SDimitry Andric switch (NumElts) { 62350b57cec5SDimitry Andric default: 62360b57cec5SDimitry Andric return; 62370b57cec5SDimitry Andric case 2: 62380b57cec5SDimitry Andric switch (IntrinNo) { 62390b57cec5SDimitry Andric default: 62400b57cec5SDimitry Andric return; 62410b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_i: 62420b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_f: 62430b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_p: 62440b57cec5SDimitry Andric Opcode = NVPTXISD::LDGV2; 62450b57cec5SDimitry Andric break; 62460b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_i: 62470b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_f: 62480b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_p: 62490b57cec5SDimitry Andric Opcode = NVPTXISD::LDUV2; 62500b57cec5SDimitry Andric break; 62510b57cec5SDimitry Andric } 62520b57cec5SDimitry Andric LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); 62530b57cec5SDimitry Andric break; 62540b57cec5SDimitry Andric case 4: { 62550b57cec5SDimitry Andric switch (IntrinNo) { 62560b57cec5SDimitry Andric default: 62570b57cec5SDimitry Andric return; 62580b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_i: 62590b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_f: 62600b57cec5SDimitry Andric case Intrinsic::nvvm_ldg_global_p: 62610b57cec5SDimitry Andric Opcode = NVPTXISD::LDGV4; 62620b57cec5SDimitry Andric break; 62630b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_i: 62640b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_f: 62650b57cec5SDimitry Andric case Intrinsic::nvvm_ldu_global_p: 
62660b57cec5SDimitry Andric Opcode = NVPTXISD::LDUV4; 62670b57cec5SDimitry Andric break; 62680b57cec5SDimitry Andric } 62690b57cec5SDimitry Andric EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; 62700b57cec5SDimitry Andric LdResVTs = DAG.getVTList(ListVTs); 62710b57cec5SDimitry Andric break; 62720b57cec5SDimitry Andric } 62730b57cec5SDimitry Andric } 62740b57cec5SDimitry Andric 62750b57cec5SDimitry Andric SmallVector<SDValue, 8> OtherOps; 62760b57cec5SDimitry Andric 62770b57cec5SDimitry Andric // Copy regular operands 62780b57cec5SDimitry Andric 62790b57cec5SDimitry Andric OtherOps.push_back(Chain); // Chain 62800b57cec5SDimitry Andric // Skip operand 1 (intrinsic ID) 62810b57cec5SDimitry Andric // Others 62820b57cec5SDimitry Andric OtherOps.append(N->op_begin() + 2, N->op_end()); 62830b57cec5SDimitry Andric 62840b57cec5SDimitry Andric MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); 62850b57cec5SDimitry Andric 62860b57cec5SDimitry Andric SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, 62870b57cec5SDimitry Andric MemSD->getMemoryVT(), 62880b57cec5SDimitry Andric MemSD->getMemOperand()); 62890b57cec5SDimitry Andric 62900b57cec5SDimitry Andric SmallVector<SDValue, 4> ScalarRes; 62910b57cec5SDimitry Andric 62920b57cec5SDimitry Andric for (unsigned i = 0; i < NumElts; ++i) { 62930b57cec5SDimitry Andric SDValue Res = NewLD.getValue(i); 62940b57cec5SDimitry Andric if (NeedTrunc) 62950b57cec5SDimitry Andric Res = 62960b57cec5SDimitry Andric DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); 62970b57cec5SDimitry Andric ScalarRes.push_back(Res); 62980b57cec5SDimitry Andric } 62990b57cec5SDimitry Andric 63000b57cec5SDimitry Andric SDValue LoadChain = NewLD.getValue(NumElts); 63010b57cec5SDimitry Andric 63020b57cec5SDimitry Andric SDValue BuildVec = 63030b57cec5SDimitry Andric DAG.getBuildVector(ResVT, DL, ScalarRes); 63040b57cec5SDimitry Andric 63050b57cec5SDimitry Andric Results.push_back(BuildVec); 
63060b57cec5SDimitry Andric Results.push_back(LoadChain); 63070b57cec5SDimitry Andric } else { 63080b57cec5SDimitry Andric // i8 LDG/LDU 63090b57cec5SDimitry Andric assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && 63100b57cec5SDimitry Andric "Custom handling of non-i8 ldu/ldg?"); 63110b57cec5SDimitry Andric 63120b57cec5SDimitry Andric // Just copy all operands as-is 63130b57cec5SDimitry Andric SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end()); 63140b57cec5SDimitry Andric 63150b57cec5SDimitry Andric // Force output to i16 63160b57cec5SDimitry Andric SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other); 63170b57cec5SDimitry Andric 63180b57cec5SDimitry Andric MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); 63190b57cec5SDimitry Andric 63200b57cec5SDimitry Andric // We make sure the memory type is i8, which will be used during isel 63210b57cec5SDimitry Andric // to select the proper instruction. 63220b57cec5SDimitry Andric SDValue NewLD = 63230b57cec5SDimitry Andric DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops, 63240b57cec5SDimitry Andric MVT::i8, MemSD->getMemOperand()); 63250b57cec5SDimitry Andric 63260b57cec5SDimitry Andric Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, 63270b57cec5SDimitry Andric NewLD.getValue(0))); 63280b57cec5SDimitry Andric Results.push_back(NewLD.getValue(1)); 63290b57cec5SDimitry Andric } 63300b57cec5SDimitry Andric } 63310b57cec5SDimitry Andric } 63320b57cec5SDimitry Andric } 63330b57cec5SDimitry Andric 63340fca6ea1SDimitry Andric static void ReplaceCopyFromReg_128(SDNode *N, SelectionDAG &DAG, 63350fca6ea1SDimitry Andric SmallVectorImpl<SDValue> &Results) { 63360fca6ea1SDimitry Andric // Change the CopyFromReg to output 2 64-bit results instead of a 128-bit 63370fca6ea1SDimitry Andric // result so that it can pass the legalization 63380fca6ea1SDimitry Andric SDLoc DL(N); 63390fca6ea1SDimitry Andric SDValue Chain = N->getOperand(0); 63400fca6ea1SDimitry Andric SDValue Reg 
= N->getOperand(1); 63410fca6ea1SDimitry Andric SDValue Glue = N->getOperand(2); 63420fca6ea1SDimitry Andric 63430fca6ea1SDimitry Andric assert(Reg.getValueType() == MVT::i128 && 63440fca6ea1SDimitry Andric "Custom lowering for CopyFromReg with 128-bit reg only"); 63450fca6ea1SDimitry Andric SmallVector<EVT, 4> ResultsType = {MVT::i64, MVT::i64, N->getValueType(1), 63460fca6ea1SDimitry Andric N->getValueType(2)}; 63470fca6ea1SDimitry Andric SmallVector<SDValue, 3> NewOps = {Chain, Reg, Glue}; 63480fca6ea1SDimitry Andric 63490fca6ea1SDimitry Andric SDValue NewValue = DAG.getNode(ISD::CopyFromReg, DL, ResultsType, NewOps); 63500fca6ea1SDimitry Andric SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, 63510fca6ea1SDimitry Andric {NewValue.getValue(0), NewValue.getValue(1)}); 63520fca6ea1SDimitry Andric 63530fca6ea1SDimitry Andric Results.push_back(Pair); 63540fca6ea1SDimitry Andric Results.push_back(NewValue.getValue(2)); 63550fca6ea1SDimitry Andric Results.push_back(NewValue.getValue(3)); 63560fca6ea1SDimitry Andric } 63570fca6ea1SDimitry Andric 63580b57cec5SDimitry Andric void NVPTXTargetLowering::ReplaceNodeResults( 63590b57cec5SDimitry Andric SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { 63600b57cec5SDimitry Andric switch (N->getOpcode()) { 63610b57cec5SDimitry Andric default: 63620b57cec5SDimitry Andric report_fatal_error("Unhandled custom legalization"); 63630b57cec5SDimitry Andric case ISD::LOAD: 63640b57cec5SDimitry Andric ReplaceLoadVector(N, DAG, Results); 63650b57cec5SDimitry Andric return; 63660b57cec5SDimitry Andric case ISD::INTRINSIC_W_CHAIN: 63670b57cec5SDimitry Andric ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); 63680b57cec5SDimitry Andric return; 63690fca6ea1SDimitry Andric case ISD::CopyFromReg: 63700fca6ea1SDimitry Andric ReplaceCopyFromReg_128(N, DAG, Results); 63710fca6ea1SDimitry Andric return; 63720b57cec5SDimitry Andric } 63730b57cec5SDimitry Andric } 63740b57cec5SDimitry Andric 637581ad6265SDimitry Andric 
/// Choose the expansion kind for the atomicrmw instruction \p AI.
///
/// Returns AtomicExpansionKind::None for:
///  - fadd on half (SM >= 70 and PTX >= 63), bfloat (SM >= 90 and PTX >= 78),
///    float, or double when STI.hasAtomAddF64();
///  - and/or/xor/xchg on i32, or on i64 when STI.hasAtomBitwise64();
///  - add/sub/max/min/umax/umin on i32, or on i64 when STI.hasAtomMinMax64().
/// Everything else, including all 8- and 16-bit integer operations, returns
/// AtomicExpansionKind::CmpXChg. For the integer operations listed above, a
/// bit width other than 8, 16, 32 or 64 is treated as unreachable.
NVPTXTargetLowering::AtomicExpansionKind
NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  Type *ValTy = AI->getValOperand()->getType();

  if (AI->isFloatingPointOperation()) {
    // Among the floating-point operations only fadd is ever kept.
    if (AI->getOperation() != AtomicRMWInst::BinOp::FAdd)
      return AtomicExpansionKind::CmpXChg;

    const bool KeepFAdd =
        (ValTy->isHalfTy() && STI.getSmVersion() >= 70 &&
         STI.getPTXVersion() >= 63) ||
        (ValTy->isBFloatTy() && STI.getSmVersion() >= 90 &&
         STI.getPTXVersion() >= 78) ||
        ValTy->isFloatTy() || (ValTy->isDoubleTy() && STI.hasAtomAddF64());
    return KeepFAdd ? AtomicExpansionKind::None : AtomicExpansionKind::CmpXChg;
  }

  assert(ValTy->isIntegerTy() && "Ty should be integer at this point");
  auto *IntTy = cast<llvm::IntegerType>(ValTy);

  // Sort the operation into one of the two families that share a width
  // table; anything outside both families is always expanded.
  bool IsLogicalOrXchg = false;
  switch (AI->getOperation()) {
  case AtomicRMWInst::BinOp::And:
  case AtomicRMWInst::BinOp::Or:
  case AtomicRMWInst::BinOp::Xor:
  case AtomicRMWInst::BinOp::Xchg:
    IsLogicalOrXchg = true;
    break;
  case AtomicRMWInst::BinOp::Add:
  case AtomicRMWInst::BinOp::Sub:
  case AtomicRMWInst::BinOp::Max:
  case AtomicRMWInst::BinOp::Min:
  case AtomicRMWInst::BinOp::UMax:
  case AtomicRMWInst::BinOp::UMin:
    IsLogicalOrXchg = false;
    break;
  default:
    return AtomicExpansionKind::CmpXChg;
  }

  // Both families agree on every width except 64, where each consults its
  // own subtarget predicate.
  switch (IntTy->getBitWidth()) {
  case 32:
    return AtomicExpansionKind::None;
  case 64: {
    const bool Native64 =
        IsLogicalOrXchg ? STI.hasAtomBitwise64() : STI.hasAtomMinMax64();
    return Native64 ? AtomicExpansionKind::None : AtomicExpansionKind::CmpXChg;
  }
  case 8:
  case 16:
    return AtomicExpansionKind::CmpXChg;
  default:
    llvm_unreachable("unsupported width encountered");
  }
}

// Pin NVPTXTargetObjectFile's vtables to this file.
644381ad6265SDimitry Andric NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default; 64440b57cec5SDimitry Andric 64450b57cec5SDimitry Andric MCSection *NVPTXTargetObjectFile::SelectSectionForGlobal( 64460b57cec5SDimitry Andric const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { 64470b57cec5SDimitry Andric return getDataSection(); 64480b57cec5SDimitry Andric } 6449