xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp (revision 71ac745d76c3ba442e753daff1870893f272b29d)
10b57cec5SDimitry Andric //===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file defines the interfaces that NVPTX uses to lower LLVM code into a
100b57cec5SDimitry Andric // selection DAG.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "NVPTXISelLowering.h"
150b57cec5SDimitry Andric #include "MCTargetDesc/NVPTXBaseInfo.h"
160b57cec5SDimitry Andric #include "NVPTX.h"
170b57cec5SDimitry Andric #include "NVPTXSubtarget.h"
180b57cec5SDimitry Andric #include "NVPTXTargetMachine.h"
190b57cec5SDimitry Andric #include "NVPTXTargetObjectFile.h"
200b57cec5SDimitry Andric #include "NVPTXUtilities.h"
210b57cec5SDimitry Andric #include "llvm/ADT/APInt.h"
22e8d8bef9SDimitry Andric #include "llvm/ADT/STLExtras.h"
230b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
240b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
250b57cec5SDimitry Andric #include "llvm/CodeGen/Analysis.h"
265f757f3fSDimitry Andric #include "llvm/CodeGen/ISDOpcodes.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
280b57cec5SDimitry Andric #include "llvm/CodeGen/MachineMemOperand.h"
290b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAG.h"
300b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAGNodes.h"
310b57cec5SDimitry Andric #include "llvm/CodeGen/TargetCallingConv.h"
320b57cec5SDimitry Andric #include "llvm/CodeGen/TargetLowering.h"
330b57cec5SDimitry Andric #include "llvm/CodeGen/ValueTypes.h"
340fca6ea1SDimitry Andric #include "llvm/CodeGenTypes/MachineValueType.h"
350b57cec5SDimitry Andric #include "llvm/IR/Argument.h"
360b57cec5SDimitry Andric #include "llvm/IR/Attributes.h"
370b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
380b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h"
390b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
405f757f3fSDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
4181ad6265SDimitry Andric #include "llvm/IR/FPEnv.h"
420b57cec5SDimitry Andric #include "llvm/IR/Function.h"
430b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h"
440b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
450b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
46480093f4SDimitry Andric #include "llvm/IR/IntrinsicsNVPTX.h"
470b57cec5SDimitry Andric #include "llvm/IR/Module.h"
480b57cec5SDimitry Andric #include "llvm/IR/Type.h"
490b57cec5SDimitry Andric #include "llvm/IR/Value.h"
500fca6ea1SDimitry Andric #include "llvm/Support/Alignment.h"
510b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
520b57cec5SDimitry Andric #include "llvm/Support/CodeGen.h"
530b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
540b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
550b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
560b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h"
570b57cec5SDimitry Andric #include "llvm/Target/TargetOptions.h"
580b57cec5SDimitry Andric #include <algorithm>
590b57cec5SDimitry Andric #include <cassert>
60bdd1243dSDimitry Andric #include <cmath>
610b57cec5SDimitry Andric #include <cstdint>
620b57cec5SDimitry Andric #include <iterator>
630fca6ea1SDimitry Andric #include <optional>
640b57cec5SDimitry Andric #include <sstream>
650b57cec5SDimitry Andric #include <string>
660b57cec5SDimitry Andric #include <utility>
670b57cec5SDimitry Andric #include <vector>
680b57cec5SDimitry Andric 
690b57cec5SDimitry Andric #define DEBUG_TYPE "nvptx-lower"
700b57cec5SDimitry Andric 
710b57cec5SDimitry Andric using namespace llvm;
720b57cec5SDimitry Andric 
73e8d8bef9SDimitry Andric static std::atomic<unsigned> GlobalUniqueCallSite;
740b57cec5SDimitry Andric 
750b57cec5SDimitry Andric static cl::opt<bool> sched4reg(
760b57cec5SDimitry Andric     "nvptx-sched4reg",
770b57cec5SDimitry Andric     cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
780b57cec5SDimitry Andric 
7981ad6265SDimitry Andric static cl::opt<unsigned> FMAContractLevelOpt(
8081ad6265SDimitry Andric     "nvptx-fma-level", cl::Hidden,
810b57cec5SDimitry Andric     cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
820b57cec5SDimitry Andric              " 1: do it  2: do it aggressively"),
830b57cec5SDimitry Andric     cl::init(2));
840b57cec5SDimitry Andric 
850b57cec5SDimitry Andric static cl::opt<int> UsePrecDivF32(
8681ad6265SDimitry Andric     "nvptx-prec-divf32", cl::Hidden,
870b57cec5SDimitry Andric     cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
880b57cec5SDimitry Andric              " IEEE Compliant F32 div.rnd if available."),
890b57cec5SDimitry Andric     cl::init(2));
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric static cl::opt<bool> UsePrecSqrtF32(
920b57cec5SDimitry Andric     "nvptx-prec-sqrtf32", cl::Hidden,
930b57cec5SDimitry Andric     cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
940b57cec5SDimitry Andric     cl::init(true));
950b57cec5SDimitry Andric 
9606c3fb27SDimitry Andric static cl::opt<bool> ForceMinByValParamAlign(
9706c3fb27SDimitry Andric     "nvptx-force-min-byval-param-align", cl::Hidden,
9806c3fb27SDimitry Andric     cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval"
9906c3fb27SDimitry Andric              " params of device functions."),
10006c3fb27SDimitry Andric     cl::init(false));
10106c3fb27SDimitry Andric 
1020b57cec5SDimitry Andric int NVPTXTargetLowering::getDivF32Level() const {
1030b57cec5SDimitry Andric   if (UsePrecDivF32.getNumOccurrences() > 0) {
1040b57cec5SDimitry Andric     // If nvptx-prec-div32=N is used on the command-line, always honor it
1050b57cec5SDimitry Andric     return UsePrecDivF32;
1060b57cec5SDimitry Andric   } else {
1070b57cec5SDimitry Andric     // Otherwise, use div.approx if fast math is enabled
1080b57cec5SDimitry Andric     if (getTargetMachine().Options.UnsafeFPMath)
1090b57cec5SDimitry Andric       return 0;
1100b57cec5SDimitry Andric     else
1110b57cec5SDimitry Andric       return 2;
1120b57cec5SDimitry Andric   }
1130b57cec5SDimitry Andric }
1140b57cec5SDimitry Andric 
1150b57cec5SDimitry Andric bool NVPTXTargetLowering::usePrecSqrtF32() const {
1160b57cec5SDimitry Andric   if (UsePrecSqrtF32.getNumOccurrences() > 0) {
1170b57cec5SDimitry Andric     // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
1180b57cec5SDimitry Andric     return UsePrecSqrtF32;
1190b57cec5SDimitry Andric   } else {
1200b57cec5SDimitry Andric     // Otherwise, use sqrt.approx if fast math is enabled
1210b57cec5SDimitry Andric     return !getTargetMachine().Options.UnsafeFPMath;
1220b57cec5SDimitry Andric   }
1230b57cec5SDimitry Andric }
1240b57cec5SDimitry Andric 
1250b57cec5SDimitry Andric bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const {
1265ffd83dbSDimitry Andric   return MF.getDenormalMode(APFloat::IEEEsingle()).Output ==
1275ffd83dbSDimitry Andric          DenormalMode::PreserveSign;
1280b57cec5SDimitry Andric }
1290b57cec5SDimitry Andric 
1300b57cec5SDimitry Andric static bool IsPTXVectorType(MVT VT) {
1310b57cec5SDimitry Andric   switch (VT.SimpleTy) {
1320b57cec5SDimitry Andric   default:
1330b57cec5SDimitry Andric     return false;
1340b57cec5SDimitry Andric   case MVT::v2i1:
1350b57cec5SDimitry Andric   case MVT::v4i1:
1360b57cec5SDimitry Andric   case MVT::v2i8:
1370b57cec5SDimitry Andric   case MVT::v4i8:
1380b57cec5SDimitry Andric   case MVT::v2i16:
1390b57cec5SDimitry Andric   case MVT::v4i16:
1405f757f3fSDimitry Andric   case MVT::v8i16: // <4 x i16x2>
1410b57cec5SDimitry Andric   case MVT::v2i32:
1420b57cec5SDimitry Andric   case MVT::v4i32:
1430b57cec5SDimitry Andric   case MVT::v2i64:
1440b57cec5SDimitry Andric   case MVT::v2f16:
1450b57cec5SDimitry Andric   case MVT::v4f16:
1460b57cec5SDimitry Andric   case MVT::v8f16: // <4 x f16x2>
147bdd1243dSDimitry Andric   case MVT::v2bf16:
148bdd1243dSDimitry Andric   case MVT::v4bf16:
149bdd1243dSDimitry Andric   case MVT::v8bf16: // <4 x bf16x2>
1500b57cec5SDimitry Andric   case MVT::v2f32:
1510b57cec5SDimitry Andric   case MVT::v4f32:
1520b57cec5SDimitry Andric   case MVT::v2f64:
1530b57cec5SDimitry Andric     return true;
1540b57cec5SDimitry Andric   }
1550b57cec5SDimitry Andric }
1560b57cec5SDimitry Andric 
1575f757f3fSDimitry Andric static bool Is16bitsType(MVT VT) {
1585f757f3fSDimitry Andric   return (VT.SimpleTy == MVT::f16 || VT.SimpleTy == MVT::bf16 ||
1595f757f3fSDimitry Andric           VT.SimpleTy == MVT::i16);
16006c3fb27SDimitry Andric }
16106c3fb27SDimitry Andric 
1620b57cec5SDimitry Andric /// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
1630b57cec5SDimitry Andric /// EVTs that compose it.  Unlike ComputeValueVTs, this will break apart vectors
1640b57cec5SDimitry Andric /// into their primitive components.
1650b57cec5SDimitry Andric /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
1660b57cec5SDimitry Andric /// same number of types as the Ins/Outs arrays in LowerFormalArguments,
1670b57cec5SDimitry Andric /// LowerCall, and LowerReturn.
1680b57cec5SDimitry Andric static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
1690b57cec5SDimitry Andric                                Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
1700b57cec5SDimitry Andric                                SmallVectorImpl<uint64_t> *Offsets = nullptr,
1710b57cec5SDimitry Andric                                uint64_t StartingOffset = 0) {
1720b57cec5SDimitry Andric   SmallVector<EVT, 16> TempVTs;
1730b57cec5SDimitry Andric   SmallVector<uint64_t, 16> TempOffsets;
1740b57cec5SDimitry Andric 
1750b57cec5SDimitry Andric   // Special case for i128 - decompose to (i64, i64)
1760b57cec5SDimitry Andric   if (Ty->isIntegerTy(128)) {
1770b57cec5SDimitry Andric     ValueVTs.push_back(EVT(MVT::i64));
1780b57cec5SDimitry Andric     ValueVTs.push_back(EVT(MVT::i64));
1790b57cec5SDimitry Andric 
1800b57cec5SDimitry Andric     if (Offsets) {
1810b57cec5SDimitry Andric       Offsets->push_back(StartingOffset + 0);
1820b57cec5SDimitry Andric       Offsets->push_back(StartingOffset + 8);
1830b57cec5SDimitry Andric     }
1840b57cec5SDimitry Andric 
1850b57cec5SDimitry Andric     return;
1860b57cec5SDimitry Andric   }
1870b57cec5SDimitry Andric 
1880b57cec5SDimitry Andric   // Given a struct type, recursively traverse the elements with custom ComputePTXValueVTs.
1890b57cec5SDimitry Andric   if (StructType *STy = dyn_cast<StructType>(Ty)) {
1900b57cec5SDimitry Andric     auto const *SL = DL.getStructLayout(STy);
1910b57cec5SDimitry Andric     auto ElementNum = 0;
1920b57cec5SDimitry Andric     for(auto *EI : STy->elements()) {
1930b57cec5SDimitry Andric       ComputePTXValueVTs(TLI, DL, EI, ValueVTs, Offsets,
1940b57cec5SDimitry Andric                          StartingOffset + SL->getElementOffset(ElementNum));
1950b57cec5SDimitry Andric       ++ElementNum;
1960b57cec5SDimitry Andric     }
1970b57cec5SDimitry Andric     return;
1980b57cec5SDimitry Andric   }
1990b57cec5SDimitry Andric 
2000b57cec5SDimitry Andric   ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
2010b57cec5SDimitry Andric   for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
2020b57cec5SDimitry Andric     EVT VT = TempVTs[i];
2030b57cec5SDimitry Andric     uint64_t Off = TempOffsets[i];
2040b57cec5SDimitry Andric     // Split vectors into individual elements, except for v2f16, which
2050b57cec5SDimitry Andric     // we will pass as a single scalar.
2060b57cec5SDimitry Andric     if (VT.isVector()) {
2070b57cec5SDimitry Andric       unsigned NumElts = VT.getVectorNumElements();
2080b57cec5SDimitry Andric       EVT EltVT = VT.getVectorElementType();
2090b57cec5SDimitry Andric       // Vectors with an even number of f16 elements will be passed to
210bdd1243dSDimitry Andric       // us as an array of v2f16/v2bf16 elements. We must match this so we
2110b57cec5SDimitry Andric       // stay in sync with Ins/Outs.
2125f757f3fSDimitry Andric       if ((Is16bitsType(EltVT.getSimpleVT())) && NumElts % 2 == 0) {
2135f757f3fSDimitry Andric         switch (EltVT.getSimpleVT().SimpleTy) {
2145f757f3fSDimitry Andric         case MVT::f16:
2155f757f3fSDimitry Andric           EltVT = MVT::v2f16;
2165f757f3fSDimitry Andric           break;
2175f757f3fSDimitry Andric         case MVT::bf16:
2185f757f3fSDimitry Andric           EltVT = MVT::v2bf16;
2195f757f3fSDimitry Andric           break;
2205f757f3fSDimitry Andric         case MVT::i16:
2215f757f3fSDimitry Andric           EltVT = MVT::v2i16;
2225f757f3fSDimitry Andric           break;
2235f757f3fSDimitry Andric         default:
2245f757f3fSDimitry Andric           llvm_unreachable("Unexpected type");
2255f757f3fSDimitry Andric         }
2260b57cec5SDimitry Andric         NumElts /= 2;
2275f757f3fSDimitry Andric       } else if (EltVT.getSimpleVT() == MVT::i8 &&
2285f757f3fSDimitry Andric                  (NumElts % 4 == 0 || NumElts == 3)) {
2295f757f3fSDimitry Andric         // v*i8 are formally lowered as v4i8
2305f757f3fSDimitry Andric         EltVT = MVT::v4i8;
2315f757f3fSDimitry Andric         NumElts = (NumElts + 3) / 4;
232*71ac745dSDimitry Andric       } else if (EltVT.getSimpleVT() == MVT::i8 && NumElts == 2) {
233*71ac745dSDimitry Andric         // v2i8 is promoted to v2i16
234*71ac745dSDimitry Andric         NumElts = 1;
235*71ac745dSDimitry Andric         EltVT = MVT::v2i16;
2360b57cec5SDimitry Andric       }
2370b57cec5SDimitry Andric       for (unsigned j = 0; j != NumElts; ++j) {
2380b57cec5SDimitry Andric         ValueVTs.push_back(EltVT);
2390b57cec5SDimitry Andric         if (Offsets)
2400b57cec5SDimitry Andric           Offsets->push_back(Off + j * EltVT.getStoreSize());
2410b57cec5SDimitry Andric       }
2420b57cec5SDimitry Andric     } else {
2430b57cec5SDimitry Andric       ValueVTs.push_back(VT);
2440b57cec5SDimitry Andric       if (Offsets)
2450b57cec5SDimitry Andric         Offsets->push_back(Off);
2460b57cec5SDimitry Andric     }
2470b57cec5SDimitry Andric   }
2480b57cec5SDimitry Andric }
2490b57cec5SDimitry Andric 
250fcaf7f86SDimitry Andric /// PromoteScalarIntegerPTX
251fcaf7f86SDimitry Andric /// Used to make sure the arguments/returns are suitable for passing
252fcaf7f86SDimitry Andric /// and promote them to a larger size if they're not.
253fcaf7f86SDimitry Andric ///
254fcaf7f86SDimitry Andric /// The promoted type is placed in \p PromoteVT if the function returns true.
255fcaf7f86SDimitry Andric static bool PromoteScalarIntegerPTX(const EVT &VT, MVT *PromotedVT) {
256fcaf7f86SDimitry Andric   if (VT.isScalarInteger()) {
257fcaf7f86SDimitry Andric     switch (PowerOf2Ceil(VT.getFixedSizeInBits())) {
258fcaf7f86SDimitry Andric     default:
259fcaf7f86SDimitry Andric       llvm_unreachable(
260fcaf7f86SDimitry Andric           "Promotion is not suitable for scalars of size larger than 64-bits");
261fcaf7f86SDimitry Andric     case 1:
262fcaf7f86SDimitry Andric       *PromotedVT = MVT::i1;
263fcaf7f86SDimitry Andric       break;
264fcaf7f86SDimitry Andric     case 2:
265fcaf7f86SDimitry Andric     case 4:
266fcaf7f86SDimitry Andric     case 8:
267fcaf7f86SDimitry Andric       *PromotedVT = MVT::i8;
268fcaf7f86SDimitry Andric       break;
269fcaf7f86SDimitry Andric     case 16:
270fcaf7f86SDimitry Andric       *PromotedVT = MVT::i16;
271fcaf7f86SDimitry Andric       break;
272fcaf7f86SDimitry Andric     case 32:
273fcaf7f86SDimitry Andric       *PromotedVT = MVT::i32;
274fcaf7f86SDimitry Andric       break;
275fcaf7f86SDimitry Andric     case 64:
276fcaf7f86SDimitry Andric       *PromotedVT = MVT::i64;
277fcaf7f86SDimitry Andric       break;
278fcaf7f86SDimitry Andric     }
279fcaf7f86SDimitry Andric     return EVT(*PromotedVT) != VT;
280fcaf7f86SDimitry Andric   }
281fcaf7f86SDimitry Andric   return false;
282fcaf7f86SDimitry Andric }
283fcaf7f86SDimitry Andric 
2840b57cec5SDimitry Andric // Check whether we can merge loads/stores of some of the pieces of a
2850b57cec5SDimitry Andric // flattened function parameter or return value into a single vector
2860b57cec5SDimitry Andric // load/store.
2870b57cec5SDimitry Andric //
2880b57cec5SDimitry Andric // The flattened parameter is represented as a list of EVTs and
2890b57cec5SDimitry Andric // offsets, and the whole structure is aligned to ParamAlignment. This
2900b57cec5SDimitry Andric // function determines whether we can load/store pieces of the
2910b57cec5SDimitry Andric // parameter starting at index Idx using a single vectorized op of
2920b57cec5SDimitry Andric // size AccessSize. If so, it returns the number of param pieces
2930b57cec5SDimitry Andric // covered by the vector op. Otherwise, it returns 1.
2940b57cec5SDimitry Andric static unsigned CanMergeParamLoadStoresStartingAt(
2950b57cec5SDimitry Andric     unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs,
2965ffd83dbSDimitry Andric     const SmallVectorImpl<uint64_t> &Offsets, Align ParamAlignment) {
2970b57cec5SDimitry Andric 
2980b57cec5SDimitry Andric   // Can't vectorize if param alignment is not sufficient.
2995ffd83dbSDimitry Andric   if (ParamAlignment < AccessSize)
3000b57cec5SDimitry Andric     return 1;
3010b57cec5SDimitry Andric   // Can't vectorize if offset is not aligned.
3020b57cec5SDimitry Andric   if (Offsets[Idx] & (AccessSize - 1))
3030b57cec5SDimitry Andric     return 1;
3040b57cec5SDimitry Andric 
3050b57cec5SDimitry Andric   EVT EltVT = ValueVTs[Idx];
3060b57cec5SDimitry Andric   unsigned EltSize = EltVT.getStoreSize();
3070b57cec5SDimitry Andric 
3080b57cec5SDimitry Andric   // Element is too large to vectorize.
3090b57cec5SDimitry Andric   if (EltSize >= AccessSize)
3100b57cec5SDimitry Andric     return 1;
3110b57cec5SDimitry Andric 
3120b57cec5SDimitry Andric   unsigned NumElts = AccessSize / EltSize;
3130b57cec5SDimitry Andric   // Can't vectorize if AccessBytes if not a multiple of EltSize.
3140b57cec5SDimitry Andric   if (AccessSize != EltSize * NumElts)
3150b57cec5SDimitry Andric     return 1;
3160b57cec5SDimitry Andric 
3170b57cec5SDimitry Andric   // We don't have enough elements to vectorize.
3180b57cec5SDimitry Andric   if (Idx + NumElts > ValueVTs.size())
3190b57cec5SDimitry Andric     return 1;
3200b57cec5SDimitry Andric 
3210b57cec5SDimitry Andric   // PTX ISA can only deal with 2- and 4-element vector ops.
3220b57cec5SDimitry Andric   if (NumElts != 4 && NumElts != 2)
3230b57cec5SDimitry Andric     return 1;
3240b57cec5SDimitry Andric 
3250b57cec5SDimitry Andric   for (unsigned j = Idx + 1; j < Idx + NumElts; ++j) {
3260b57cec5SDimitry Andric     // Types do not match.
3270b57cec5SDimitry Andric     if (ValueVTs[j] != EltVT)
3280b57cec5SDimitry Andric       return 1;
3290b57cec5SDimitry Andric 
3300b57cec5SDimitry Andric     // Elements are not contiguous.
3310b57cec5SDimitry Andric     if (Offsets[j] - Offsets[j - 1] != EltSize)
3320b57cec5SDimitry Andric       return 1;
3330b57cec5SDimitry Andric   }
3340b57cec5SDimitry Andric   // OK. We can vectorize ValueVTs[i..i+NumElts)
3350b57cec5SDimitry Andric   return NumElts;
3360b57cec5SDimitry Andric }
3370b57cec5SDimitry Andric 
// Per-piece vectorization state for the loads/stores of a flattened function
// parameter or return value. FIRST and LAST are independent bits; a scalar is
// treated as a one-element vector and therefore carries both.
enum ParamVectorizationFlags {
  PVF_INNER = 0,                    // Middle element of a vector.
  PVF_FIRST = 1 << 0,               // First element of a vector.
  PVF_LAST = 1 << 1,                // Last element of a vector.
  PVF_SCALAR = PVF_FIRST | PVF_LAST // Both first and last: a scalar.
};
3470b57cec5SDimitry Andric 
3480b57cec5SDimitry Andric // Computes whether and how we can vectorize the loads/stores of a
3490b57cec5SDimitry Andric // flattened function parameter or return value.
3500b57cec5SDimitry Andric //
3510b57cec5SDimitry Andric // The flattened parameter is represented as the list of ValueVTs and
3520b57cec5SDimitry Andric // Offsets, and is aligned to ParamAlignment bytes. We return a vector
3530b57cec5SDimitry Andric // of the same size as ValueVTs indicating how each piece should be
3540b57cec5SDimitry Andric // loaded/stored (i.e. as a scalar, or as part of a vector
3550b57cec5SDimitry Andric // load/store).
3560b57cec5SDimitry Andric static SmallVector<ParamVectorizationFlags, 16>
3570b57cec5SDimitry Andric VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
3580b57cec5SDimitry Andric                      const SmallVectorImpl<uint64_t> &Offsets,
359bdd1243dSDimitry Andric                      Align ParamAlignment, bool IsVAArg = false) {
3600b57cec5SDimitry Andric   // Set vector size to match ValueVTs and mark all elements as
3610b57cec5SDimitry Andric   // scalars by default.
3620b57cec5SDimitry Andric   SmallVector<ParamVectorizationFlags, 16> VectorInfo;
3630b57cec5SDimitry Andric   VectorInfo.assign(ValueVTs.size(), PVF_SCALAR);
3640b57cec5SDimitry Andric 
365bdd1243dSDimitry Andric   if (IsVAArg)
366bdd1243dSDimitry Andric     return VectorInfo;
367bdd1243dSDimitry Andric 
3680b57cec5SDimitry Andric   // Check what we can vectorize using 128/64/32-bit accesses.
3690b57cec5SDimitry Andric   for (int I = 0, E = ValueVTs.size(); I != E; ++I) {
3700b57cec5SDimitry Andric     // Skip elements we've already processed.
3710b57cec5SDimitry Andric     assert(VectorInfo[I] == PVF_SCALAR && "Unexpected vector info state.");
3720b57cec5SDimitry Andric     for (unsigned AccessSize : {16, 8, 4, 2}) {
3730b57cec5SDimitry Andric       unsigned NumElts = CanMergeParamLoadStoresStartingAt(
3740b57cec5SDimitry Andric           I, AccessSize, ValueVTs, Offsets, ParamAlignment);
3750b57cec5SDimitry Andric       // Mark vectorized elements.
3760b57cec5SDimitry Andric       switch (NumElts) {
3770b57cec5SDimitry Andric       default:
3780b57cec5SDimitry Andric         llvm_unreachable("Unexpected return value");
3790b57cec5SDimitry Andric       case 1:
3800b57cec5SDimitry Andric         // Can't vectorize using this size, try next smaller size.
3810b57cec5SDimitry Andric         continue;
3820b57cec5SDimitry Andric       case 2:
3830b57cec5SDimitry Andric         assert(I + 1 < E && "Not enough elements.");
3840b57cec5SDimitry Andric         VectorInfo[I] = PVF_FIRST;
3850b57cec5SDimitry Andric         VectorInfo[I + 1] = PVF_LAST;
3860b57cec5SDimitry Andric         I += 1;
3870b57cec5SDimitry Andric         break;
3880b57cec5SDimitry Andric       case 4:
3890b57cec5SDimitry Andric         assert(I + 3 < E && "Not enough elements.");
3900b57cec5SDimitry Andric         VectorInfo[I] = PVF_FIRST;
3910b57cec5SDimitry Andric         VectorInfo[I + 1] = PVF_INNER;
3920b57cec5SDimitry Andric         VectorInfo[I + 2] = PVF_INNER;
3930b57cec5SDimitry Andric         VectorInfo[I + 3] = PVF_LAST;
3940b57cec5SDimitry Andric         I += 3;
3950b57cec5SDimitry Andric         break;
3960b57cec5SDimitry Andric       }
3970b57cec5SDimitry Andric       // Break out of the inner loop because we've already succeeded
3980b57cec5SDimitry Andric       // using largest possible AccessSize.
3990b57cec5SDimitry Andric       break;
4000b57cec5SDimitry Andric     }
4010b57cec5SDimitry Andric   }
4020b57cec5SDimitry Andric   return VectorInfo;
4030b57cec5SDimitry Andric }
4040b57cec5SDimitry Andric 
4050b57cec5SDimitry Andric // NVPTXTargetLowering Constructor.
4060b57cec5SDimitry Andric NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
4070b57cec5SDimitry Andric                                          const NVPTXSubtarget &STI)
4080b57cec5SDimitry Andric     : TargetLowering(TM), nvTM(&TM), STI(STI) {
4090b57cec5SDimitry Andric   // always lower memset, memcpy, and memmove intrinsics to load/store
4100b57cec5SDimitry Andric   // instructions, rather
4110b57cec5SDimitry Andric   // then generating calls to memset, mempcy or memmove.
4125f757f3fSDimitry Andric   MaxStoresPerMemset = MaxStoresPerMemsetOptSize = (unsigned)0xFFFFFFFF;
4135f757f3fSDimitry Andric   MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = (unsigned) 0xFFFFFFFF;
4145f757f3fSDimitry Andric   MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = (unsigned) 0xFFFFFFFF;
4150b57cec5SDimitry Andric 
4160b57cec5SDimitry Andric   setBooleanContents(ZeroOrNegativeOneBooleanContent);
4170b57cec5SDimitry Andric   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
4180b57cec5SDimitry Andric 
4190b57cec5SDimitry Andric   // Jump is Expensive. Don't create extra control flow for 'and', 'or'
4200b57cec5SDimitry Andric   // condition branches.
4210b57cec5SDimitry Andric   setJumpIsExpensive(true);
4220b57cec5SDimitry Andric 
4230b57cec5SDimitry Andric   // Wide divides are _very_ slow. Try to reduce the width of the divide if
4240b57cec5SDimitry Andric   // possible.
4250b57cec5SDimitry Andric   addBypassSlowDiv(64, 32);
4260b57cec5SDimitry Andric 
4270b57cec5SDimitry Andric   // By default, use the Source scheduling
4280b57cec5SDimitry Andric   if (sched4reg)
4290b57cec5SDimitry Andric     setSchedulingPreference(Sched::RegPressure);
4300b57cec5SDimitry Andric   else
4310b57cec5SDimitry Andric     setSchedulingPreference(Sched::Source);
4320b57cec5SDimitry Andric 
4330b57cec5SDimitry Andric   auto setFP16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action,
4340b57cec5SDimitry Andric                                     LegalizeAction NoF16Action) {
4350b57cec5SDimitry Andric     setOperationAction(Op, VT, STI.allowFP16Math() ? Action : NoF16Action);
4360b57cec5SDimitry Andric   };
4370b57cec5SDimitry Andric 
43806c3fb27SDimitry Andric   auto setBF16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action,
43906c3fb27SDimitry Andric                                     LegalizeAction NoBF16Action) {
44006c3fb27SDimitry Andric     bool IsOpSupported = STI.hasBF16Math();
44106c3fb27SDimitry Andric     // Few instructions are available on sm_90 only
44206c3fb27SDimitry Andric     switch(Op) {
44306c3fb27SDimitry Andric       case ISD::FADD:
44406c3fb27SDimitry Andric       case ISD::FMUL:
44506c3fb27SDimitry Andric       case ISD::FSUB:
4465f757f3fSDimitry Andric       case ISD::SELECT:
4475f757f3fSDimitry Andric       case ISD::SELECT_CC:
4485f757f3fSDimitry Andric       case ISD::SETCC:
4495f757f3fSDimitry Andric       case ISD::FEXP2:
4505f757f3fSDimitry Andric       case ISD::FCEIL:
4515f757f3fSDimitry Andric       case ISD::FFLOOR:
4525f757f3fSDimitry Andric       case ISD::FNEARBYINT:
4535f757f3fSDimitry Andric       case ISD::FRINT:
4540fca6ea1SDimitry Andric       case ISD::FROUNDEVEN:
4555f757f3fSDimitry Andric       case ISD::FTRUNC:
45606c3fb27SDimitry Andric         IsOpSupported = STI.getSmVersion() >= 90 && STI.getPTXVersion() >= 78;
45706c3fb27SDimitry Andric         break;
45806c3fb27SDimitry Andric     }
45906c3fb27SDimitry Andric     setOperationAction(
46006c3fb27SDimitry Andric         Op, VT, IsOpSupported ? Action : NoBF16Action);
46106c3fb27SDimitry Andric   };
46206c3fb27SDimitry Andric 
4635f757f3fSDimitry Andric   auto setI16x2OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action,
4645f757f3fSDimitry Andric                                      LegalizeAction NoI16x2Action) {
4655f757f3fSDimitry Andric     bool IsOpSupported = false;
4665f757f3fSDimitry Andric     // instructions are available on sm_90 only
4675f757f3fSDimitry Andric     switch (Op) {
4685f757f3fSDimitry Andric     case ISD::ADD:
4695f757f3fSDimitry Andric     case ISD::SMAX:
4705f757f3fSDimitry Andric     case ISD::SMIN:
4715f757f3fSDimitry Andric     case ISD::UMIN:
4725f757f3fSDimitry Andric     case ISD::UMAX:
4735f757f3fSDimitry Andric       IsOpSupported = STI.getSmVersion() >= 90 && STI.getPTXVersion() >= 80;
4745f757f3fSDimitry Andric       break;
4755f757f3fSDimitry Andric     }
4765f757f3fSDimitry Andric     setOperationAction(Op, VT, IsOpSupported ? Action : NoI16x2Action);
4775f757f3fSDimitry Andric   };
4785f757f3fSDimitry Andric 
4790b57cec5SDimitry Andric   addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
4800b57cec5SDimitry Andric   addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
4815f757f3fSDimitry Andric   addRegisterClass(MVT::v2i16, &NVPTX::Int32RegsRegClass);
4825f757f3fSDimitry Andric   addRegisterClass(MVT::v4i8, &NVPTX::Int32RegsRegClass);
4830b57cec5SDimitry Andric   addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
4840b57cec5SDimitry Andric   addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
4850b57cec5SDimitry Andric   addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
4860b57cec5SDimitry Andric   addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
48706c3fb27SDimitry Andric   addRegisterClass(MVT::f16, &NVPTX::Int16RegsRegClass);
48806c3fb27SDimitry Andric   addRegisterClass(MVT::v2f16, &NVPTX::Int32RegsRegClass);
48906c3fb27SDimitry Andric   addRegisterClass(MVT::bf16, &NVPTX::Int16RegsRegClass);
49006c3fb27SDimitry Andric   addRegisterClass(MVT::v2bf16, &NVPTX::Int32RegsRegClass);
4910b57cec5SDimitry Andric 
4920b57cec5SDimitry Andric   // Conversion to/from FP16/FP16x2 is always legal.
4930b57cec5SDimitry Andric   setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom);
4940b57cec5SDimitry Andric   setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);
4950b57cec5SDimitry Andric   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f16, Expand);
4960b57cec5SDimitry Andric   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f16, Expand);
4970b57cec5SDimitry Andric 
4980fca6ea1SDimitry Andric   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
4990fca6ea1SDimitry Andric   if (STI.getSmVersion() >= 30 && STI.getPTXVersion() > 31)
5000fca6ea1SDimitry Andric     setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Legal);
5010fca6ea1SDimitry Andric 
5020b57cec5SDimitry Andric   setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote);
5030b57cec5SDimitry Andric   setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand);
5040b57cec5SDimitry Andric 
50506c3fb27SDimitry Andric   // Conversion to/from BFP16/BFP16x2 is always legal.
50606c3fb27SDimitry Andric   setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Custom);
50706c3fb27SDimitry Andric   setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2bf16, Custom);
50806c3fb27SDimitry Andric   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2bf16, Expand);
50906c3fb27SDimitry Andric   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2bf16, Expand);
51006c3fb27SDimitry Andric 
51106c3fb27SDimitry Andric   setBF16OperationAction(ISD::SETCC, MVT::v2bf16, Legal, Expand);
5125f757f3fSDimitry Andric   setBF16OperationAction(ISD::SETCC, MVT::bf16, Legal, Promote);
5135f757f3fSDimitry Andric   if (getOperationAction(ISD::SETCC, MVT::bf16) == Promote)
5145f757f3fSDimitry Andric     AddPromotedToType(ISD::SETCC, MVT::bf16, MVT::f32);
5155f757f3fSDimitry Andric 
5165f757f3fSDimitry Andric   // Conversion to/from i16/i16x2 is always legal.
5175f757f3fSDimitry Andric   setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16, Custom);
5185f757f3fSDimitry Andric   setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom);
5195f757f3fSDimitry Andric   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i16, Expand);
5205f757f3fSDimitry Andric   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i16, Expand);
5215f757f3fSDimitry Andric 
5225f757f3fSDimitry Andric   setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8, Custom);
5235f757f3fSDimitry Andric   setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i8, Custom);
5245f757f3fSDimitry Andric   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i8, Custom);
5255f757f3fSDimitry Andric   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom);
5265f757f3fSDimitry Andric   // Only logical ops can be done on v4i8 directly, others must be done
5275f757f3fSDimitry Andric   // elementwise.
5285f757f3fSDimitry Andric   setOperationAction(
5295f757f3fSDimitry Andric       {ISD::ABS,         ISD::ADD,        ISD::ADDC,        ISD::ADDE,
5305f757f3fSDimitry Andric        ISD::BITREVERSE,  ISD::CTLZ,       ISD::CTPOP,       ISD::CTTZ,
5315f757f3fSDimitry Andric        ISD::FP_TO_SINT,  ISD::FP_TO_UINT, ISD::FSHL,        ISD::FSHR,
5325f757f3fSDimitry Andric        ISD::MUL,         ISD::MULHS,      ISD::MULHU,       ISD::PARITY,
5335f757f3fSDimitry Andric        ISD::ROTL,        ISD::ROTR,       ISD::SADDO,       ISD::SADDO_CARRY,
5345f757f3fSDimitry Andric        ISD::SADDSAT,     ISD::SDIV,       ISD::SDIVREM,     ISD::SELECT_CC,
5355f757f3fSDimitry Andric        ISD::SETCC,       ISD::SHL,        ISD::SINT_TO_FP,  ISD::SMAX,
5365f757f3fSDimitry Andric        ISD::SMIN,        ISD::SMULO,      ISD::SMUL_LOHI,   ISD::SRA,
5375f757f3fSDimitry Andric        ISD::SREM,        ISD::SRL,        ISD::SSHLSAT,     ISD::SSUBO,
5385f757f3fSDimitry Andric        ISD::SSUBO_CARRY, ISD::SSUBSAT,    ISD::SUB,         ISD::SUBC,
5395f757f3fSDimitry Andric        ISD::SUBE,        ISD::UADDO,      ISD::UADDO_CARRY, ISD::UADDSAT,
5405f757f3fSDimitry Andric        ISD::UDIV,        ISD::UDIVREM,    ISD::UINT_TO_FP,  ISD::UMAX,
5415f757f3fSDimitry Andric        ISD::UMIN,        ISD::UMULO,      ISD::UMUL_LOHI,   ISD::UREM,
5425f757f3fSDimitry Andric        ISD::USHLSAT,     ISD::USUBO,      ISD::USUBO_CARRY, ISD::VSELECT,
5435f757f3fSDimitry Andric        ISD::USUBSAT},
5445f757f3fSDimitry Andric       MVT::v4i8, Expand);
5455f757f3fSDimitry Andric 
5460b57cec5SDimitry Andric   // Operations not directly supported by NVPTX.
54706c3fb27SDimitry Andric   for (MVT VT : {MVT::bf16, MVT::f16, MVT::v2bf16, MVT::v2f16, MVT::f32,
5485f757f3fSDimitry Andric                  MVT::f64, MVT::i1, MVT::i8, MVT::i16, MVT::v2i16, MVT::v4i8,
5495f757f3fSDimitry Andric                  MVT::i32, MVT::i64}) {
5500b57cec5SDimitry Andric     setOperationAction(ISD::SELECT_CC, VT, Expand);
5510b57cec5SDimitry Andric     setOperationAction(ISD::BR_CC, VT, Expand);
5520b57cec5SDimitry Andric   }
5530b57cec5SDimitry Andric 
5540b57cec5SDimitry Andric   // Some SIGN_EXTEND_INREG can be done using cvt instruction.
5550b57cec5SDimitry Andric   // For others we will expand to a SHL/SRA pair.
5560b57cec5SDimitry Andric   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal);
5570b57cec5SDimitry Andric   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
5580b57cec5SDimitry Andric   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
5590b57cec5SDimitry Andric   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
5600b57cec5SDimitry Andric   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
5615f757f3fSDimitry Andric   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
5620b57cec5SDimitry Andric 
5630b57cec5SDimitry Andric   setOperationAction(ISD::SHL_PARTS, MVT::i32  , Custom);
5640b57cec5SDimitry Andric   setOperationAction(ISD::SRA_PARTS, MVT::i32  , Custom);
5650b57cec5SDimitry Andric   setOperationAction(ISD::SRL_PARTS, MVT::i32  , Custom);
5660b57cec5SDimitry Andric   setOperationAction(ISD::SHL_PARTS, MVT::i64  , Custom);
5670b57cec5SDimitry Andric   setOperationAction(ISD::SRA_PARTS, MVT::i64  , Custom);
5680b57cec5SDimitry Andric   setOperationAction(ISD::SRL_PARTS, MVT::i64  , Custom);
5690b57cec5SDimitry Andric 
5700b57cec5SDimitry Andric   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
5710b57cec5SDimitry Andric   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
5720b57cec5SDimitry Andric 
5730b57cec5SDimitry Andric   // TODO: we may consider expanding ROTL/ROTR on older GPUs.  Currently on GPUs
5740b57cec5SDimitry Andric   // that don't have h/w rotation we lower them to multi-instruction assembly.
5750b57cec5SDimitry Andric   // See ROT*_sw in NVPTXIntrInfo.td
5760b57cec5SDimitry Andric   setOperationAction(ISD::ROTL, MVT::i64, Legal);
5770b57cec5SDimitry Andric   setOperationAction(ISD::ROTR, MVT::i64, Legal);
5780b57cec5SDimitry Andric   setOperationAction(ISD::ROTL, MVT::i32, Legal);
5790b57cec5SDimitry Andric   setOperationAction(ISD::ROTR, MVT::i32, Legal);
5800b57cec5SDimitry Andric 
5810b57cec5SDimitry Andric   setOperationAction(ISD::ROTL, MVT::i16, Expand);
5825f757f3fSDimitry Andric   setOperationAction(ISD::ROTL, MVT::v2i16, Expand);
5830b57cec5SDimitry Andric   setOperationAction(ISD::ROTR, MVT::i16, Expand);
5845f757f3fSDimitry Andric   setOperationAction(ISD::ROTR, MVT::v2i16, Expand);
5850b57cec5SDimitry Andric   setOperationAction(ISD::ROTL, MVT::i8, Expand);
5860b57cec5SDimitry Andric   setOperationAction(ISD::ROTR, MVT::i8, Expand);
5870b57cec5SDimitry Andric   setOperationAction(ISD::BSWAP, MVT::i16, Expand);
5880b57cec5SDimitry Andric 
5890b57cec5SDimitry Andric   // Indirect branch is not supported.
5900b57cec5SDimitry Andric   // This also disables Jump Table creation.
5910b57cec5SDimitry Andric   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
5920b57cec5SDimitry Andric   setOperationAction(ISD::BRIND, MVT::Other, Expand);
5930b57cec5SDimitry Andric 
5940b57cec5SDimitry Andric   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
5950b57cec5SDimitry Andric   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
5960b57cec5SDimitry Andric 
5970b57cec5SDimitry Andric   // We want to legalize constant related memmove and memcopy
5980b57cec5SDimitry Andric   // intrinsics.
5990b57cec5SDimitry Andric   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
6000b57cec5SDimitry Andric 
6010b57cec5SDimitry Andric   // Turn FP extload into load/fpextend
6020b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
6030b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
60406c3fb27SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
60506c3fb27SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
6060b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
6070b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
6080b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
60906c3fb27SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2bf16, Expand);
61006c3fb27SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2bf16, Expand);
6110b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
6120b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
6130b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
61406c3fb27SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4bf16, Expand);
61506c3fb27SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4bf16, Expand);
6160b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
6175f757f3fSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
6185f757f3fSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);
6195f757f3fSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8bf16, Expand);
6205f757f3fSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8bf16, Expand);
6210b57cec5SDimitry Andric   // Turn FP truncstore into trunc + store.
6220b57cec5SDimitry Andric   // FIXME: vector types should also be expanded
6230b57cec5SDimitry Andric   setTruncStoreAction(MVT::f32, MVT::f16, Expand);
6240b57cec5SDimitry Andric   setTruncStoreAction(MVT::f64, MVT::f16, Expand);
62506c3fb27SDimitry Andric   setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
62606c3fb27SDimitry Andric   setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
6270b57cec5SDimitry Andric   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
6280b57cec5SDimitry Andric 
6290b57cec5SDimitry Andric   // PTX does not support load / store predicate registers
6300b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::i1, Custom);
6310b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::i1, Custom);
6320b57cec5SDimitry Andric 
6330b57cec5SDimitry Andric   for (MVT VT : MVT::integer_valuetypes()) {
6340b57cec5SDimitry Andric     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
6350b57cec5SDimitry Andric     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
6360fca6ea1SDimitry Andric     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
6370b57cec5SDimitry Andric     setTruncStoreAction(VT, MVT::i1, Expand);
6380b57cec5SDimitry Andric   }
6390b57cec5SDimitry Andric 
6405f757f3fSDimitry Andric   // expand extload of vector of integers.
6415f757f3fSDimitry Andric   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16,
6425f757f3fSDimitry Andric                    MVT::v2i8, Expand);
6435f757f3fSDimitry Andric   setTruncStoreAction(MVT::v2i16, MVT::v2i8, Expand);
6445f757f3fSDimitry Andric 
6450b57cec5SDimitry Andric   // This is legal in NVPTX
6460b57cec5SDimitry Andric   setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
6470b57cec5SDimitry Andric   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
6480b57cec5SDimitry Andric   setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
649bdd1243dSDimitry Andric   setOperationAction(ISD::ConstantFP, MVT::bf16, Legal);
6500b57cec5SDimitry Andric 
6515f757f3fSDimitry Andric   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
6525f757f3fSDimitry Andric   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
6535f757f3fSDimitry Andric 
6540b57cec5SDimitry Andric   // TRAP can be lowered to PTX trap
6550b57cec5SDimitry Andric   setOperationAction(ISD::TRAP, MVT::Other, Legal);
6560b57cec5SDimitry Andric 
6570b57cec5SDimitry Andric   // Register custom handling for vector loads/stores
6588bcb0991SDimitry Andric   for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
6590b57cec5SDimitry Andric     if (IsPTXVectorType(VT)) {
6600b57cec5SDimitry Andric       setOperationAction(ISD::LOAD, VT, Custom);
6610b57cec5SDimitry Andric       setOperationAction(ISD::STORE, VT, Custom);
6620b57cec5SDimitry Andric       setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
6630b57cec5SDimitry Andric     }
6640b57cec5SDimitry Andric   }
6650b57cec5SDimitry Andric 
666bdd1243dSDimitry Andric   // Support varargs.
667bdd1243dSDimitry Andric   setOperationAction(ISD::VASTART, MVT::Other, Custom);
668bdd1243dSDimitry Andric   setOperationAction(ISD::VAARG, MVT::Other, Custom);
669bdd1243dSDimitry Andric   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
670bdd1243dSDimitry Andric   setOperationAction(ISD::VAEND, MVT::Other, Expand);
671bdd1243dSDimitry Andric 
6720b57cec5SDimitry Andric   // Custom handling for i8 intrinsics
6730b57cec5SDimitry Andric   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
6740b57cec5SDimitry Andric 
6750b57cec5SDimitry Andric   for (const auto& Ty : {MVT::i16, MVT::i32, MVT::i64}) {
6760b57cec5SDimitry Andric     setOperationAction(ISD::ABS,  Ty, Legal);
6770b57cec5SDimitry Andric     setOperationAction(ISD::SMIN, Ty, Legal);
6780b57cec5SDimitry Andric     setOperationAction(ISD::SMAX, Ty, Legal);
6790b57cec5SDimitry Andric     setOperationAction(ISD::UMIN, Ty, Legal);
6800b57cec5SDimitry Andric     setOperationAction(ISD::UMAX, Ty, Legal);
6810b57cec5SDimitry Andric 
6820b57cec5SDimitry Andric     setOperationAction(ISD::CTPOP, Ty, Legal);
6830b57cec5SDimitry Andric     setOperationAction(ISD::CTLZ, Ty, Legal);
6840b57cec5SDimitry Andric   }
6850b57cec5SDimitry Andric 
6865f757f3fSDimitry Andric   setI16x2OperationAction(ISD::ABS, MVT::v2i16, Legal, Custom);
6875f757f3fSDimitry Andric   setI16x2OperationAction(ISD::SMIN, MVT::v2i16, Legal, Custom);
6885f757f3fSDimitry Andric   setI16x2OperationAction(ISD::SMAX, MVT::v2i16, Legal, Custom);
6895f757f3fSDimitry Andric   setI16x2OperationAction(ISD::UMIN, MVT::v2i16, Legal, Custom);
6905f757f3fSDimitry Andric   setI16x2OperationAction(ISD::UMAX, MVT::v2i16, Legal, Custom);
6915f757f3fSDimitry Andric   setI16x2OperationAction(ISD::CTPOP, MVT::v2i16, Legal, Expand);
6925f757f3fSDimitry Andric   setI16x2OperationAction(ISD::CTLZ, MVT::v2i16, Legal, Expand);
6935f757f3fSDimitry Andric 
6945f757f3fSDimitry Andric   setI16x2OperationAction(ISD::ADD, MVT::v2i16, Legal, Custom);
6955f757f3fSDimitry Andric   setI16x2OperationAction(ISD::SUB, MVT::v2i16, Legal, Custom);
6965f757f3fSDimitry Andric   setI16x2OperationAction(ISD::MUL, MVT::v2i16, Legal, Custom);
6975f757f3fSDimitry Andric   setI16x2OperationAction(ISD::SHL, MVT::v2i16, Legal, Custom);
6985f757f3fSDimitry Andric   setI16x2OperationAction(ISD::SREM, MVT::v2i16, Legal, Custom);
6995f757f3fSDimitry Andric   setI16x2OperationAction(ISD::UREM, MVT::v2i16, Legal, Custom);
7005f757f3fSDimitry Andric 
7015f757f3fSDimitry Andric   // Other arithmetic and logic ops are unsupported.
7025f757f3fSDimitry Andric   setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SRA, ISD::SRL, ISD::MULHS,
7035f757f3fSDimitry Andric                       ISD::MULHU, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
7045f757f3fSDimitry Andric                       ISD::SINT_TO_FP, ISD::UINT_TO_FP},
7055f757f3fSDimitry Andric                      MVT::v2i16, Expand);
7065f757f3fSDimitry Andric 
70781ad6265SDimitry Andric   setOperationAction(ISD::ADDC, MVT::i32, Legal);
70881ad6265SDimitry Andric   setOperationAction(ISD::ADDE, MVT::i32, Legal);
70981ad6265SDimitry Andric   setOperationAction(ISD::SUBC, MVT::i32, Legal);
71081ad6265SDimitry Andric   setOperationAction(ISD::SUBE, MVT::i32, Legal);
71181ad6265SDimitry Andric   if (STI.getPTXVersion() >= 43) {
71281ad6265SDimitry Andric     setOperationAction(ISD::ADDC, MVT::i64, Legal);
71381ad6265SDimitry Andric     setOperationAction(ISD::ADDE, MVT::i64, Legal);
71481ad6265SDimitry Andric     setOperationAction(ISD::SUBC, MVT::i64, Legal);
71581ad6265SDimitry Andric     setOperationAction(ISD::SUBE, MVT::i64, Legal);
71681ad6265SDimitry Andric   }
71781ad6265SDimitry Andric 
7180b57cec5SDimitry Andric   setOperationAction(ISD::CTTZ, MVT::i16, Expand);
7195f757f3fSDimitry Andric   setOperationAction(ISD::CTTZ, MVT::v2i16, Expand);
7200b57cec5SDimitry Andric   setOperationAction(ISD::CTTZ, MVT::i32, Expand);
7210b57cec5SDimitry Andric   setOperationAction(ISD::CTTZ, MVT::i64, Expand);
7220b57cec5SDimitry Andric 
7230b57cec5SDimitry Andric   // PTX does not directly support SELP of i1, so promote to i32 first
7240b57cec5SDimitry Andric   setOperationAction(ISD::SELECT, MVT::i1, Custom);
7250b57cec5SDimitry Andric 
7260b57cec5SDimitry Andric   // PTX cannot multiply two i64s in a single instruction.
7270b57cec5SDimitry Andric   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
7280b57cec5SDimitry Andric   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
7290b57cec5SDimitry Andric 
7300b57cec5SDimitry Andric   // We have some custom DAG combine patterns for these nodes
7315f757f3fSDimitry Andric   setTargetDAGCombine({ISD::ADD, ISD::AND, ISD::EXTRACT_VECTOR_ELT, ISD::FADD,
7325f757f3fSDimitry Andric                        ISD::LOAD, ISD::MUL, ISD::SHL, ISD::SREM, ISD::UREM,
7335f757f3fSDimitry Andric                        ISD::VSELECT});
7340b57cec5SDimitry Andric 
73506c3fb27SDimitry Andric   // setcc for f16x2 and bf16x2 needs special handling to prevent
73606c3fb27SDimitry Andric   // legalizer's attempt to scalarize it due to v2i1 not being legal.
73706c3fb27SDimitry Andric   if (STI.allowFP16Math() || STI.hasBF16Math())
7380b57cec5SDimitry Andric     setTargetDAGCombine(ISD::SETCC);
7390b57cec5SDimitry Andric 
7400b57cec5SDimitry Andric   // Promote fp16 arithmetic if fp16 hardware isn't available or the
7410b57cec5SDimitry Andric   // user passed --nvptx-no-fp16-math. The flag is useful because,
7420b57cec5SDimitry Andric   // although sm_53+ GPUs have some sort of FP16 support in
7430b57cec5SDimitry Andric   // hardware, only sm_53 and sm_60 have full implementation. Others
7440b57cec5SDimitry Andric   // only have token amount of hardware and are likely to run faster
7450b57cec5SDimitry Andric   // by using fp32 units instead.
7460b57cec5SDimitry Andric   for (const auto &Op : {ISD::FADD, ISD::FMUL, ISD::FSUB, ISD::FMA}) {
7470b57cec5SDimitry Andric     setFP16OperationAction(Op, MVT::f16, Legal, Promote);
7480b57cec5SDimitry Andric     setFP16OperationAction(Op, MVT::v2f16, Legal, Expand);
74906c3fb27SDimitry Andric     setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand);
75006c3fb27SDimitry Andric     // bf16 must be promoted to f32.
7515f757f3fSDimitry Andric     setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
75206c3fb27SDimitry Andric     if (getOperationAction(Op, MVT::bf16) == Promote)
75306c3fb27SDimitry Andric       AddPromotedToType(Op, MVT::bf16, MVT::f32);
7540b57cec5SDimitry Andric   }
7550b57cec5SDimitry Andric 
756bdd1243dSDimitry Andric   // f16/f16x2 neg was introduced in PTX 60, SM_53.
757bdd1243dSDimitry Andric   const bool IsFP16FP16x2NegAvailable = STI.getSmVersion() >= 53 &&
758bdd1243dSDimitry Andric                                         STI.getPTXVersion() >= 60 &&
759bdd1243dSDimitry Andric                                         STI.allowFP16Math();
760bdd1243dSDimitry Andric   for (const auto &VT : {MVT::f16, MVT::v2f16})
761bdd1243dSDimitry Andric     setOperationAction(ISD::FNEG, VT,
762bdd1243dSDimitry Andric                        IsFP16FP16x2NegAvailable ? Legal : Expand);
7630b57cec5SDimitry Andric 
76406c3fb27SDimitry Andric   setBF16OperationAction(ISD::FNEG, MVT::bf16, Legal, Expand);
76506c3fb27SDimitry Andric   setBF16OperationAction(ISD::FNEG, MVT::v2bf16, Legal, Expand);
7660b57cec5SDimitry Andric   // (would be) Library functions.
7670b57cec5SDimitry Andric 
7680b57cec5SDimitry Andric   // These map to conversion instructions for scalar FP types.
7690b57cec5SDimitry Andric   for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,
770bdd1243dSDimitry Andric                          ISD::FROUNDEVEN, ISD::FTRUNC}) {
7710b57cec5SDimitry Andric     setOperationAction(Op, MVT::f16, Legal);
7720b57cec5SDimitry Andric     setOperationAction(Op, MVT::f32, Legal);
7730b57cec5SDimitry Andric     setOperationAction(Op, MVT::f64, Legal);
7740b57cec5SDimitry Andric     setOperationAction(Op, MVT::v2f16, Expand);
77506c3fb27SDimitry Andric     setOperationAction(Op, MVT::v2bf16, Expand);
7765f757f3fSDimitry Andric     setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
7775f757f3fSDimitry Andric     if (getOperationAction(Op, MVT::bf16) == Promote)
7785f757f3fSDimitry Andric       AddPromotedToType(Op, MVT::bf16, MVT::f32);
7795f757f3fSDimitry Andric   }
7805f757f3fSDimitry Andric 
7810fca6ea1SDimitry Andric   if (STI.getSmVersion() < 80 || STI.getPTXVersion() < 71) {
7820fca6ea1SDimitry Andric     setOperationAction(ISD::BF16_TO_FP, MVT::f32, Expand);
7830fca6ea1SDimitry Andric   }
7840fca6ea1SDimitry Andric   if (STI.getSmVersion() < 90 || STI.getPTXVersion() < 78) {
7850fca6ea1SDimitry Andric     for (MVT VT : {MVT::bf16, MVT::f32, MVT::f64}) {
7860fca6ea1SDimitry Andric       setOperationAction(ISD::FP_EXTEND, VT, Custom);
7870fca6ea1SDimitry Andric       setOperationAction(ISD::FP_ROUND, VT, Custom);
7880fca6ea1SDimitry Andric     }
7890fca6ea1SDimitry Andric   }
7900fca6ea1SDimitry Andric 
7915f757f3fSDimitry Andric   // sm_80 only has conversions between f32 and bf16. Custom lower all other
7925f757f3fSDimitry Andric   // bf16 conversions.
7930fca6ea1SDimitry Andric   if (STI.getSmVersion() < 90 || STI.getPTXVersion() < 78) {
7945f757f3fSDimitry Andric     for (MVT VT : {MVT::i1, MVT::i16, MVT::i32, MVT::i64}) {
7955f757f3fSDimitry Andric       setOperationAction(
7965f757f3fSDimitry Andric           {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
7975f757f3fSDimitry Andric           VT, Custom);
7985f757f3fSDimitry Andric     }
7990fca6ea1SDimitry Andric     setOperationAction(
8000fca6ea1SDimitry Andric         {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
8010fca6ea1SDimitry Andric         MVT::bf16, Custom);
8020b57cec5SDimitry Andric   }
8030b57cec5SDimitry Andric 
8040b57cec5SDimitry Andric   setOperationAction(ISD::FROUND, MVT::f16, Promote);
8050b57cec5SDimitry Andric   setOperationAction(ISD::FROUND, MVT::v2f16, Expand);
80606c3fb27SDimitry Andric   setOperationAction(ISD::FROUND, MVT::v2bf16, Expand);
8070b57cec5SDimitry Andric   setOperationAction(ISD::FROUND, MVT::f32, Custom);
8080b57cec5SDimitry Andric   setOperationAction(ISD::FROUND, MVT::f64, Custom);
8095f757f3fSDimitry Andric   setOperationAction(ISD::FROUND, MVT::bf16, Promote);
8105f757f3fSDimitry Andric   AddPromotedToType(ISD::FROUND, MVT::bf16, MVT::f32);
8110b57cec5SDimitry Andric 
8120b57cec5SDimitry Andric   // 'Expand' implements FCOPYSIGN without calling an external library.
8130b57cec5SDimitry Andric   setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
8140b57cec5SDimitry Andric   setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand);
81506c3fb27SDimitry Andric   setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
81606c3fb27SDimitry Andric   setOperationAction(ISD::FCOPYSIGN, MVT::v2bf16, Expand);
8170b57cec5SDimitry Andric   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
8180b57cec5SDimitry Andric   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
8190b57cec5SDimitry Andric 
8200b57cec5SDimitry Andric   // These map to corresponding instructions for f32/f64. f16 must be
8210b57cec5SDimitry Andric   // promoted to f32. v2f16 is expanded to f16, which is then promoted
8220b57cec5SDimitry Andric   // to f32.
82304eeddc0SDimitry Andric   for (const auto &Op :
8245f757f3fSDimitry Andric        {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS}) {
8250b57cec5SDimitry Andric     setOperationAction(Op, MVT::f16, Promote);
8260b57cec5SDimitry Andric     setOperationAction(Op, MVT::f32, Legal);
8270b57cec5SDimitry Andric     setOperationAction(Op, MVT::f64, Legal);
8280b57cec5SDimitry Andric     setOperationAction(Op, MVT::v2f16, Expand);
82906c3fb27SDimitry Andric     setOperationAction(Op, MVT::v2bf16, Expand);
8305f757f3fSDimitry Andric     setOperationAction(Op, MVT::bf16, Promote);
8315f757f3fSDimitry Andric     AddPromotedToType(Op, MVT::bf16, MVT::f32);
8320b57cec5SDimitry Andric   }
8335f757f3fSDimitry Andric   for (const auto &Op : {ISD::FABS}) {
8345f757f3fSDimitry Andric     setOperationAction(Op, MVT::f16, Promote);
8355f757f3fSDimitry Andric     setOperationAction(Op, MVT::f32, Legal);
8365f757f3fSDimitry Andric     setOperationAction(Op, MVT::f64, Legal);
8375f757f3fSDimitry Andric     setOperationAction(Op, MVT::v2f16, Expand);
8385f757f3fSDimitry Andric     setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand);
8395f757f3fSDimitry Andric     setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
8405f757f3fSDimitry Andric     if (getOperationAction(Op, MVT::bf16) == Promote)
8415f757f3fSDimitry Andric       AddPromotedToType(Op, MVT::bf16, MVT::f32);
8425f757f3fSDimitry Andric   }
8435f757f3fSDimitry Andric 
84404eeddc0SDimitry Andric   // max.f16, max.f16x2 and max.NaN are supported on sm_80+.
84504eeddc0SDimitry Andric   auto GetMinMaxAction = [&](LegalizeAction NotSm80Action) {
84604eeddc0SDimitry Andric     bool IsAtLeastSm80 = STI.getSmVersion() >= 80 && STI.getPTXVersion() >= 70;
84704eeddc0SDimitry Andric     return IsAtLeastSm80 ? Legal : NotSm80Action;
84804eeddc0SDimitry Andric   };
84904eeddc0SDimitry Andric   for (const auto &Op : {ISD::FMINNUM, ISD::FMAXNUM}) {
85004eeddc0SDimitry Andric     setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Promote), Promote);
85104eeddc0SDimitry Andric     setOperationAction(Op, MVT::f32, Legal);
85204eeddc0SDimitry Andric     setOperationAction(Op, MVT::f64, Legal);
85304eeddc0SDimitry Andric     setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
85406c3fb27SDimitry Andric     setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand);
8555f757f3fSDimitry Andric     setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
8565f757f3fSDimitry Andric     if (getOperationAction(Op, MVT::bf16) == Promote)
8575f757f3fSDimitry Andric       AddPromotedToType(Op, MVT::bf16, MVT::f32);
85804eeddc0SDimitry Andric   }
85904eeddc0SDimitry Andric   for (const auto &Op : {ISD::FMINIMUM, ISD::FMAXIMUM}) {
86004eeddc0SDimitry Andric     setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Expand), Expand);
86106c3fb27SDimitry Andric     setFP16OperationAction(Op, MVT::bf16, Legal, Expand);
86204eeddc0SDimitry Andric     setOperationAction(Op, MVT::f32, GetMinMaxAction(Expand));
86304eeddc0SDimitry Andric     setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
86406c3fb27SDimitry Andric     setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand);
86504eeddc0SDimitry Andric   }
8660b57cec5SDimitry Andric 
8670fca6ea1SDimitry Andric   // Custom lowering for inline asm with 128-bit operands
8680fca6ea1SDimitry Andric   setOperationAction(ISD::CopyToReg, MVT::i128, Custom);
8690fca6ea1SDimitry Andric   setOperationAction(ISD::CopyFromReg, MVT::i128, Custom);
8700fca6ea1SDimitry Andric 
8710b57cec5SDimitry Andric   // No FEXP2, FLOG2.  The PTX ex2 and log2 functions are always approximate.
8720b57cec5SDimitry Andric   // No FPOW or FREM in PTX.
8730b57cec5SDimitry Andric 
8740b57cec5SDimitry Andric   // Now deduce the information based on the above mentioned
8750b57cec5SDimitry Andric   // actions
8760b57cec5SDimitry Andric   computeRegisterProperties(STI.getRegisterInfo());
87781ad6265SDimitry Andric 
87881ad6265SDimitry Andric   setMinCmpXchgSizeInBits(32);
8791db9f3b2SDimitry Andric   setMaxAtomicSizeInBitsSupported(64);
8800fca6ea1SDimitry Andric   setMaxDivRemBitWidthSupported(64);
8810b57cec5SDimitry Andric }
8820b57cec5SDimitry Andric 
8830b57cec5SDimitry Andric const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
8840fca6ea1SDimitry Andric 
8850fca6ea1SDimitry Andric #define MAKE_CASE(V)                                                           \
8860fca6ea1SDimitry Andric   case V:                                                                      \
8870fca6ea1SDimitry Andric     return #V;
8880fca6ea1SDimitry Andric 
8890b57cec5SDimitry Andric   switch ((NVPTXISD::NodeType)Opcode) {
8900b57cec5SDimitry Andric   case NVPTXISD::FIRST_NUMBER:
8910b57cec5SDimitry Andric     break;
8920b57cec5SDimitry Andric 
8930fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::CALL)
8940fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::RET_GLUE)
8950fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::LOAD_PARAM)
8960fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Wrapper)
8970fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::DeclareParam)
8980fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::DeclareScalarParam)
8990fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::DeclareRet)
9000fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::DeclareScalarRet)
9010fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::DeclareRetParam)
9020fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::PrintCall)
9030fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::PrintConvergentCall)
9040fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::PrintCallUni)
9050fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::PrintConvergentCallUni)
9060fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::LoadParam)
9070fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::LoadParamV2)
9080fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::LoadParamV4)
9090fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::StoreParam)
9100fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::StoreParamV2)
9110fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::StoreParamV4)
9120fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::StoreParamS32)
9130fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::StoreParamU32)
9140fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::CallArgBegin)
9150fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::CallArg)
9160fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::LastCallArg)
9170fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::CallArgEnd)
9180fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::CallVoid)
9190fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::CallVal)
9200fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::CallSymbol)
9210fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Prototype)
9220fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::MoveParam)
9230fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::StoreRetval)
9240fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::StoreRetvalV2)
9250fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::StoreRetvalV4)
9260fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::PseudoUseParam)
9270fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::RETURN)
9280fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::CallSeqBegin)
9290fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::CallSeqEnd)
9300fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::CallPrototype)
9310fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::ProxyReg)
9320fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::LoadV2)
9330fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::LoadV4)
9340fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::LDGV2)
9350fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::LDGV4)
9360fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::LDUV2)
9370fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::LDUV4)
9380fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::StoreV2)
9390fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::StoreV4)
9400fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::FUN_SHFL_CLAMP)
9410fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::FUN_SHFR_CLAMP)
9420fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::IMAD)
9430fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::BFE)
9440fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::BFI)
9450fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::PRMT)
9460fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::DYNAMIC_STACKALLOC)
9470fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::SETP_F16X2)
9480fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::SETP_BF16X2)
9490fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Dummy)
9500fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::MUL_WIDE_SIGNED)
9510fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::MUL_WIDE_UNSIGNED)
9520fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DFloatS32)
9530fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DFloatFloat)
9540fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DFloatFloatLevel)
9550fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DFloatFloatGrad)
9560fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DS32S32)
9570fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DS32Float)
9580fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DS32FloatLevel)
9590fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DS32FloatGrad)
9600fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DU32S32)
9610fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DU32Float)
9620fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DU32FloatLevel)
9630fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DU32FloatGrad)
9640fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DArrayFloatS32)
9650fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DArrayFloatFloat)
9660fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DArrayFloatFloatLevel)
9670fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DArrayFloatFloatGrad)
9680fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DArrayS32S32)
9690fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DArrayS32Float)
9700fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DArrayS32FloatLevel)
9710fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DArrayS32FloatGrad)
9720fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DArrayU32S32)
9730fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DArrayU32Float)
9740fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DArrayU32FloatLevel)
9750fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex1DArrayU32FloatGrad)
9760fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DFloatS32)
9770fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DFloatFloat)
9780fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DFloatFloatLevel)
9790fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DFloatFloatGrad)
9800fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DS32S32)
9810fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DS32Float)
9820fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DS32FloatLevel)
9830fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DS32FloatGrad)
9840fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DU32S32)
9850fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DU32Float)
9860fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DU32FloatLevel)
9870fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DU32FloatGrad)
9880fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DArrayFloatS32)
9890fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DArrayFloatFloat)
9900fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DArrayFloatFloatLevel)
9910fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DArrayFloatFloatGrad)
9920fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DArrayS32S32)
9930fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DArrayS32Float)
9940fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DArrayS32FloatLevel)
9950fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DArrayS32FloatGrad)
9960fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DArrayU32S32)
9970fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DArrayU32Float)
9980fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DArrayU32FloatLevel)
9990fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex2DArrayU32FloatGrad)
10000fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex3DFloatS32)
10010fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex3DFloatFloat)
10020fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex3DFloatFloatLevel)
10030fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex3DFloatFloatGrad)
10040fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex3DS32S32)
10050fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex3DS32Float)
10060fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex3DS32FloatLevel)
10070fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex3DS32FloatGrad)
10080fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex3DU32S32)
10090fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex3DU32Float)
10100fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex3DU32FloatLevel)
10110fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tex3DU32FloatGrad)
10120fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexCubeFloatFloat)
10130fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexCubeFloatFloatLevel)
10140fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexCubeS32Float)
10150fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexCubeS32FloatLevel)
10160fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexCubeU32Float)
10170fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexCubeU32FloatLevel)
10180fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexCubeArrayFloatFloat)
10190fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexCubeArrayFloatFloatLevel)
10200fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexCubeArrayS32Float)
10210fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexCubeArrayS32FloatLevel)
10220fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexCubeArrayU32Float)
10230fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexCubeArrayU32FloatLevel)
10240fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4R2DFloatFloat)
10250fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4G2DFloatFloat)
10260fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4B2DFloatFloat)
10270fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4A2DFloatFloat)
10280fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4R2DS64Float)
10290fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4G2DS64Float)
10300fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4B2DS64Float)
10310fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4A2DS64Float)
10320fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4R2DU64Float)
10330fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4G2DU64Float)
10340fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4B2DU64Float)
10350fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4A2DU64Float)
10360b57cec5SDimitry Andric 
10370fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DFloatS32)
10380fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DFloatFloat)
10390fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DFloatFloatLevel)
10400fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DFloatFloatGrad)
10410fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DS32S32)
10420fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DS32Float)
10430fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DS32FloatLevel)
10440fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DS32FloatGrad)
10450fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DU32S32)
10460fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DU32Float)
10470fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DU32FloatLevel)
10480fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DU32FloatGrad)
10490fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DArrayFloatS32)
10500fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DArrayFloatFloat)
10510fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DArrayFloatFloatLevel)
10520fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DArrayFloatFloatGrad)
10530fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DArrayS32S32)
10540fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DArrayS32Float)
10550fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DArrayS32FloatLevel)
10560fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DArrayS32FloatGrad)
10570fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DArrayU32S32)
10580fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DArrayU32Float)
10590fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DArrayU32FloatLevel)
10600fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified1DArrayU32FloatGrad)
10610fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DFloatS32)
10620fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DFloatFloat)
10630fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DFloatFloatLevel)
10640fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DFloatFloatGrad)
10650fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DS32S32)
10660fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DS32Float)
10670fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DS32FloatLevel)
10680fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DS32FloatGrad)
10690fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DU32S32)
10700fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DU32Float)
10710fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DU32FloatLevel)
10720fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DU32FloatGrad)
10730fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DArrayFloatS32)
10740fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DArrayFloatFloat)
10750fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DArrayFloatFloatLevel)
10760fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DArrayFloatFloatGrad)
10770fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DArrayS32S32)
10780fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DArrayS32Float)
10790fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DArrayS32FloatLevel)
10800fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DArrayS32FloatGrad)
10810fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DArrayU32S32)
10820fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DArrayU32Float)
10830fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DArrayU32FloatLevel)
10840fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified2DArrayU32FloatGrad)
10850fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified3DFloatS32)
10860fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified3DFloatFloat)
10870fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified3DFloatFloatLevel)
10880fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified3DFloatFloatGrad)
10890fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified3DS32S32)
10900fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified3DS32Float)
10910fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified3DS32FloatLevel)
10920fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified3DS32FloatGrad)
10930fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified3DU32S32)
10940fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified3DU32Float)
10950fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified3DU32FloatLevel)
10960fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnified3DU32FloatGrad)
10970fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnifiedCubeFloatFloat)
10980fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnifiedCubeFloatFloatLevel)
10990fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnifiedCubeS32Float)
11000fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnifiedCubeS32FloatLevel)
11010fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnifiedCubeU32Float)
11020fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnifiedCubeU32FloatLevel)
11030fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayFloatFloat)
11040fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel)
11050fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayS32Float)
11060fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayS32FloatLevel)
11070fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayU32Float)
11080fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayU32FloatLevel)
11090fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnifiedCubeFloatFloatGrad)
11100fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnifiedCubeS32FloatGrad)
11110fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnifiedCubeU32FloatGrad)
11120fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad)
11130fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayS32FloatGrad)
11140fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::TexUnifiedCubeArrayU32FloatGrad)
11150fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4UnifiedR2DFloatFloat)
11160fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4UnifiedG2DFloatFloat)
11170fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4UnifiedB2DFloatFloat)
11180fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4UnifiedA2DFloatFloat)
11190fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4UnifiedR2DS64Float)
11200fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4UnifiedG2DS64Float)
11210fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4UnifiedB2DS64Float)
11220fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4UnifiedA2DS64Float)
11230fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4UnifiedR2DU64Float)
11240fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4UnifiedG2DU64Float)
11250fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4UnifiedB2DU64Float)
11260fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Tld4UnifiedA2DU64Float)
11270b57cec5SDimitry Andric 
11280fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DI8Clamp)
11290fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DI16Clamp)
11300fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DI32Clamp)
11310fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DI64Clamp)
11320fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV2I8Clamp)
11330fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV2I16Clamp)
11340fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV2I32Clamp)
11350fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV2I64Clamp)
11360fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV4I8Clamp)
11370fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV4I16Clamp)
11380fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV4I32Clamp)
11390b57cec5SDimitry Andric 
11400fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayI8Clamp)
11410fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayI16Clamp)
11420fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayI32Clamp)
11430fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayI64Clamp)
11440fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV2I8Clamp)
11450fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV2I16Clamp)
11460fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV2I32Clamp)
11470fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV2I64Clamp)
11480fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV4I8Clamp)
11490fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV4I16Clamp)
11500fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV4I32Clamp)
11510b57cec5SDimitry Andric 
11520fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DI8Clamp)
11530fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DI16Clamp)
11540fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DI32Clamp)
11550fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DI64Clamp)
11560fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV2I8Clamp)
11570fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV2I16Clamp)
11580fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV2I32Clamp)
11590fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV2I64Clamp)
11600fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV4I8Clamp)
11610fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV4I16Clamp)
11620fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV4I32Clamp)
11630b57cec5SDimitry Andric 
11640fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayI8Clamp)
11650fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayI16Clamp)
11660fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayI32Clamp)
11670fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayI64Clamp)
11680fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV2I8Clamp)
11690fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV2I16Clamp)
11700fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV2I32Clamp)
11710fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV2I64Clamp)
11720fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV4I8Clamp)
11730fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV4I16Clamp)
11740fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV4I32Clamp)
11750b57cec5SDimitry Andric 
11760fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DI8Clamp)
11770fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DI16Clamp)
11780fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DI32Clamp)
11790fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DI64Clamp)
11800fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV2I8Clamp)
11810fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV2I16Clamp)
11820fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV2I32Clamp)
11830fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV2I64Clamp)
11840fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV4I8Clamp)
11850fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV4I16Clamp)
11860fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV4I32Clamp)
11870b57cec5SDimitry Andric 
11880fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DI8Trap)
11890fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DI16Trap)
11900fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DI32Trap)
11910fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DI64Trap)
11920fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV2I8Trap)
11930fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV2I16Trap)
11940fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV2I32Trap)
11950fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV2I64Trap)
11960fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV4I8Trap)
11970fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV4I16Trap)
11980fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV4I32Trap)
11990b57cec5SDimitry Andric 
12000fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayI8Trap)
12010fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayI16Trap)
12020fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayI32Trap)
12030fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayI64Trap)
12040fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV2I8Trap)
12050fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV2I16Trap)
12060fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV2I32Trap)
12070fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV2I64Trap)
12080fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV4I8Trap)
12090fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV4I16Trap)
12100fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV4I32Trap)
12110b57cec5SDimitry Andric 
12120fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DI8Trap)
12130fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DI16Trap)
12140fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DI32Trap)
12150fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DI64Trap)
12160fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV2I8Trap)
12170fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV2I16Trap)
12180fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV2I32Trap)
12190fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV2I64Trap)
12200fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV4I8Trap)
12210fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV4I16Trap)
12220fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV4I32Trap)
12230b57cec5SDimitry Andric 
12240fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayI8Trap)
12250fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayI16Trap)
12260fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayI32Trap)
12270fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayI64Trap)
12280fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV2I8Trap)
12290fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV2I16Trap)
12300fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV2I32Trap)
12310fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV2I64Trap)
12320fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV4I8Trap)
12330fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV4I16Trap)
12340fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV4I32Trap)
12350b57cec5SDimitry Andric 
12360fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DI8Trap)
12370fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DI16Trap)
12380fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DI32Trap)
12390fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DI64Trap)
12400fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV2I8Trap)
12410fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV2I16Trap)
12420fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV2I32Trap)
12430fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV2I64Trap)
12440fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV4I8Trap)
12450fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV4I16Trap)
12460fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV4I32Trap)
12470b57cec5SDimitry Andric 
12480fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DI8Zero)
12490fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DI16Zero)
12500fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DI32Zero)
12510fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DI64Zero)
12520fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV2I8Zero)
12530fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV2I16Zero)
12540fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV2I32Zero)
12550fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV2I64Zero)
12560fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV4I8Zero)
12570fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV4I16Zero)
12580fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DV4I32Zero)
12590b57cec5SDimitry Andric 
12600fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayI8Zero)
12610fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayI16Zero)
12620fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayI32Zero)
12630fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayI64Zero)
12640fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV2I8Zero)
12650fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV2I16Zero)
12660fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV2I32Zero)
12670fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV2I64Zero)
12680fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV4I8Zero)
12690fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV4I16Zero)
12700fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld1DArrayV4I32Zero)
12710b57cec5SDimitry Andric 
12720fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DI8Zero)
12730fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DI16Zero)
12740fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DI32Zero)
12750fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DI64Zero)
12760fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV2I8Zero)
12770fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV2I16Zero)
12780fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV2I32Zero)
12790fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV2I64Zero)
12800fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV4I8Zero)
12810fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV4I16Zero)
12820fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DV4I32Zero)
12830b57cec5SDimitry Andric 
12840fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayI8Zero)
12850fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayI16Zero)
12860fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayI32Zero)
12870fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayI64Zero)
12880fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV2I8Zero)
12890fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV2I16Zero)
12900fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV2I32Zero)
12910fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV2I64Zero)
12920fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV4I8Zero)
12930fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV4I16Zero)
12940fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld2DArrayV4I32Zero)
12950fca6ea1SDimitry Andric 
12960fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DI8Zero)
12970fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DI16Zero)
12980fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DI32Zero)
12990fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DI64Zero)
13000fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV2I8Zero)
13010fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV2I16Zero)
13020fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV2I32Zero)
13030fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV2I64Zero)
13040fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV4I8Zero)
13050fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV4I16Zero)
13060fca6ea1SDimitry Andric     MAKE_CASE(NVPTXISD::Suld3DV4I32Zero)
13070b57cec5SDimitry Andric   }
13080b57cec5SDimitry Andric   return nullptr;
13090fca6ea1SDimitry Andric 
13100fca6ea1SDimitry Andric #undef MAKE_CASE
13110b57cec5SDimitry Andric }
13120b57cec5SDimitry Andric 
13130b57cec5SDimitry Andric TargetLoweringBase::LegalizeTypeAction
13140b57cec5SDimitry Andric NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
1315fe6060f1SDimitry Andric   if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
1316fe6060f1SDimitry Andric       VT.getScalarType() == MVT::i1)
13170b57cec5SDimitry Andric     return TypeSplitVector;
13185f757f3fSDimitry Andric   if (Isv2x16VT(VT))
13190b57cec5SDimitry Andric     return TypeLegal;
13200b57cec5SDimitry Andric   return TargetLoweringBase::getPreferredVectorAction(VT);
13210b57cec5SDimitry Andric }
13220b57cec5SDimitry Andric 
13230b57cec5SDimitry Andric SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13240b57cec5SDimitry Andric                                              int Enabled, int &ExtraSteps,
13250b57cec5SDimitry Andric                                              bool &UseOneConst,
13260b57cec5SDimitry Andric                                              bool Reciprocal) const {
13270b57cec5SDimitry Andric   if (!(Enabled == ReciprocalEstimate::Enabled ||
13280b57cec5SDimitry Andric         (Enabled == ReciprocalEstimate::Unspecified && !usePrecSqrtF32())))
13290b57cec5SDimitry Andric     return SDValue();
13300b57cec5SDimitry Andric 
13310b57cec5SDimitry Andric   if (ExtraSteps == ReciprocalEstimate::Unspecified)
13320b57cec5SDimitry Andric     ExtraSteps = 0;
13330b57cec5SDimitry Andric 
13340b57cec5SDimitry Andric   SDLoc DL(Operand);
13350b57cec5SDimitry Andric   EVT VT = Operand.getValueType();
13360b57cec5SDimitry Andric   bool Ftz = useF32FTZ(DAG.getMachineFunction());
13370b57cec5SDimitry Andric 
13380b57cec5SDimitry Andric   auto MakeIntrinsicCall = [&](Intrinsic::ID IID) {
13390b57cec5SDimitry Andric     return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13400b57cec5SDimitry Andric                        DAG.getConstant(IID, DL, MVT::i32), Operand);
13410b57cec5SDimitry Andric   };
13420b57cec5SDimitry Andric 
13430b57cec5SDimitry Andric   // The sqrt and rsqrt refinement processes assume we always start out with an
13440b57cec5SDimitry Andric   // approximation of the rsqrt.  Therefore, if we're going to do any refinement
13450b57cec5SDimitry Andric   // (i.e. ExtraSteps > 0), we must return an rsqrt.  But if we're *not* doing
13460b57cec5SDimitry Andric   // any refinement, we must return a regular sqrt.
13470b57cec5SDimitry Andric   if (Reciprocal || ExtraSteps > 0) {
13480b57cec5SDimitry Andric     if (VT == MVT::f32)
13490b57cec5SDimitry Andric       return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_rsqrt_approx_ftz_f
13500b57cec5SDimitry Andric                                    : Intrinsic::nvvm_rsqrt_approx_f);
13510b57cec5SDimitry Andric     else if (VT == MVT::f64)
13520b57cec5SDimitry Andric       return MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d);
13530b57cec5SDimitry Andric     else
13540b57cec5SDimitry Andric       return SDValue();
13550b57cec5SDimitry Andric   } else {
13560b57cec5SDimitry Andric     if (VT == MVT::f32)
13570b57cec5SDimitry Andric       return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f
13580b57cec5SDimitry Andric                                    : Intrinsic::nvvm_sqrt_approx_f);
13590b57cec5SDimitry Andric     else {
13600b57cec5SDimitry Andric       // There's no sqrt.approx.f64 instruction, so we emit
13610b57cec5SDimitry Andric       // reciprocal(rsqrt(x)).  This is faster than
13620b57cec5SDimitry Andric       // select(x == 0, 0, x * rsqrt(x)).  (In fact, it's faster than plain
13630b57cec5SDimitry Andric       // x * rsqrt(x).)
13640b57cec5SDimitry Andric       return DAG.getNode(
13650b57cec5SDimitry Andric           ISD::INTRINSIC_WO_CHAIN, DL, VT,
13660b57cec5SDimitry Andric           DAG.getConstant(Intrinsic::nvvm_rcp_approx_ftz_d, DL, MVT::i32),
13670b57cec5SDimitry Andric           MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d));
13680b57cec5SDimitry Andric     }
13690b57cec5SDimitry Andric   }
13700b57cec5SDimitry Andric }
13710b57cec5SDimitry Andric 
13720b57cec5SDimitry Andric SDValue
13730b57cec5SDimitry Andric NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
13740b57cec5SDimitry Andric   SDLoc dl(Op);
13750b57cec5SDimitry Andric   const GlobalAddressSDNode *GAN = cast<GlobalAddressSDNode>(Op);
13760b57cec5SDimitry Andric   auto PtrVT = getPointerTy(DAG.getDataLayout(), GAN->getAddressSpace());
13770b57cec5SDimitry Andric   Op = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, PtrVT);
13780b57cec5SDimitry Andric   return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
13790b57cec5SDimitry Andric }
13800b57cec5SDimitry Andric 
138106c3fb27SDimitry Andric static bool IsTypePassedAsArray(const Type *Ty) {
138206c3fb27SDimitry Andric   return Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128) ||
138306c3fb27SDimitry Andric          Ty->isHalfTy() || Ty->isBFloatTy();
138406c3fb27SDimitry Andric }
138506c3fb27SDimitry Andric 
13860b57cec5SDimitry Andric std::string NVPTXTargetLowering::getPrototype(
13870b57cec5SDimitry Andric     const DataLayout &DL, Type *retTy, const ArgListTy &Args,
13885ffd83dbSDimitry Andric     const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign retAlignment,
1389bdd1243dSDimitry Andric     std::optional<std::pair<unsigned, const APInt &>> VAInfo,
1390e8d8bef9SDimitry Andric     const CallBase &CB, unsigned UniqueCallSite) const {
13910b57cec5SDimitry Andric   auto PtrVT = getPointerTy(DL);
13920b57cec5SDimitry Andric 
13930b57cec5SDimitry Andric   bool isABI = (STI.getSmVersion() >= 20);
13940b57cec5SDimitry Andric   assert(isABI && "Non-ABI compilation is not supported");
13950b57cec5SDimitry Andric   if (!isABI)
13960b57cec5SDimitry Andric     return "";
13970b57cec5SDimitry Andric 
1398bdd1243dSDimitry Andric   std::string Prototype;
1399bdd1243dSDimitry Andric   raw_string_ostream O(Prototype);
1400e8d8bef9SDimitry Andric   O << "prototype_" << UniqueCallSite << " : .callprototype ";
14010b57cec5SDimitry Andric 
14020b57cec5SDimitry Andric   if (retTy->getTypeID() == Type::VoidTyID) {
14030b57cec5SDimitry Andric     O << "()";
14040b57cec5SDimitry Andric   } else {
14050b57cec5SDimitry Andric     O << "(";
140606c3fb27SDimitry Andric     if ((retTy->isFloatingPointTy() || retTy->isIntegerTy()) &&
140706c3fb27SDimitry Andric         !IsTypePassedAsArray(retTy)) {
14080b57cec5SDimitry Andric       unsigned size = 0;
14090b57cec5SDimitry Andric       if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
14100b57cec5SDimitry Andric         size = ITy->getBitWidth();
14110b57cec5SDimitry Andric       } else {
14120b57cec5SDimitry Andric         assert(retTy->isFloatingPointTy() &&
14130b57cec5SDimitry Andric                "Floating point type expected here");
14140b57cec5SDimitry Andric         size = retTy->getPrimitiveSizeInBits();
14150b57cec5SDimitry Andric       }
14160b57cec5SDimitry Andric       // PTX ABI requires all scalar return values to be at least 32
14170b57cec5SDimitry Andric       // bits in size.  fp16 normally uses .b16 as its storage type in
14180b57cec5SDimitry Andric       // PTX, so its size must be adjusted here, too.
1419fcaf7f86SDimitry Andric       size = promoteScalarArgumentSize(size);
14200b57cec5SDimitry Andric 
14210b57cec5SDimitry Andric       O << ".param .b" << size << " _";
14220b57cec5SDimitry Andric     } else if (isa<PointerType>(retTy)) {
14230b57cec5SDimitry Andric       O << ".param .b" << PtrVT.getSizeInBits() << " _";
142406c3fb27SDimitry Andric     } else if (IsTypePassedAsArray(retTy)) {
14255ffd83dbSDimitry Andric       O << ".param .align " << (retAlignment ? retAlignment->value() : 0)
14265ffd83dbSDimitry Andric         << " .b8 _[" << DL.getTypeAllocSize(retTy) << "]";
14270b57cec5SDimitry Andric     } else {
14280b57cec5SDimitry Andric       llvm_unreachable("Unknown return type");
14290b57cec5SDimitry Andric     }
14300b57cec5SDimitry Andric     O << ") ";
14310b57cec5SDimitry Andric   }
14320b57cec5SDimitry Andric   O << "_ (";
14330b57cec5SDimitry Andric 
14340b57cec5SDimitry Andric   bool first = true;
14350b57cec5SDimitry Andric 
1436bdd1243dSDimitry Andric   unsigned NumArgs = VAInfo ? VAInfo->first : Args.size();
1437bdd1243dSDimitry Andric   for (unsigned i = 0, OIdx = 0; i != NumArgs; ++i, ++OIdx) {
14380b57cec5SDimitry Andric     Type *Ty = Args[i].Ty;
14390b57cec5SDimitry Andric     if (!first) {
14400b57cec5SDimitry Andric       O << ", ";
14410b57cec5SDimitry Andric     }
14420b57cec5SDimitry Andric     first = false;
14430b57cec5SDimitry Andric 
14440b57cec5SDimitry Andric     if (!Outs[OIdx].Flags.isByVal()) {
144506c3fb27SDimitry Andric       if (IsTypePassedAsArray(Ty)) {
14460fca6ea1SDimitry Andric         Align ParamAlign =
14470fca6ea1SDimitry Andric             getArgumentAlignment(&CB, Ty, i + AttributeList::FirstArgIndex, DL);
14480fca6ea1SDimitry Andric         O << ".param .align " << ParamAlign.value() << " .b8 ";
14490b57cec5SDimitry Andric         O << "_";
145081ad6265SDimitry Andric         O << "[" << DL.getTypeAllocSize(Ty) << "]";
14510b57cec5SDimitry Andric         // update the index for Outs
14520b57cec5SDimitry Andric         SmallVector<EVT, 16> vtparts;
14530b57cec5SDimitry Andric         ComputeValueVTs(*this, DL, Ty, vtparts);
14540b57cec5SDimitry Andric         if (unsigned len = vtparts.size())
14550b57cec5SDimitry Andric           OIdx += len - 1;
14560b57cec5SDimitry Andric         continue;
14570b57cec5SDimitry Andric       }
14580b57cec5SDimitry Andric       // i8 types in IR will be i16 types in SDAG
14590b57cec5SDimitry Andric       assert((getValueType(DL, Ty) == Outs[OIdx].VT ||
14600b57cec5SDimitry Andric               (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
14610b57cec5SDimitry Andric              "type mismatch between callee prototype and arguments");
14620b57cec5SDimitry Andric       // scalar type
14630b57cec5SDimitry Andric       unsigned sz = 0;
14640b57cec5SDimitry Andric       if (isa<IntegerType>(Ty)) {
14650b57cec5SDimitry Andric         sz = cast<IntegerType>(Ty)->getBitWidth();
1466fcaf7f86SDimitry Andric         sz = promoteScalarArgumentSize(sz);
14670b57cec5SDimitry Andric       } else if (isa<PointerType>(Ty)) {
14680b57cec5SDimitry Andric         sz = PtrVT.getSizeInBits();
146906c3fb27SDimitry Andric       } else {
14700b57cec5SDimitry Andric         sz = Ty->getPrimitiveSizeInBits();
147106c3fb27SDimitry Andric       }
14720b57cec5SDimitry Andric       O << ".param .b" << sz << " ";
14730b57cec5SDimitry Andric       O << "_";
14740b57cec5SDimitry Andric       continue;
14750b57cec5SDimitry Andric     }
14760b57cec5SDimitry Andric 
147736b606aeSDimitry Andric     // Indirect calls need strict ABI alignment so we disable optimizations by
147836b606aeSDimitry Andric     // not providing a function to optimize.
147981ad6265SDimitry Andric     Type *ETy = Args[i].IndirectType;
1480bdd1243dSDimitry Andric     Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
1481bdd1243dSDimitry Andric     Align ParamByValAlign =
148236b606aeSDimitry Andric         getFunctionByValParamAlign(/*F=*/nullptr, ETy, InitialAlign, DL);
148381ad6265SDimitry Andric 
148481ad6265SDimitry Andric     O << ".param .align " << ParamByValAlign.value() << " .b8 ";
14850b57cec5SDimitry Andric     O << "_";
148681ad6265SDimitry Andric     O << "[" << Outs[OIdx].Flags.getByValSize() << "]";
14870b57cec5SDimitry Andric   }
1488bdd1243dSDimitry Andric 
1489bdd1243dSDimitry Andric   if (VAInfo)
1490bdd1243dSDimitry Andric     O << (first ? "" : ",") << " .param .align " << VAInfo->second
1491bdd1243dSDimitry Andric       << " .b8 _[]\n";
1492bdd1243dSDimitry Andric   O << ")";
1493bdd1243dSDimitry Andric   if (shouldEmitPTXNoReturn(&CB, *nvTM))
1494bdd1243dSDimitry Andric     O << " .noreturn";
1495bdd1243dSDimitry Andric   O << ";";
1496bdd1243dSDimitry Andric 
1497bdd1243dSDimitry Andric   return Prototype;
14980b57cec5SDimitry Andric }
14990b57cec5SDimitry Andric 
15000fca6ea1SDimitry Andric Align NVPTXTargetLowering::getFunctionArgumentAlignment(
15010fca6ea1SDimitry Andric     const Function *F, Type *Ty, unsigned Idx, const DataLayout &DL) const {
15020fca6ea1SDimitry Andric   return getAlign(*F, Idx).value_or(getFunctionParamOptimizedAlign(F, Ty, DL));
15030fca6ea1SDimitry Andric }
15040fca6ea1SDimitry Andric 
15057a6dacacSDimitry Andric Align NVPTXTargetLowering::getArgumentAlignment(const CallBase *CB, Type *Ty,
15065ffd83dbSDimitry Andric                                                 unsigned Idx,
15070b57cec5SDimitry Andric                                                 const DataLayout &DL) const {
15085ffd83dbSDimitry Andric   if (!CB) {
15090b57cec5SDimitry Andric     // CallSite is zero, fallback to ABI type alignment
15105ffd83dbSDimitry Andric     return DL.getABITypeAlign(Ty);
15110b57cec5SDimitry Andric   }
15120b57cec5SDimitry Andric 
15135ffd83dbSDimitry Andric   const Function *DirectCallee = CB->getCalledFunction();
15140b57cec5SDimitry Andric 
15150b57cec5SDimitry Andric   if (!DirectCallee) {
15160b57cec5SDimitry Andric     // We don't have a direct function symbol, but that may be because of
15170b57cec5SDimitry Andric     // constant cast instructions in the call.
15180b57cec5SDimitry Andric 
15190b57cec5SDimitry Andric     // With bitcast'd call targets, the instruction will be the call
15205ffd83dbSDimitry Andric     if (const auto *CI = dyn_cast<CallInst>(CB)) {
15210b57cec5SDimitry Andric       // Check if we have call alignment metadata
15220fca6ea1SDimitry Andric       if (MaybeAlign StackAlign = getAlign(*CI, Idx))
15230fca6ea1SDimitry Andric         return StackAlign.value();
15240b57cec5SDimitry Andric     }
1525bdd1243dSDimitry Andric     DirectCallee = getMaybeBitcastedCallee(CB);
15260b57cec5SDimitry Andric   }
15270b57cec5SDimitry Andric 
15280b57cec5SDimitry Andric   // Check for function alignment information if we found that the
15290b57cec5SDimitry Andric   // ultimate target is a Function
15300fca6ea1SDimitry Andric   if (DirectCallee)
15310fca6ea1SDimitry Andric     return getFunctionArgumentAlignment(DirectCallee, Ty, Idx, DL);
15320b57cec5SDimitry Andric 
153381ad6265SDimitry Andric   // Call is indirect, fall back to the ABI type alignment
15345ffd83dbSDimitry Andric   return DL.getABITypeAlign(Ty);
15350b57cec5SDimitry Andric }
15360b57cec5SDimitry Andric 
15370fca6ea1SDimitry Andric static bool adjustElementType(EVT &ElementType) {
15380fca6ea1SDimitry Andric   switch (ElementType.getSimpleVT().SimpleTy) {
15390fca6ea1SDimitry Andric   default:
15400fca6ea1SDimitry Andric     return false;
15410fca6ea1SDimitry Andric   case MVT::f16:
15420fca6ea1SDimitry Andric   case MVT::bf16:
15430fca6ea1SDimitry Andric     ElementType = MVT::i16;
15440fca6ea1SDimitry Andric     return true;
15450fca6ea1SDimitry Andric   case MVT::f32:
15460fca6ea1SDimitry Andric   case MVT::v2f16:
15470fca6ea1SDimitry Andric   case MVT::v2bf16:
15480fca6ea1SDimitry Andric     ElementType = MVT::i32;
15490fca6ea1SDimitry Andric     return true;
15500fca6ea1SDimitry Andric   case MVT::f64:
15510fca6ea1SDimitry Andric     ElementType = MVT::i64;
15520fca6ea1SDimitry Andric     return true;
15530fca6ea1SDimitry Andric   }
15540fca6ea1SDimitry Andric }
15550fca6ea1SDimitry Andric 
15560fca6ea1SDimitry Andric // Use byte-store when the param address of the argument value is unaligned.
15570fca6ea1SDimitry Andric // This may happen when the return value is a field of a packed structure.
15580fca6ea1SDimitry Andric //
15590fca6ea1SDimitry Andric // This is called in LowerCall() when passing the param values.
15600fca6ea1SDimitry Andric static SDValue LowerUnalignedStoreParam(SelectionDAG &DAG, SDValue Chain,
15610fca6ea1SDimitry Andric                                         uint64_t Offset, EVT ElementType,
15620fca6ea1SDimitry Andric                                         SDValue StVal, SDValue &InGlue,
15630fca6ea1SDimitry Andric                                         unsigned ArgID, const SDLoc &dl) {
15640fca6ea1SDimitry Andric   // Bit logic only works on integer types
15650fca6ea1SDimitry Andric   if (adjustElementType(ElementType))
15660fca6ea1SDimitry Andric     StVal = DAG.getNode(ISD::BITCAST, dl, ElementType, StVal);
15670fca6ea1SDimitry Andric 
15680fca6ea1SDimitry Andric   // Store each byte
15690fca6ea1SDimitry Andric   SDVTList StoreVTs = DAG.getVTList(MVT::Other, MVT::Glue);
15700fca6ea1SDimitry Andric   for (unsigned i = 0, n = ElementType.getSizeInBits() / 8; i < n; i++) {
15710fca6ea1SDimitry Andric     // Shift the byte to the last byte position
15720fca6ea1SDimitry Andric     SDValue ShiftVal = DAG.getNode(ISD::SRL, dl, ElementType, StVal,
15730fca6ea1SDimitry Andric                                    DAG.getConstant(i * 8, dl, MVT::i32));
15740fca6ea1SDimitry Andric     SDValue StoreOperands[] = {Chain, DAG.getConstant(ArgID, dl, MVT::i32),
15750fca6ea1SDimitry Andric                                DAG.getConstant(Offset + i, dl, MVT::i32),
15760fca6ea1SDimitry Andric                                ShiftVal, InGlue};
15770fca6ea1SDimitry Andric     // Trunc store only the last byte by using
15780fca6ea1SDimitry Andric     //     st.param.b8
15790fca6ea1SDimitry Andric     // The register type can be larger than b8.
15800fca6ea1SDimitry Andric     Chain = DAG.getMemIntrinsicNode(
15810fca6ea1SDimitry Andric         NVPTXISD::StoreParam, dl, StoreVTs, StoreOperands, MVT::i8,
15820fca6ea1SDimitry Andric         MachinePointerInfo(), Align(1), MachineMemOperand::MOStore);
15830fca6ea1SDimitry Andric     InGlue = Chain.getValue(1);
15840fca6ea1SDimitry Andric   }
15850fca6ea1SDimitry Andric   return Chain;
15860fca6ea1SDimitry Andric }
15870fca6ea1SDimitry Andric 
15880fca6ea1SDimitry Andric // Use byte-load when the param adress of the returned value is unaligned.
15890fca6ea1SDimitry Andric // This may happen when the returned value is a field of a packed structure.
15900fca6ea1SDimitry Andric static SDValue
15910fca6ea1SDimitry Andric LowerUnalignedLoadRetParam(SelectionDAG &DAG, SDValue &Chain, uint64_t Offset,
15920fca6ea1SDimitry Andric                            EVT ElementType, SDValue &InGlue,
15930fca6ea1SDimitry Andric                            SmallVectorImpl<SDValue> &TempProxyRegOps,
15940fca6ea1SDimitry Andric                            const SDLoc &dl) {
15950fca6ea1SDimitry Andric   // Bit logic only works on integer types
15960fca6ea1SDimitry Andric   EVT MergedType = ElementType;
15970fca6ea1SDimitry Andric   adjustElementType(MergedType);
15980fca6ea1SDimitry Andric 
15990fca6ea1SDimitry Andric   // Load each byte and construct the whole value. Initial value to 0
16000fca6ea1SDimitry Andric   SDValue RetVal = DAG.getConstant(0, dl, MergedType);
16010fca6ea1SDimitry Andric   // LoadParamMemI8 loads into i16 register only
16020fca6ea1SDimitry Andric   SDVTList LoadVTs = DAG.getVTList(MVT::i16, MVT::Other, MVT::Glue);
16030fca6ea1SDimitry Andric   for (unsigned i = 0, n = ElementType.getSizeInBits() / 8; i < n; i++) {
16040fca6ea1SDimitry Andric     SDValue LoadOperands[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
16050fca6ea1SDimitry Andric                               DAG.getConstant(Offset + i, dl, MVT::i32),
16060fca6ea1SDimitry Andric                               InGlue};
16070fca6ea1SDimitry Andric     // This will be selected to LoadParamMemI8
16080fca6ea1SDimitry Andric     SDValue LdVal =
16090fca6ea1SDimitry Andric         DAG.getMemIntrinsicNode(NVPTXISD::LoadParam, dl, LoadVTs, LoadOperands,
16100fca6ea1SDimitry Andric                                 MVT::i8, MachinePointerInfo(), Align(1));
16110fca6ea1SDimitry Andric     SDValue TmpLdVal = LdVal.getValue(0);
16120fca6ea1SDimitry Andric     Chain = LdVal.getValue(1);
16130fca6ea1SDimitry Andric     InGlue = LdVal.getValue(2);
16140fca6ea1SDimitry Andric 
16150fca6ea1SDimitry Andric     TmpLdVal = DAG.getNode(NVPTXISD::ProxyReg, dl,
16160fca6ea1SDimitry Andric                            TmpLdVal.getSimpleValueType(), TmpLdVal);
16170fca6ea1SDimitry Andric     TempProxyRegOps.push_back(TmpLdVal);
16180fca6ea1SDimitry Andric 
16190fca6ea1SDimitry Andric     SDValue CMask = DAG.getConstant(255, dl, MergedType);
16200fca6ea1SDimitry Andric     SDValue CShift = DAG.getConstant(i * 8, dl, MVT::i32);
16210fca6ea1SDimitry Andric     // Need to extend the i16 register to the whole width.
16220fca6ea1SDimitry Andric     TmpLdVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MergedType, TmpLdVal);
16230fca6ea1SDimitry Andric     // Mask off the high bits. Leave only the lower 8bits.
16240fca6ea1SDimitry Andric     // Do this because we are using loadparam.b8.
16250fca6ea1SDimitry Andric     TmpLdVal = DAG.getNode(ISD::AND, dl, MergedType, TmpLdVal, CMask);
16260fca6ea1SDimitry Andric     // Shift and merge
16270fca6ea1SDimitry Andric     TmpLdVal = DAG.getNode(ISD::SHL, dl, MergedType, TmpLdVal, CShift);
16280fca6ea1SDimitry Andric     RetVal = DAG.getNode(ISD::OR, dl, MergedType, RetVal, TmpLdVal);
16290fca6ea1SDimitry Andric   }
16300fca6ea1SDimitry Andric   if (ElementType != MergedType)
16310fca6ea1SDimitry Andric     RetVal = DAG.getNode(ISD::BITCAST, dl, ElementType, RetVal);
16320fca6ea1SDimitry Andric 
16330fca6ea1SDimitry Andric   return RetVal;
16340fca6ea1SDimitry Andric }
16350fca6ea1SDimitry Andric 
16360b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
16370b57cec5SDimitry Andric                                        SmallVectorImpl<SDValue> &InVals) const {
1638bdd1243dSDimitry Andric 
1639bdd1243dSDimitry Andric   if (CLI.IsVarArg && (STI.getPTXVersion() < 60 || STI.getSmVersion() < 30))
1640bdd1243dSDimitry Andric     report_fatal_error(
1641bdd1243dSDimitry Andric         "Support for variadic functions (unsized array parameter) introduced "
1642bdd1243dSDimitry Andric         "in PTX ISA version 6.0 and requires target sm_30.");
1643bdd1243dSDimitry Andric 
16440b57cec5SDimitry Andric   SelectionDAG &DAG = CLI.DAG;
16450b57cec5SDimitry Andric   SDLoc dl = CLI.DL;
16460b57cec5SDimitry Andric   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
16470b57cec5SDimitry Andric   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
16480b57cec5SDimitry Andric   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
16490b57cec5SDimitry Andric   SDValue Chain = CLI.Chain;
16500b57cec5SDimitry Andric   SDValue Callee = CLI.Callee;
16510b57cec5SDimitry Andric   bool &isTailCall = CLI.IsTailCall;
16520b57cec5SDimitry Andric   ArgListTy &Args = CLI.getArgs();
16530b57cec5SDimitry Andric   Type *RetTy = CLI.RetTy;
16545ffd83dbSDimitry Andric   const CallBase *CB = CLI.CB;
16550b57cec5SDimitry Andric   const DataLayout &DL = DAG.getDataLayout();
16560b57cec5SDimitry Andric 
16570b57cec5SDimitry Andric   bool isABI = (STI.getSmVersion() >= 20);
16580b57cec5SDimitry Andric   assert(isABI && "Non-ABI compilation is not supported");
16590b57cec5SDimitry Andric   if (!isABI)
16600b57cec5SDimitry Andric     return Chain;
16610b57cec5SDimitry Andric 
1662bdd1243dSDimitry Andric   // Variadic arguments.
1663bdd1243dSDimitry Andric   //
1664bdd1243dSDimitry Andric   // Normally, for each argument, we declare a param scalar or a param
1665bdd1243dSDimitry Andric   // byte array in the .param space, and store the argument value to that
1666bdd1243dSDimitry Andric   // param scalar or array starting at offset 0.
1667bdd1243dSDimitry Andric   //
1668bdd1243dSDimitry Andric   // In the case of the first variadic argument, we declare a vararg byte array
1669bdd1243dSDimitry Andric   // with size 0. The exact size of this array isn't known at this point, so
1670bdd1243dSDimitry Andric   // it'll be patched later. All the variadic arguments will be stored to this
1671bdd1243dSDimitry Andric   // array at a certain offset (which gets tracked by 'VAOffset'). The offset is
1672bdd1243dSDimitry Andric   // initially set to 0, so it can be used for non-variadic arguments (which use
1673bdd1243dSDimitry Andric   // 0 offset) to simplify the code.
1674bdd1243dSDimitry Andric   //
1675bdd1243dSDimitry Andric   // After all vararg is processed, 'VAOffset' holds the size of the
1676bdd1243dSDimitry Andric   // vararg byte array.
1677bdd1243dSDimitry Andric 
1678bdd1243dSDimitry Andric   SDValue VADeclareParam;                 // vararg byte array
1679bdd1243dSDimitry Andric   unsigned FirstVAArg = CLI.NumFixedArgs; // position of the first variadic
1680bdd1243dSDimitry Andric   unsigned VAOffset = 0;                  // current offset in the param array
1681bdd1243dSDimitry Andric 
1682e8d8bef9SDimitry Andric   unsigned UniqueCallSite = GlobalUniqueCallSite.fetch_add(1);
168381ad6265SDimitry Andric   SDValue TempChain = Chain;
1684e8d8bef9SDimitry Andric   Chain = DAG.getCALLSEQ_START(Chain, UniqueCallSite, 0, dl);
168506c3fb27SDimitry Andric   SDValue InGlue = Chain.getValue(1);
16860b57cec5SDimitry Andric 
168781ad6265SDimitry Andric   unsigned ParamCount = 0;
16880b57cec5SDimitry Andric   // Args.size() and Outs.size() need not match.
16890b57cec5SDimitry Andric   // Outs.size() will be larger
16900b57cec5SDimitry Andric   //   * if there is an aggregate argument with multiple fields (each field
16910b57cec5SDimitry Andric   //     showing up separately in Outs)
16920b57cec5SDimitry Andric   //   * if there is a vector argument with more than typical vector-length
16930b57cec5SDimitry Andric   //     elements (generally if more than 4) where each vector element is
16940b57cec5SDimitry Andric   //     individually present in Outs.
16950b57cec5SDimitry Andric   // So a different index should be used for indexing into Outs/OutVals.
16960b57cec5SDimitry Andric   // See similar issue in LowerFormalArguments.
16970b57cec5SDimitry Andric   unsigned OIdx = 0;
16980b57cec5SDimitry Andric   // Declare the .params or .reg need to pass values
16990b57cec5SDimitry Andric   // to the function
17000b57cec5SDimitry Andric   for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
17010b57cec5SDimitry Andric     EVT VT = Outs[OIdx].VT;
17020b57cec5SDimitry Andric     Type *Ty = Args[i].Ty;
1703bdd1243dSDimitry Andric     bool IsVAArg = (i >= CLI.NumFixedArgs);
170481ad6265SDimitry Andric     bool IsByVal = Outs[OIdx].Flags.isByVal();
17050b57cec5SDimitry Andric 
17060b57cec5SDimitry Andric     SmallVector<EVT, 16> VTs;
17070b57cec5SDimitry Andric     SmallVector<uint64_t, 16> Offsets;
170881ad6265SDimitry Andric 
170981ad6265SDimitry Andric     assert((!IsByVal || Args[i].IndirectType) &&
171081ad6265SDimitry Andric            "byval arg must have indirect type");
171181ad6265SDimitry Andric     Type *ETy = (IsByVal ? Args[i].IndirectType : Ty);
1712bdd1243dSDimitry Andric     ComputePTXValueVTs(*this, DL, ETy, VTs, &Offsets, IsByVal ? 0 : VAOffset);
171381ad6265SDimitry Andric 
171481ad6265SDimitry Andric     Align ArgAlign;
171581ad6265SDimitry Andric     if (IsByVal) {
171681ad6265SDimitry Andric       // The ByValAlign in the Outs[OIdx].Flags is always set at this point,
171781ad6265SDimitry Andric       // so we don't need to worry whether it's naturally aligned or not.
171881ad6265SDimitry Andric       // See TargetLowering::LowerCallTo().
1719bdd1243dSDimitry Andric       Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
1720bdd1243dSDimitry Andric       ArgAlign = getFunctionByValParamAlign(CB->getCalledFunction(), ETy,
1721bdd1243dSDimitry Andric                                             InitialAlign, DL);
1722bdd1243dSDimitry Andric       if (IsVAArg)
1723bdd1243dSDimitry Andric         VAOffset = alignTo(VAOffset, ArgAlign);
172481ad6265SDimitry Andric     } else {
17257a6dacacSDimitry Andric       ArgAlign = getArgumentAlignment(CB, Ty, ParamCount + 1, DL);
172681ad6265SDimitry Andric     }
172781ad6265SDimitry Andric 
172881ad6265SDimitry Andric     unsigned TypeSize =
172981ad6265SDimitry Andric         (IsByVal ? Outs[OIdx].Flags.getByValSize() : DL.getTypeAllocSize(Ty));
17300b57cec5SDimitry Andric     SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
173181ad6265SDimitry Andric 
17320b57cec5SDimitry Andric     bool NeedAlign; // Does argument declaration specify alignment?
173306c3fb27SDimitry Andric     bool PassAsArray = IsByVal || IsTypePassedAsArray(Ty);
1734bdd1243dSDimitry Andric     if (IsVAArg) {
1735bdd1243dSDimitry Andric       if (ParamCount == FirstVAArg) {
1736bdd1243dSDimitry Andric         SDValue DeclareParamOps[] = {
1737bdd1243dSDimitry Andric             Chain, DAG.getConstant(STI.getMaxRequiredAlignment(), dl, MVT::i32),
1738bdd1243dSDimitry Andric             DAG.getConstant(ParamCount, dl, MVT::i32),
173906c3fb27SDimitry Andric             DAG.getConstant(1, dl, MVT::i32), InGlue};
1740bdd1243dSDimitry Andric         VADeclareParam = Chain = DAG.getNode(NVPTXISD::DeclareParam, dl,
1741bdd1243dSDimitry Andric                                              DeclareParamVTs, DeclareParamOps);
1742bdd1243dSDimitry Andric       }
174306c3fb27SDimitry Andric       NeedAlign = PassAsArray;
174406c3fb27SDimitry Andric     } else if (PassAsArray) {
17450b57cec5SDimitry Andric       // declare .param .align <align> .b8 .param<n>[<size>];
17460b57cec5SDimitry Andric       SDValue DeclareParamOps[] = {
17475ffd83dbSDimitry Andric           Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32),
174881ad6265SDimitry Andric           DAG.getConstant(ParamCount, dl, MVT::i32),
174906c3fb27SDimitry Andric           DAG.getConstant(TypeSize, dl, MVT::i32), InGlue};
17500b57cec5SDimitry Andric       Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
17510b57cec5SDimitry Andric                           DeclareParamOps);
17520b57cec5SDimitry Andric       NeedAlign = true;
17530b57cec5SDimitry Andric     } else {
17540b57cec5SDimitry Andric       // declare .param .b<size> .param<n>;
1755fcaf7f86SDimitry Andric       if (VT.isInteger() || VT.isFloatingPoint()) {
17560b57cec5SDimitry Andric         // PTX ABI requires integral types to be at least 32 bits in
17570b57cec5SDimitry Andric         // size. FP16 is loaded/stored using i16, so it's handled
17580b57cec5SDimitry Andric         // here as well.
1759fcaf7f86SDimitry Andric         TypeSize = promoteScalarArgumentSize(TypeSize * 8) / 8;
17600b57cec5SDimitry Andric       }
17610b57cec5SDimitry Andric       SDValue DeclareScalarParamOps[] = {
176281ad6265SDimitry Andric           Chain, DAG.getConstant(ParamCount, dl, MVT::i32),
176381ad6265SDimitry Andric           DAG.getConstant(TypeSize * 8, dl, MVT::i32),
176406c3fb27SDimitry Andric           DAG.getConstant(0, dl, MVT::i32), InGlue};
17650b57cec5SDimitry Andric       Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
17660b57cec5SDimitry Andric                           DeclareScalarParamOps);
17670b57cec5SDimitry Andric       NeedAlign = false;
17680b57cec5SDimitry Andric     }
176906c3fb27SDimitry Andric     InGlue = Chain.getValue(1);
17700b57cec5SDimitry Andric 
17710b57cec5SDimitry Andric     // PTX Interoperability Guide 3.3(A): [Integer] Values shorter
17720b57cec5SDimitry Andric     // than 32-bits are sign extended or zero extended, depending on
17730b57cec5SDimitry Andric     // whether they are signed or unsigned types. This case applies
17740b57cec5SDimitry Andric     // only to scalar parameters and not to aggregate values.
17750b57cec5SDimitry Andric     bool ExtendIntegerParam =
17760b57cec5SDimitry Andric         Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32;
17770b57cec5SDimitry Andric 
1778bdd1243dSDimitry Andric     auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign, IsVAArg);
17790b57cec5SDimitry Andric     SmallVector<SDValue, 6> StoreOperands;
17800b57cec5SDimitry Andric     for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
178181ad6265SDimitry Andric       EVT EltVT = VTs[j];
178281ad6265SDimitry Andric       int CurOffset = Offsets[j];
178381ad6265SDimitry Andric       MaybeAlign PartAlign;
178481ad6265SDimitry Andric       if (NeedAlign)
178581ad6265SDimitry Andric         PartAlign = commonAlignment(ArgAlign, CurOffset);
178681ad6265SDimitry Andric 
17870b57cec5SDimitry Andric       SDValue StVal = OutVals[OIdx];
1788fcaf7f86SDimitry Andric 
1789fcaf7f86SDimitry Andric       MVT PromotedVT;
1790fcaf7f86SDimitry Andric       if (PromoteScalarIntegerPTX(EltVT, &PromotedVT)) {
1791fcaf7f86SDimitry Andric         EltVT = EVT(PromotedVT);
1792fcaf7f86SDimitry Andric       }
1793fcaf7f86SDimitry Andric       if (PromoteScalarIntegerPTX(StVal.getValueType(), &PromotedVT)) {
1794fcaf7f86SDimitry Andric         llvm::ISD::NodeType Ext =
1795fcaf7f86SDimitry Andric             Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1796fcaf7f86SDimitry Andric         StVal = DAG.getNode(Ext, dl, PromotedVT, StVal);
1797fcaf7f86SDimitry Andric       }
1798fcaf7f86SDimitry Andric 
179981ad6265SDimitry Andric       if (IsByVal) {
180081ad6265SDimitry Andric         auto PtrVT = getPointerTy(DL);
180181ad6265SDimitry Andric         SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StVal,
180281ad6265SDimitry Andric                                       DAG.getConstant(CurOffset, dl, PtrVT));
180381ad6265SDimitry Andric         StVal = DAG.getLoad(EltVT, dl, TempChain, srcAddr, MachinePointerInfo(),
180481ad6265SDimitry Andric                             PartAlign);
180581ad6265SDimitry Andric       } else if (ExtendIntegerParam) {
18060b57cec5SDimitry Andric         assert(VTs.size() == 1 && "Scalar can't have multiple parts.");
18070b57cec5SDimitry Andric         // zext/sext to i32
18080b57cec5SDimitry Andric         StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
18090b57cec5SDimitry Andric                                                       : ISD::ZERO_EXTEND,
18100b57cec5SDimitry Andric                             dl, MVT::i32, StVal);
181181ad6265SDimitry Andric       }
181281ad6265SDimitry Andric 
181381ad6265SDimitry Andric       if (!ExtendIntegerParam && EltVT.getSizeInBits() < 16) {
18140b57cec5SDimitry Andric         // Use 16-bit registers for small stores as it's the
18150b57cec5SDimitry Andric         // smallest general purpose register size supported by NVPTX.
18160b57cec5SDimitry Andric         StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
18170b57cec5SDimitry Andric       }
18180b57cec5SDimitry Andric 
18190fca6ea1SDimitry Andric       // If we have a PVF_SCALAR entry, it may not be sufficiently aligned for a
18200fca6ea1SDimitry Andric       // scalar store. In such cases, fall back to byte stores.
18210fca6ea1SDimitry Andric       if (VectorInfo[j] == PVF_SCALAR && !IsVAArg && PartAlign.has_value() &&
18220fca6ea1SDimitry Andric           PartAlign.value() <
18230fca6ea1SDimitry Andric               DL.getABITypeAlign(EltVT.getTypeForEVT(*DAG.getContext()))) {
18240fca6ea1SDimitry Andric         assert(StoreOperands.empty() && "Unfinished preceeding store.");
18250fca6ea1SDimitry Andric         Chain = LowerUnalignedStoreParam(
18260fca6ea1SDimitry Andric             DAG, Chain, IsByVal ? CurOffset + VAOffset : CurOffset, EltVT,
18270fca6ea1SDimitry Andric             StVal, InGlue, ParamCount, dl);
18280fca6ea1SDimitry Andric 
18290fca6ea1SDimitry Andric         // LowerUnalignedStoreParam took care of inserting the necessary nodes
18300fca6ea1SDimitry Andric         // into the SDAG, so just move on to the next element.
18310fca6ea1SDimitry Andric         if (!IsByVal)
18320fca6ea1SDimitry Andric           ++OIdx;
18330fca6ea1SDimitry Andric         continue;
18340fca6ea1SDimitry Andric       }
18350fca6ea1SDimitry Andric 
18360fca6ea1SDimitry Andric       // New store.
18370fca6ea1SDimitry Andric       if (VectorInfo[j] & PVF_FIRST) {
18380fca6ea1SDimitry Andric         assert(StoreOperands.empty() && "Unfinished preceding store.");
18390fca6ea1SDimitry Andric         StoreOperands.push_back(Chain);
18400fca6ea1SDimitry Andric         StoreOperands.push_back(
18410fca6ea1SDimitry Andric             DAG.getConstant(IsVAArg ? FirstVAArg : ParamCount, dl, MVT::i32));
18420fca6ea1SDimitry Andric 
18430fca6ea1SDimitry Andric         StoreOperands.push_back(DAG.getConstant(
18440fca6ea1SDimitry Andric             IsByVal ? CurOffset + VAOffset : (IsVAArg ? VAOffset : CurOffset),
18450fca6ea1SDimitry Andric             dl, MVT::i32));
18460fca6ea1SDimitry Andric       }
18470fca6ea1SDimitry Andric 
18480b57cec5SDimitry Andric       // Record the value to store.
18490b57cec5SDimitry Andric       StoreOperands.push_back(StVal);
18500b57cec5SDimitry Andric 
18510b57cec5SDimitry Andric       if (VectorInfo[j] & PVF_LAST) {
18520b57cec5SDimitry Andric         unsigned NumElts = StoreOperands.size() - 3;
18530b57cec5SDimitry Andric         NVPTXISD::NodeType Op;
18540b57cec5SDimitry Andric         switch (NumElts) {
18550b57cec5SDimitry Andric         case 1:
18560b57cec5SDimitry Andric           Op = NVPTXISD::StoreParam;
18570b57cec5SDimitry Andric           break;
18580b57cec5SDimitry Andric         case 2:
18590b57cec5SDimitry Andric           Op = NVPTXISD::StoreParamV2;
18600b57cec5SDimitry Andric           break;
18610b57cec5SDimitry Andric         case 4:
18620b57cec5SDimitry Andric           Op = NVPTXISD::StoreParamV4;
18630b57cec5SDimitry Andric           break;
18640b57cec5SDimitry Andric         default:
18650b57cec5SDimitry Andric           llvm_unreachable("Invalid vector info.");
18660b57cec5SDimitry Andric         }
18670b57cec5SDimitry Andric 
186806c3fb27SDimitry Andric         StoreOperands.push_back(InGlue);
18690b57cec5SDimitry Andric 
18700b57cec5SDimitry Andric         // Adjust type of the store op if we've extended the scalar
18710b57cec5SDimitry Andric         // return value.
187281ad6265SDimitry Andric         EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT;
18730b57cec5SDimitry Andric 
18740b57cec5SDimitry Andric         Chain = DAG.getMemIntrinsicNode(
18750b57cec5SDimitry Andric             Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
187681ad6265SDimitry Andric             TheStoreType, MachinePointerInfo(), PartAlign,
18770b57cec5SDimitry Andric             MachineMemOperand::MOStore);
187806c3fb27SDimitry Andric         InGlue = Chain.getValue(1);
18790b57cec5SDimitry Andric 
18800b57cec5SDimitry Andric         // Cleanup.
18810b57cec5SDimitry Andric         StoreOperands.clear();
1882bdd1243dSDimitry Andric 
1883bdd1243dSDimitry Andric         // TODO: We may need to support vector types that can be passed
1884bdd1243dSDimitry Andric         // as scalars in variadic arguments.
1885bdd1243dSDimitry Andric         if (!IsByVal && IsVAArg) {
1886bdd1243dSDimitry Andric           assert(NumElts == 1 &&
1887bdd1243dSDimitry Andric                  "Vectorization is expected to be disabled for variadics.");
1888bdd1243dSDimitry Andric           VAOffset += DL.getTypeAllocSize(
1889bdd1243dSDimitry Andric               TheStoreType.getTypeForEVT(*DAG.getContext()));
1890bdd1243dSDimitry Andric         }
18910b57cec5SDimitry Andric       }
189281ad6265SDimitry Andric       if (!IsByVal)
18930b57cec5SDimitry Andric         ++OIdx;
18940b57cec5SDimitry Andric     }
18950b57cec5SDimitry Andric     assert(StoreOperands.empty() && "Unfinished parameter store.");
189681ad6265SDimitry Andric     if (!IsByVal && VTs.size() > 0)
18970b57cec5SDimitry Andric       --OIdx;
189881ad6265SDimitry Andric     ++ParamCount;
1899bdd1243dSDimitry Andric     if (IsByVal && IsVAArg)
1900bdd1243dSDimitry Andric       VAOffset += TypeSize;
19010b57cec5SDimitry Andric   }
19020b57cec5SDimitry Andric 
19030b57cec5SDimitry Andric   GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
1904bdd1243dSDimitry Andric   MaybeAlign retAlignment = std::nullopt;
19050b57cec5SDimitry Andric 
19060b57cec5SDimitry Andric   // Handle Result
19070b57cec5SDimitry Andric   if (Ins.size() > 0) {
19080b57cec5SDimitry Andric     SmallVector<EVT, 16> resvtparts;
19090b57cec5SDimitry Andric     ComputeValueVTs(*this, DL, RetTy, resvtparts);
19100b57cec5SDimitry Andric 
19110b57cec5SDimitry Andric     // Declare
191206c3fb27SDimitry Andric     //  .param .align N .b8 retval0[<size-in-bytes>], or
19130b57cec5SDimitry Andric     //  .param .b<size-in-bits> retval0
19140b57cec5SDimitry Andric     unsigned resultsz = DL.getTypeAllocSizeInBits(RetTy);
191506c3fb27SDimitry Andric     if (!IsTypePassedAsArray(RetTy)) {
1916fcaf7f86SDimitry Andric       resultsz = promoteScalarArgumentSize(resultsz);
19170b57cec5SDimitry Andric       SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
19180b57cec5SDimitry Andric       SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
19190b57cec5SDimitry Andric                                   DAG.getConstant(resultsz, dl, MVT::i32),
192006c3fb27SDimitry Andric                                   DAG.getConstant(0, dl, MVT::i32), InGlue };
19210b57cec5SDimitry Andric       Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
19220b57cec5SDimitry Andric                           DeclareRetOps);
192306c3fb27SDimitry Andric       InGlue = Chain.getValue(1);
19240b57cec5SDimitry Andric     } else {
19257a6dacacSDimitry Andric       retAlignment = getArgumentAlignment(CB, RetTy, 0, DL);
19265ffd83dbSDimitry Andric       assert(retAlignment && "retAlignment is guaranteed to be set");
19270b57cec5SDimitry Andric       SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
19285ffd83dbSDimitry Andric       SDValue DeclareRetOps[] = {
19295ffd83dbSDimitry Andric           Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32),
19300b57cec5SDimitry Andric           DAG.getConstant(resultsz / 8, dl, MVT::i32),
193106c3fb27SDimitry Andric           DAG.getConstant(0, dl, MVT::i32), InGlue};
19320b57cec5SDimitry Andric       Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
19330b57cec5SDimitry Andric                           DeclareRetOps);
193406c3fb27SDimitry Andric       InGlue = Chain.getValue(1);
19350b57cec5SDimitry Andric     }
19360b57cec5SDimitry Andric   }
19370b57cec5SDimitry Andric 
1938bdd1243dSDimitry Andric   bool HasVAArgs = CLI.IsVarArg && (CLI.Args.size() > CLI.NumFixedArgs);
1939bdd1243dSDimitry Andric   // Set the size of the vararg param byte array if the callee is a variadic
1940bdd1243dSDimitry Andric   // function and the variadic part is not empty.
1941bdd1243dSDimitry Andric   if (HasVAArgs) {
1942bdd1243dSDimitry Andric     SDValue DeclareParamOps[] = {
1943bdd1243dSDimitry Andric         VADeclareParam.getOperand(0), VADeclareParam.getOperand(1),
1944bdd1243dSDimitry Andric         VADeclareParam.getOperand(2), DAG.getConstant(VAOffset, dl, MVT::i32),
1945bdd1243dSDimitry Andric         VADeclareParam.getOperand(4)};
1946bdd1243dSDimitry Andric     DAG.MorphNodeTo(VADeclareParam.getNode(), VADeclareParam.getOpcode(),
1947bdd1243dSDimitry Andric                     VADeclareParam->getVTList(), DeclareParamOps);
1948bdd1243dSDimitry Andric   }
1949bdd1243dSDimitry Andric 
19500b57cec5SDimitry Andric   // Both indirect calls and libcalls have nullptr Func. In order to distinguish
19510b57cec5SDimitry Andric   // between them we must rely on the call site value which is valid for
19520b57cec5SDimitry Andric   // indirect calls but is always null for libcalls.
19535ffd83dbSDimitry Andric   bool isIndirectCall = !Func && CB;
19540b57cec5SDimitry Andric 
19550b57cec5SDimitry Andric   if (isa<ExternalSymbolSDNode>(Callee)) {
19560b57cec5SDimitry Andric     Function* CalleeFunc = nullptr;
19570b57cec5SDimitry Andric 
19580b57cec5SDimitry Andric     // Try to find the callee in the current module.
19590b57cec5SDimitry Andric     Callee = DAG.getSymbolFunctionGlobalAddress(Callee, &CalleeFunc);
19600b57cec5SDimitry Andric     assert(CalleeFunc != nullptr && "Libcall callee must be set.");
19610b57cec5SDimitry Andric 
19620b57cec5SDimitry Andric     // Set the "libcall callee" attribute to indicate that the function
19630b57cec5SDimitry Andric     // must always have a declaration.
19640b57cec5SDimitry Andric     CalleeFunc->addFnAttr("nvptx-libcall-callee", "true");
19650b57cec5SDimitry Andric   }
19660b57cec5SDimitry Andric 
19670b57cec5SDimitry Andric   if (isIndirectCall) {
19680b57cec5SDimitry Andric     // This is indirect function call case : PTX requires a prototype of the
19690b57cec5SDimitry Andric     // form
19700b57cec5SDimitry Andric     // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
19710b57cec5SDimitry Andric     // to be emitted, and the label has to used as the last arg of call
19720b57cec5SDimitry Andric     // instruction.
19730b57cec5SDimitry Andric     // The prototype is embedded in a string and put as the operand for a
19740b57cec5SDimitry Andric     // CallPrototype SDNode which will print out to the value of the string.
19750b57cec5SDimitry Andric     SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1976bdd1243dSDimitry Andric     std::string Proto = getPrototype(
1977bdd1243dSDimitry Andric         DL, RetTy, Args, Outs, retAlignment,
1978bdd1243dSDimitry Andric         HasVAArgs
1979bdd1243dSDimitry Andric             ? std::optional<std::pair<unsigned, const APInt &>>(std::make_pair(
1980297eecfbSDimitry Andric                   CLI.NumFixedArgs, VADeclareParam->getConstantOperandAPInt(1)))
1981bdd1243dSDimitry Andric             : std::nullopt,
1982bdd1243dSDimitry Andric         *CB, UniqueCallSite);
1983bdd1243dSDimitry Andric     const char *ProtoStr = nvTM->getStrPool().save(Proto).data();
19840b57cec5SDimitry Andric     SDValue ProtoOps[] = {
1985bdd1243dSDimitry Andric         Chain,
1986bdd1243dSDimitry Andric         DAG.getTargetExternalSymbol(ProtoStr, MVT::i32),
198706c3fb27SDimitry Andric         InGlue,
19880b57cec5SDimitry Andric     };
19890b57cec5SDimitry Andric     Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
199006c3fb27SDimitry Andric     InGlue = Chain.getValue(1);
19910b57cec5SDimitry Andric   }
19920b57cec5SDimitry Andric   // Op to just print "call"
19930b57cec5SDimitry Andric   SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
19940b57cec5SDimitry Andric   SDValue PrintCallOps[] = {
199506c3fb27SDimitry Andric     Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InGlue
19960b57cec5SDimitry Andric   };
19970b57cec5SDimitry Andric   // We model convergent calls as separate opcodes.
19980b57cec5SDimitry Andric   unsigned Opcode = isIndirectCall ? NVPTXISD::PrintCall : NVPTXISD::PrintCallUni;
19990b57cec5SDimitry Andric   if (CLI.IsConvergent)
20000b57cec5SDimitry Andric     Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni
20010b57cec5SDimitry Andric                                               : NVPTXISD::PrintConvergentCall;
20020b57cec5SDimitry Andric   Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps);
200306c3fb27SDimitry Andric   InGlue = Chain.getValue(1);
20040b57cec5SDimitry Andric 
20050b57cec5SDimitry Andric   // Ops to print out the function name
20060b57cec5SDimitry Andric   SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
200706c3fb27SDimitry Andric   SDValue CallVoidOps[] = { Chain, Callee, InGlue };
20080b57cec5SDimitry Andric   Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
200906c3fb27SDimitry Andric   InGlue = Chain.getValue(1);
20100b57cec5SDimitry Andric 
20110b57cec5SDimitry Andric   // Ops to print out the param list
20120b57cec5SDimitry Andric   SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
201306c3fb27SDimitry Andric   SDValue CallArgBeginOps[] = { Chain, InGlue };
20140b57cec5SDimitry Andric   Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
20150b57cec5SDimitry Andric                       CallArgBeginOps);
201606c3fb27SDimitry Andric   InGlue = Chain.getValue(1);
20170b57cec5SDimitry Andric 
2018bdd1243dSDimitry Andric   for (unsigned i = 0, e = std::min(CLI.NumFixedArgs + 1, ParamCount); i != e;
2019bdd1243dSDimitry Andric        ++i) {
20200b57cec5SDimitry Andric     unsigned opcode;
20210b57cec5SDimitry Andric     if (i == (e - 1))
20220b57cec5SDimitry Andric       opcode = NVPTXISD::LastCallArg;
20230b57cec5SDimitry Andric     else
20240b57cec5SDimitry Andric       opcode = NVPTXISD::CallArg;
20250b57cec5SDimitry Andric     SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
20260b57cec5SDimitry Andric     SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
202706c3fb27SDimitry Andric                              DAG.getConstant(i, dl, MVT::i32), InGlue };
20280b57cec5SDimitry Andric     Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
202906c3fb27SDimitry Andric     InGlue = Chain.getValue(1);
20300b57cec5SDimitry Andric   }
20310b57cec5SDimitry Andric   SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
20320b57cec5SDimitry Andric   SDValue CallArgEndOps[] = { Chain,
20330b57cec5SDimitry Andric                               DAG.getConstant(isIndirectCall ? 0 : 1, dl, MVT::i32),
203406c3fb27SDimitry Andric                               InGlue };
20350b57cec5SDimitry Andric   Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
203606c3fb27SDimitry Andric   InGlue = Chain.getValue(1);
20370b57cec5SDimitry Andric 
20380b57cec5SDimitry Andric   if (isIndirectCall) {
20390b57cec5SDimitry Andric     SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
2040e8d8bef9SDimitry Andric     SDValue PrototypeOps[] = {
204106c3fb27SDimitry Andric         Chain, DAG.getConstant(UniqueCallSite, dl, MVT::i32), InGlue};
20420b57cec5SDimitry Andric     Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
204306c3fb27SDimitry Andric     InGlue = Chain.getValue(1);
20440b57cec5SDimitry Andric   }
20450b57cec5SDimitry Andric 
20460b57cec5SDimitry Andric   SmallVector<SDValue, 16> ProxyRegOps;
2047bdd1243dSDimitry Andric   SmallVector<std::optional<MVT>, 16> ProxyRegTruncates;
20480fca6ea1SDimitry Andric   // An item of the vector is filled if the element does not need a ProxyReg
20490fca6ea1SDimitry Andric   // operation on it and should be added to InVals as is. ProxyRegOps and
20500fca6ea1SDimitry Andric   // ProxyRegTruncates contain empty/none items at the same index.
20510fca6ea1SDimitry Andric   SmallVector<SDValue, 16> RetElts;
20520fca6ea1SDimitry Andric   // A temporary ProxyReg operations inserted in `LowerUnalignedLoadRetParam()`
20530fca6ea1SDimitry Andric   // to use the values of `LoadParam`s and to be replaced later then
20540fca6ea1SDimitry Andric   // `CALLSEQ_END` is added.
20550fca6ea1SDimitry Andric   SmallVector<SDValue, 16> TempProxyRegOps;
20560b57cec5SDimitry Andric 
20570b57cec5SDimitry Andric   // Generate loads from param memory/moves from registers for result
20580b57cec5SDimitry Andric   if (Ins.size() > 0) {
20590b57cec5SDimitry Andric     SmallVector<EVT, 16> VTs;
20600b57cec5SDimitry Andric     SmallVector<uint64_t, 16> Offsets;
20610b57cec5SDimitry Andric     ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0);
20620b57cec5SDimitry Andric     assert(VTs.size() == Ins.size() && "Bad value decomposition");
20630b57cec5SDimitry Andric 
20647a6dacacSDimitry Andric     Align RetAlign = getArgumentAlignment(CB, RetTy, 0, DL);
20650b57cec5SDimitry Andric     auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
20660b57cec5SDimitry Andric 
20670b57cec5SDimitry Andric     SmallVector<EVT, 6> LoadVTs;
20680b57cec5SDimitry Andric     int VecIdx = -1; // Index of the first element of the vector.
20690b57cec5SDimitry Andric 
20700b57cec5SDimitry Andric     // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
20710b57cec5SDimitry Andric     // 32-bits are sign extended or zero extended, depending on whether
20720b57cec5SDimitry Andric     // they are signed or unsigned types.
20730b57cec5SDimitry Andric     bool ExtendIntegerRetVal =
20740b57cec5SDimitry Andric         RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
20750b57cec5SDimitry Andric 
20760b57cec5SDimitry Andric     for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
20770b57cec5SDimitry Andric       bool needTruncate = false;
20780b57cec5SDimitry Andric       EVT TheLoadType = VTs[i];
20790b57cec5SDimitry Andric       EVT EltType = Ins[i].VT;
20805ffd83dbSDimitry Andric       Align EltAlign = commonAlignment(RetAlign, Offsets[i]);
2081fcaf7f86SDimitry Andric       MVT PromotedVT;
2082fcaf7f86SDimitry Andric 
2083fcaf7f86SDimitry Andric       if (PromoteScalarIntegerPTX(TheLoadType, &PromotedVT)) {
2084fcaf7f86SDimitry Andric         TheLoadType = EVT(PromotedVT);
2085fcaf7f86SDimitry Andric         EltType = EVT(PromotedVT);
2086fcaf7f86SDimitry Andric         needTruncate = true;
2087fcaf7f86SDimitry Andric       }
2088fcaf7f86SDimitry Andric 
20890b57cec5SDimitry Andric       if (ExtendIntegerRetVal) {
20900b57cec5SDimitry Andric         TheLoadType = MVT::i32;
20910b57cec5SDimitry Andric         EltType = MVT::i32;
20920b57cec5SDimitry Andric         needTruncate = true;
20930b57cec5SDimitry Andric       } else if (TheLoadType.getSizeInBits() < 16) {
20940b57cec5SDimitry Andric         if (VTs[i].isInteger())
20950b57cec5SDimitry Andric           needTruncate = true;
20960b57cec5SDimitry Andric         EltType = MVT::i16;
20970b57cec5SDimitry Andric       }
20980b57cec5SDimitry Andric 
20990fca6ea1SDimitry Andric       // If we have a PVF_SCALAR entry, it may not be sufficiently aligned for a
21000fca6ea1SDimitry Andric       // scalar load. In such cases, fall back to byte loads.
21010fca6ea1SDimitry Andric       if (VectorInfo[i] == PVF_SCALAR && RetTy->isAggregateType() &&
21020fca6ea1SDimitry Andric           EltAlign < DL.getABITypeAlign(
21030fca6ea1SDimitry Andric                          TheLoadType.getTypeForEVT(*DAG.getContext()))) {
21040fca6ea1SDimitry Andric         assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list.");
21050fca6ea1SDimitry Andric         SDValue Ret = LowerUnalignedLoadRetParam(
21060fca6ea1SDimitry Andric             DAG, Chain, Offsets[i], TheLoadType, InGlue, TempProxyRegOps, dl);
21070fca6ea1SDimitry Andric         ProxyRegOps.push_back(SDValue());
21080fca6ea1SDimitry Andric         ProxyRegTruncates.push_back(std::optional<MVT>());
21090fca6ea1SDimitry Andric         RetElts.resize(i);
21100fca6ea1SDimitry Andric         RetElts.push_back(Ret);
21110fca6ea1SDimitry Andric 
21120fca6ea1SDimitry Andric         continue;
21130fca6ea1SDimitry Andric       }
21140fca6ea1SDimitry Andric 
21150b57cec5SDimitry Andric       // Record index of the very first element of the vector.
21160b57cec5SDimitry Andric       if (VectorInfo[i] & PVF_FIRST) {
21170b57cec5SDimitry Andric         assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list.");
21180b57cec5SDimitry Andric         VecIdx = i;
21190b57cec5SDimitry Andric       }
21200b57cec5SDimitry Andric 
21210b57cec5SDimitry Andric       LoadVTs.push_back(EltType);
21220b57cec5SDimitry Andric 
21230b57cec5SDimitry Andric       if (VectorInfo[i] & PVF_LAST) {
21240b57cec5SDimitry Andric         unsigned NumElts = LoadVTs.size();
21250b57cec5SDimitry Andric         LoadVTs.push_back(MVT::Other);
21260b57cec5SDimitry Andric         LoadVTs.push_back(MVT::Glue);
21270b57cec5SDimitry Andric         NVPTXISD::NodeType Op;
21280b57cec5SDimitry Andric         switch (NumElts) {
21290b57cec5SDimitry Andric         case 1:
21300b57cec5SDimitry Andric           Op = NVPTXISD::LoadParam;
21310b57cec5SDimitry Andric           break;
21320b57cec5SDimitry Andric         case 2:
21330b57cec5SDimitry Andric           Op = NVPTXISD::LoadParamV2;
21340b57cec5SDimitry Andric           break;
21350b57cec5SDimitry Andric         case 4:
21360b57cec5SDimitry Andric           Op = NVPTXISD::LoadParamV4;
21370b57cec5SDimitry Andric           break;
21380b57cec5SDimitry Andric         default:
21390b57cec5SDimitry Andric           llvm_unreachable("Invalid vector info.");
21400b57cec5SDimitry Andric         }
21410b57cec5SDimitry Andric 
21420b57cec5SDimitry Andric         SDValue LoadOperands[] = {
21430b57cec5SDimitry Andric             Chain, DAG.getConstant(1, dl, MVT::i32),
214406c3fb27SDimitry Andric             DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InGlue};
21450b57cec5SDimitry Andric         SDValue RetVal = DAG.getMemIntrinsicNode(
21460b57cec5SDimitry Andric             Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType,
21470b57cec5SDimitry Andric             MachinePointerInfo(), EltAlign,
21480b57cec5SDimitry Andric             MachineMemOperand::MOLoad);
21490b57cec5SDimitry Andric 
21500b57cec5SDimitry Andric         for (unsigned j = 0; j < NumElts; ++j) {
21510b57cec5SDimitry Andric           ProxyRegOps.push_back(RetVal.getValue(j));
21520b57cec5SDimitry Andric 
21530b57cec5SDimitry Andric           if (needTruncate)
2154bdd1243dSDimitry Andric             ProxyRegTruncates.push_back(std::optional<MVT>(Ins[VecIdx + j].VT));
21550b57cec5SDimitry Andric           else
2156bdd1243dSDimitry Andric             ProxyRegTruncates.push_back(std::optional<MVT>());
21570b57cec5SDimitry Andric         }
21580b57cec5SDimitry Andric 
21590b57cec5SDimitry Andric         Chain = RetVal.getValue(NumElts);
216006c3fb27SDimitry Andric         InGlue = RetVal.getValue(NumElts + 1);
21610b57cec5SDimitry Andric 
21620b57cec5SDimitry Andric         // Cleanup
21630b57cec5SDimitry Andric         VecIdx = -1;
21640b57cec5SDimitry Andric         LoadVTs.clear();
21650b57cec5SDimitry Andric       }
21660b57cec5SDimitry Andric     }
21670b57cec5SDimitry Andric   }
21680b57cec5SDimitry Andric 
2169bdd1243dSDimitry Andric   Chain =
217006c3fb27SDimitry Andric       DAG.getCALLSEQ_END(Chain, UniqueCallSite, UniqueCallSite + 1, InGlue, dl);
217106c3fb27SDimitry Andric   InGlue = Chain.getValue(1);
21720b57cec5SDimitry Andric 
21730b57cec5SDimitry Andric   // Append ProxyReg instructions to the chain to make sure that `callseq_end`
21740b57cec5SDimitry Andric   // will not get lost. Otherwise, during libcalls expansion, the nodes can become
21750b57cec5SDimitry Andric   // dangling.
21760b57cec5SDimitry Andric   for (unsigned i = 0; i < ProxyRegOps.size(); ++i) {
21770fca6ea1SDimitry Andric     if (i < RetElts.size() && RetElts[i]) {
21780fca6ea1SDimitry Andric       InVals.push_back(RetElts[i]);
21790fca6ea1SDimitry Andric       continue;
21800fca6ea1SDimitry Andric     }
21810fca6ea1SDimitry Andric 
21820b57cec5SDimitry Andric     SDValue Ret = DAG.getNode(
21830b57cec5SDimitry Andric       NVPTXISD::ProxyReg, dl,
21840b57cec5SDimitry Andric       DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other, MVT::Glue),
218506c3fb27SDimitry Andric       { Chain, ProxyRegOps[i], InGlue }
21860b57cec5SDimitry Andric     );
21870b57cec5SDimitry Andric 
21880b57cec5SDimitry Andric     Chain = Ret.getValue(1);
218906c3fb27SDimitry Andric     InGlue = Ret.getValue(2);
21900b57cec5SDimitry Andric 
219181ad6265SDimitry Andric     if (ProxyRegTruncates[i]) {
2192bdd1243dSDimitry Andric       Ret = DAG.getNode(ISD::TRUNCATE, dl, *ProxyRegTruncates[i], Ret);
21930b57cec5SDimitry Andric     }
21940b57cec5SDimitry Andric 
21950b57cec5SDimitry Andric     InVals.push_back(Ret);
21960b57cec5SDimitry Andric   }
21970b57cec5SDimitry Andric 
21980fca6ea1SDimitry Andric   for (SDValue &T : TempProxyRegOps) {
21990fca6ea1SDimitry Andric     SDValue Repl = DAG.getNode(
22000fca6ea1SDimitry Andric         NVPTXISD::ProxyReg, dl,
22010fca6ea1SDimitry Andric         DAG.getVTList(T.getSimpleValueType(), MVT::Other, MVT::Glue),
22020fca6ea1SDimitry Andric         {Chain, T.getOperand(0), InGlue});
22030fca6ea1SDimitry Andric     DAG.ReplaceAllUsesWith(T, Repl);
22040fca6ea1SDimitry Andric     DAG.RemoveDeadNode(T.getNode());
22050fca6ea1SDimitry Andric 
22060fca6ea1SDimitry Andric     Chain = Repl.getValue(1);
22070fca6ea1SDimitry Andric     InGlue = Repl.getValue(2);
22080fca6ea1SDimitry Andric   }
22090fca6ea1SDimitry Andric 
22100b57cec5SDimitry Andric   // set isTailCall to false for now, until we figure out how to express
22110b57cec5SDimitry Andric   // tail call optimization in PTX
22120b57cec5SDimitry Andric   isTailCall = false;
22130b57cec5SDimitry Andric   return Chain;
22140b57cec5SDimitry Andric }
22150b57cec5SDimitry Andric 
22165f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
22175f757f3fSDimitry Andric                                                      SelectionDAG &DAG) const {
22180fca6ea1SDimitry Andric 
22190fca6ea1SDimitry Andric   if (STI.getPTXVersion() < 73 || STI.getSmVersion() < 52) {
22205f757f3fSDimitry Andric     const Function &Fn = DAG.getMachineFunction().getFunction();
22215f757f3fSDimitry Andric 
22225f757f3fSDimitry Andric     DiagnosticInfoUnsupported NoDynamicAlloca(
22230fca6ea1SDimitry Andric         Fn,
22240fca6ea1SDimitry Andric         "Support for dynamic alloca introduced in PTX ISA version 7.3 and "
22250fca6ea1SDimitry Andric         "requires target sm_52.",
22265f757f3fSDimitry Andric         SDLoc(Op).getDebugLoc());
22275f757f3fSDimitry Andric     DAG.getContext()->diagnose(NoDynamicAlloca);
22280fca6ea1SDimitry Andric     auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()),
22290fca6ea1SDimitry Andric                 Op.getOperand(0)};
22305f757f3fSDimitry Andric     return DAG.getMergeValues(Ops, SDLoc());
22315f757f3fSDimitry Andric   }
22325f757f3fSDimitry Andric 
22330fca6ea1SDimitry Andric   SDValue Chain = Op.getOperand(0);
22340fca6ea1SDimitry Andric   SDValue Size = Op.getOperand(1);
22350fca6ea1SDimitry Andric   uint64_t Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
22360fca6ea1SDimitry Andric   SDLoc DL(Op.getNode());
22370fca6ea1SDimitry Andric 
22380fca6ea1SDimitry Andric   // The size for ptx alloca instruction is 64-bit for m64 and 32-bit for m32.
22390fca6ea1SDimitry Andric   if (nvTM->is64Bit())
22400fca6ea1SDimitry Andric     Size = DAG.getZExtOrTrunc(Size, DL, MVT::i64);
22410fca6ea1SDimitry Andric   else
22420fca6ea1SDimitry Andric     Size = DAG.getZExtOrTrunc(Size, DL, MVT::i32);
22430fca6ea1SDimitry Andric 
22440fca6ea1SDimitry Andric   SDValue AllocOps[] = {Chain, Size,
22450fca6ea1SDimitry Andric                         DAG.getTargetConstant(Align, DL, MVT::i32)};
22460fca6ea1SDimitry Andric   SDValue Alloca = DAG.getNode(NVPTXISD::DYNAMIC_STACKALLOC, DL,
22470fca6ea1SDimitry Andric                                nvTM->is64Bit() ? MVT::i64 : MVT::i32, AllocOps);
22480fca6ea1SDimitry Andric 
22490fca6ea1SDimitry Andric   SDValue MergeOps[] = {Alloca, Chain};
22500fca6ea1SDimitry Andric   return DAG.getMergeValues(MergeOps, DL);
22510fca6ea1SDimitry Andric }
22520fca6ea1SDimitry Andric 
22530b57cec5SDimitry Andric // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
22540b57cec5SDimitry Andric // (see LegalizeDAG.cpp). This is slow and uses local memory.
22550b57cec5SDimitry Andric // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
22560b57cec5SDimitry Andric SDValue
22570b57cec5SDimitry Andric NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
22580b57cec5SDimitry Andric   SDNode *Node = Op.getNode();
22590b57cec5SDimitry Andric   SDLoc dl(Node);
22600b57cec5SDimitry Andric   SmallVector<SDValue, 8> Ops;
22610b57cec5SDimitry Andric   unsigned NumOperands = Node->getNumOperands();
22620b57cec5SDimitry Andric   for (unsigned i = 0; i < NumOperands; ++i) {
22630b57cec5SDimitry Andric     SDValue SubOp = Node->getOperand(i);
22640b57cec5SDimitry Andric     EVT VVT = SubOp.getNode()->getValueType(0);
22650b57cec5SDimitry Andric     EVT EltVT = VVT.getVectorElementType();
22660b57cec5SDimitry Andric     unsigned NumSubElem = VVT.getVectorNumElements();
22670b57cec5SDimitry Andric     for (unsigned j = 0; j < NumSubElem; ++j) {
22680b57cec5SDimitry Andric       Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
22690b57cec5SDimitry Andric                                 DAG.getIntPtrConstant(j, dl)));
22700b57cec5SDimitry Andric     }
22710b57cec5SDimitry Andric   }
22720b57cec5SDimitry Andric   return DAG.getBuildVector(Node->getValueType(0), dl, Ops);
22730b57cec5SDimitry Andric }
22740b57cec5SDimitry Andric 
22755f757f3fSDimitry Andric // We can init constant f16x2/v2i16/v4i8 with a single .b32 move.  Normally it
22760b57cec5SDimitry Andric // would get lowered as two constant loads and vector-packing move.
22770b57cec5SDimitry Andric // Instead we want just a constant move:
22785f757f3fSDimitry Andric //        mov.b32         %r2, 0x40003C00
22790b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op,
22800b57cec5SDimitry Andric                                                SelectionDAG &DAG) const {
22815f757f3fSDimitry Andric   EVT VT = Op->getValueType(0);
22825f757f3fSDimitry Andric   if (!(Isv2x16VT(VT) || VT == MVT::v4i8))
22830b57cec5SDimitry Andric     return Op;
22840b57cec5SDimitry Andric 
22855f757f3fSDimitry Andric   SDLoc DL(Op);
22865f757f3fSDimitry Andric 
22875f757f3fSDimitry Andric   if (!llvm::all_of(Op->ops(), [](SDValue Operand) {
22885f757f3fSDimitry Andric         return Operand->isUndef() || isa<ConstantSDNode>(Operand) ||
22895f757f3fSDimitry Andric                isa<ConstantFPSDNode>(Operand);
22905f757f3fSDimitry Andric       })) {
22915f757f3fSDimitry Andric     // Lower non-const v4i8 vector as byte-wise constructed i32, which allows us
22925f757f3fSDimitry Andric     // to optimize calculation of constant parts.
22935f757f3fSDimitry Andric     if (VT == MVT::v4i8) {
22945f757f3fSDimitry Andric       SDValue C8 = DAG.getConstant(8, DL, MVT::i32);
22955f757f3fSDimitry Andric       SDValue E01 = DAG.getNode(
22965f757f3fSDimitry Andric           NVPTXISD::BFI, DL, MVT::i32,
22975f757f3fSDimitry Andric           DAG.getAnyExtOrTrunc(Op->getOperand(1), DL, MVT::i32),
22985f757f3fSDimitry Andric           DAG.getAnyExtOrTrunc(Op->getOperand(0), DL, MVT::i32), C8, C8);
22995f757f3fSDimitry Andric       SDValue E012 =
23005f757f3fSDimitry Andric           DAG.getNode(NVPTXISD::BFI, DL, MVT::i32,
23015f757f3fSDimitry Andric                       DAG.getAnyExtOrTrunc(Op->getOperand(2), DL, MVT::i32),
23025f757f3fSDimitry Andric                       E01, DAG.getConstant(16, DL, MVT::i32), C8);
23035f757f3fSDimitry Andric       SDValue E0123 =
23045f757f3fSDimitry Andric           DAG.getNode(NVPTXISD::BFI, DL, MVT::i32,
23055f757f3fSDimitry Andric                       DAG.getAnyExtOrTrunc(Op->getOperand(3), DL, MVT::i32),
23065f757f3fSDimitry Andric                       E012, DAG.getConstant(24, DL, MVT::i32), C8);
23075f757f3fSDimitry Andric       return DAG.getNode(ISD::BITCAST, DL, VT, E0123);
23085f757f3fSDimitry Andric     }
23095f757f3fSDimitry Andric     return Op;
23105f757f3fSDimitry Andric   }
23115f757f3fSDimitry Andric 
23125f757f3fSDimitry Andric   // Get value or the Nth operand as an APInt(32). Undef values treated as 0.
23135f757f3fSDimitry Andric   auto GetOperand = [](SDValue Op, int N) -> APInt {
23145f757f3fSDimitry Andric     const SDValue &Operand = Op->getOperand(N);
23155f757f3fSDimitry Andric     EVT VT = Op->getValueType(0);
23165f757f3fSDimitry Andric     if (Operand->isUndef())
23175f757f3fSDimitry Andric       return APInt(32, 0);
23185f757f3fSDimitry Andric     APInt Value;
23195f757f3fSDimitry Andric     if (VT == MVT::v2f16 || VT == MVT::v2bf16)
23205f757f3fSDimitry Andric       Value = cast<ConstantFPSDNode>(Operand)->getValueAPF().bitcastToAPInt();
23215f757f3fSDimitry Andric     else if (VT == MVT::v2i16 || VT == MVT::v4i8)
2322297eecfbSDimitry Andric       Value = Operand->getAsAPIntVal();
23235f757f3fSDimitry Andric     else
23245f757f3fSDimitry Andric       llvm_unreachable("Unsupported type");
23255f757f3fSDimitry Andric     // i8 values are carried around as i16, so we need to zero out upper bits,
23265f757f3fSDimitry Andric     // so they do not get in the way of combining individual byte values
23275f757f3fSDimitry Andric     if (VT == MVT::v4i8)
23285f757f3fSDimitry Andric       Value = Value.trunc(8);
23295f757f3fSDimitry Andric     return Value.zext(32);
23305f757f3fSDimitry Andric   };
23315f757f3fSDimitry Andric   APInt Value;
23325f757f3fSDimitry Andric   if (Isv2x16VT(VT)) {
23335f757f3fSDimitry Andric     Value = GetOperand(Op, 0) | GetOperand(Op, 1).shl(16);
23345f757f3fSDimitry Andric   } else if (VT == MVT::v4i8) {
23355f757f3fSDimitry Andric     Value = GetOperand(Op, 0) | GetOperand(Op, 1).shl(8) |
23365f757f3fSDimitry Andric             GetOperand(Op, 2).shl(16) | GetOperand(Op, 3).shl(24);
23375f757f3fSDimitry Andric   } else {
23385f757f3fSDimitry Andric     llvm_unreachable("Unsupported type");
23395f757f3fSDimitry Andric   }
23405f757f3fSDimitry Andric   SDValue Const = DAG.getConstant(Value, SDLoc(Op), MVT::i32);
234106c3fb27SDimitry Andric   return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op->getValueType(0), Const);
23420b57cec5SDimitry Andric }
23430b57cec5SDimitry Andric 
23440b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
23450b57cec5SDimitry Andric                                                      SelectionDAG &DAG) const {
23460b57cec5SDimitry Andric   SDValue Index = Op->getOperand(1);
23475f757f3fSDimitry Andric   SDValue Vector = Op->getOperand(0);
23485f757f3fSDimitry Andric   SDLoc DL(Op);
23495f757f3fSDimitry Andric   EVT VectorVT = Vector.getValueType();
23505f757f3fSDimitry Andric 
23515f757f3fSDimitry Andric   if (VectorVT == MVT::v4i8) {
23525f757f3fSDimitry Andric     SDValue BFE =
23535f757f3fSDimitry Andric         DAG.getNode(NVPTXISD::BFE, DL, MVT::i32,
23545f757f3fSDimitry Andric                     {Vector,
23555f757f3fSDimitry Andric                      DAG.getNode(ISD::MUL, DL, MVT::i32,
23565f757f3fSDimitry Andric                                  DAG.getZExtOrTrunc(Index, DL, MVT::i32),
23575f757f3fSDimitry Andric                                  DAG.getConstant(8, DL, MVT::i32)),
23585f757f3fSDimitry Andric                      DAG.getConstant(8, DL, MVT::i32)});
23595f757f3fSDimitry Andric     return DAG.getAnyExtOrTrunc(BFE, DL, Op->getValueType(0));
23605f757f3fSDimitry Andric   }
23615f757f3fSDimitry Andric 
23620b57cec5SDimitry Andric   // Constant index will be matched by tablegen.
23630b57cec5SDimitry Andric   if (isa<ConstantSDNode>(Index.getNode()))
23640b57cec5SDimitry Andric     return Op;
23650b57cec5SDimitry Andric 
23660b57cec5SDimitry Andric   // Extract individual elements and select one of them.
23675f757f3fSDimitry Andric   assert(Isv2x16VT(VectorVT) && "Unexpected vector type.");
23680b57cec5SDimitry Andric   EVT EltVT = VectorVT.getVectorElementType();
23690b57cec5SDimitry Andric 
23700b57cec5SDimitry Andric   SDLoc dl(Op.getNode());
23710b57cec5SDimitry Andric   SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector,
23720b57cec5SDimitry Andric                            DAG.getIntPtrConstant(0, dl));
23730b57cec5SDimitry Andric   SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector,
23740b57cec5SDimitry Andric                            DAG.getIntPtrConstant(1, dl));
23750b57cec5SDimitry Andric   return DAG.getSelectCC(dl, Index, DAG.getIntPtrConstant(0, dl), E0, E1,
23760b57cec5SDimitry Andric                          ISD::CondCode::SETEQ);
23770b57cec5SDimitry Andric }
23780b57cec5SDimitry Andric 
23795f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
23805f757f3fSDimitry Andric                                                     SelectionDAG &DAG) const {
23815f757f3fSDimitry Andric   SDValue Vector = Op->getOperand(0);
23825f757f3fSDimitry Andric   EVT VectorVT = Vector.getValueType();
23835f757f3fSDimitry Andric 
23845f757f3fSDimitry Andric   if (VectorVT != MVT::v4i8)
23855f757f3fSDimitry Andric     return Op;
23865f757f3fSDimitry Andric   SDLoc DL(Op);
23875f757f3fSDimitry Andric   SDValue Value = Op->getOperand(1);
23885f757f3fSDimitry Andric   if (Value->isUndef())
23895f757f3fSDimitry Andric     return Vector;
23905f757f3fSDimitry Andric 
23915f757f3fSDimitry Andric   SDValue Index = Op->getOperand(2);
23925f757f3fSDimitry Andric 
23935f757f3fSDimitry Andric   SDValue BFI =
23945f757f3fSDimitry Andric       DAG.getNode(NVPTXISD::BFI, DL, MVT::i32,
23955f757f3fSDimitry Andric                   {DAG.getZExtOrTrunc(Value, DL, MVT::i32), Vector,
23965f757f3fSDimitry Andric                    DAG.getNode(ISD::MUL, DL, MVT::i32,
23975f757f3fSDimitry Andric                                DAG.getZExtOrTrunc(Index, DL, MVT::i32),
23985f757f3fSDimitry Andric                                DAG.getConstant(8, DL, MVT::i32)),
23995f757f3fSDimitry Andric                    DAG.getConstant(8, DL, MVT::i32)});
24005f757f3fSDimitry Andric   return DAG.getNode(ISD::BITCAST, DL, Op->getValueType(0), BFI);
24015f757f3fSDimitry Andric }
24025f757f3fSDimitry Andric 
24035f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
24045f757f3fSDimitry Andric                                                  SelectionDAG &DAG) const {
24055f757f3fSDimitry Andric   SDValue V1 = Op.getOperand(0);
24065f757f3fSDimitry Andric   EVT VectorVT = V1.getValueType();
24075f757f3fSDimitry Andric   if (VectorVT != MVT::v4i8 || Op.getValueType() != MVT::v4i8)
24085f757f3fSDimitry Andric     return Op;
24095f757f3fSDimitry Andric 
24105f757f3fSDimitry Andric   // Lower shuffle to PRMT instruction.
24115f757f3fSDimitry Andric   const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
24125f757f3fSDimitry Andric   SDValue V2 = Op.getOperand(1);
24135f757f3fSDimitry Andric   uint32_t Selector = 0;
24147a6dacacSDimitry Andric   for (auto I : llvm::enumerate(SVN->getMask())) {
24157a6dacacSDimitry Andric     if (I.value() != -1) // -1 is a placeholder for undef.
24165f757f3fSDimitry Andric       Selector |= (I.value() << (I.index() * 4));
24177a6dacacSDimitry Andric   }
24185f757f3fSDimitry Andric 
24195f757f3fSDimitry Andric   SDLoc DL(Op);
24205f757f3fSDimitry Andric   return DAG.getNode(NVPTXISD::PRMT, DL, MVT::v4i8, V1, V2,
24215f757f3fSDimitry Andric                      DAG.getConstant(Selector, DL, MVT::i32),
24225f757f3fSDimitry Andric                      DAG.getConstant(NVPTX::PTXPrmtMode::NONE, DL, MVT::i32));
24235f757f3fSDimitry Andric }
24240b57cec5SDimitry Andric /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
24250b57cec5SDimitry Andric /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
24260b57cec5SDimitry Andric ///    amount, or
24270b57cec5SDimitry Andric /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
24280b57cec5SDimitry Andric ///    amount.
24290b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
24300b57cec5SDimitry Andric                                                   SelectionDAG &DAG) const {
24310b57cec5SDimitry Andric   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
24320b57cec5SDimitry Andric   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
24330b57cec5SDimitry Andric 
24340b57cec5SDimitry Andric   EVT VT = Op.getValueType();
24350b57cec5SDimitry Andric   unsigned VTBits = VT.getSizeInBits();
24360b57cec5SDimitry Andric   SDLoc dl(Op);
24370b57cec5SDimitry Andric   SDValue ShOpLo = Op.getOperand(0);
24380b57cec5SDimitry Andric   SDValue ShOpHi = Op.getOperand(1);
24390b57cec5SDimitry Andric   SDValue ShAmt  = Op.getOperand(2);
24400b57cec5SDimitry Andric   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
24410b57cec5SDimitry Andric 
24420b57cec5SDimitry Andric   if (VTBits == 32 && STI.getSmVersion() >= 35) {
24430b57cec5SDimitry Andric     // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
24440b57cec5SDimitry Andric     // {dHi, dLo} = {aHi, aLo} >> Amt
24450b57cec5SDimitry Andric     //   dHi = aHi >> Amt
24460b57cec5SDimitry Andric     //   dLo = shf.r.clamp aLo, aHi, Amt
24470b57cec5SDimitry Andric 
24480b57cec5SDimitry Andric     SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
24490b57cec5SDimitry Andric     SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi,
24500b57cec5SDimitry Andric                              ShAmt);
24510b57cec5SDimitry Andric 
24520b57cec5SDimitry Andric     SDValue Ops[2] = { Lo, Hi };
24530b57cec5SDimitry Andric     return DAG.getMergeValues(Ops, dl);
24540b57cec5SDimitry Andric   }
24550b57cec5SDimitry Andric   else {
24560b57cec5SDimitry Andric     // {dHi, dLo} = {aHi, aLo} >> Amt
24570b57cec5SDimitry Andric     // - if (Amt>=size) then
24580b57cec5SDimitry Andric     //      dLo = aHi >> (Amt-size)
24590b57cec5SDimitry Andric     //      dHi = aHi >> Amt (this is either all 0 or all 1)
24600b57cec5SDimitry Andric     //   else
24610b57cec5SDimitry Andric     //      dLo = (aLo >>logic Amt) | (aHi << (size-Amt))
24620b57cec5SDimitry Andric     //      dHi = aHi >> Amt
24630b57cec5SDimitry Andric 
24640b57cec5SDimitry Andric     SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
24650b57cec5SDimitry Andric                                    DAG.getConstant(VTBits, dl, MVT::i32),
24660b57cec5SDimitry Andric                                    ShAmt);
24670b57cec5SDimitry Andric     SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
24680b57cec5SDimitry Andric     SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
24690b57cec5SDimitry Andric                                      DAG.getConstant(VTBits, dl, MVT::i32));
24700b57cec5SDimitry Andric     SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
24710b57cec5SDimitry Andric     SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
24720b57cec5SDimitry Andric     SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
24730b57cec5SDimitry Andric 
24740b57cec5SDimitry Andric     SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
24750b57cec5SDimitry Andric                                DAG.getConstant(VTBits, dl, MVT::i32),
24760b57cec5SDimitry Andric                                ISD::SETGE);
24770b57cec5SDimitry Andric     SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
24780b57cec5SDimitry Andric     SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
24790b57cec5SDimitry Andric 
24800b57cec5SDimitry Andric     SDValue Ops[2] = { Lo, Hi };
24810b57cec5SDimitry Andric     return DAG.getMergeValues(Ops, dl);
24820b57cec5SDimitry Andric   }
24830b57cec5SDimitry Andric }
24840b57cec5SDimitry Andric 
24850b57cec5SDimitry Andric /// LowerShiftLeftParts - Lower SHL_PARTS, which
24860b57cec5SDimitry Andric /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
24870b57cec5SDimitry Andric ///    amount, or
24880b57cec5SDimitry Andric /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
24890b57cec5SDimitry Andric ///    amount.
24900b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
24910b57cec5SDimitry Andric                                                  SelectionDAG &DAG) const {
24920b57cec5SDimitry Andric   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
24930b57cec5SDimitry Andric   assert(Op.getOpcode() == ISD::SHL_PARTS);
24940b57cec5SDimitry Andric 
24950b57cec5SDimitry Andric   EVT VT = Op.getValueType();
24960b57cec5SDimitry Andric   unsigned VTBits = VT.getSizeInBits();
24970b57cec5SDimitry Andric   SDLoc dl(Op);
24980b57cec5SDimitry Andric   SDValue ShOpLo = Op.getOperand(0);
24990b57cec5SDimitry Andric   SDValue ShOpHi = Op.getOperand(1);
25000b57cec5SDimitry Andric   SDValue ShAmt  = Op.getOperand(2);
25010b57cec5SDimitry Andric 
25020b57cec5SDimitry Andric   if (VTBits == 32 && STI.getSmVersion() >= 35) {
25030b57cec5SDimitry Andric     // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
25040b57cec5SDimitry Andric     // {dHi, dLo} = {aHi, aLo} << Amt
25050b57cec5SDimitry Andric     //   dHi = shf.l.clamp aLo, aHi, Amt
25060b57cec5SDimitry Andric     //   dLo = aLo << Amt
25070b57cec5SDimitry Andric 
25080b57cec5SDimitry Andric     SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
25090b57cec5SDimitry Andric                              ShAmt);
25100b57cec5SDimitry Andric     SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
25110b57cec5SDimitry Andric 
25120b57cec5SDimitry Andric     SDValue Ops[2] = { Lo, Hi };
25130b57cec5SDimitry Andric     return DAG.getMergeValues(Ops, dl);
25140b57cec5SDimitry Andric   }
25150b57cec5SDimitry Andric   else {
25160b57cec5SDimitry Andric     // {dHi, dLo} = {aHi, aLo} << Amt
25170b57cec5SDimitry Andric     // - if (Amt>=size) then
25180b57cec5SDimitry Andric     //      dLo = aLo << Amt (all 0)
25190b57cec5SDimitry Andric     //      dLo = aLo << (Amt-size)
25200b57cec5SDimitry Andric     //   else
25210b57cec5SDimitry Andric     //      dLo = aLo << Amt
25220b57cec5SDimitry Andric     //      dHi = (aHi << Amt) | (aLo >> (size-Amt))
25230b57cec5SDimitry Andric 
25240b57cec5SDimitry Andric     SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
25250b57cec5SDimitry Andric                                    DAG.getConstant(VTBits, dl, MVT::i32),
25260b57cec5SDimitry Andric                                    ShAmt);
25270b57cec5SDimitry Andric     SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
25280b57cec5SDimitry Andric     SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
25290b57cec5SDimitry Andric                                      DAG.getConstant(VTBits, dl, MVT::i32));
25300b57cec5SDimitry Andric     SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
25310b57cec5SDimitry Andric     SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
25320b57cec5SDimitry Andric     SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
25330b57cec5SDimitry Andric 
25340b57cec5SDimitry Andric     SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
25350b57cec5SDimitry Andric                                DAG.getConstant(VTBits, dl, MVT::i32),
25360b57cec5SDimitry Andric                                ISD::SETGE);
25370b57cec5SDimitry Andric     SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
25380b57cec5SDimitry Andric     SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
25390b57cec5SDimitry Andric 
25400b57cec5SDimitry Andric     SDValue Ops[2] = { Lo, Hi };
25410b57cec5SDimitry Andric     return DAG.getMergeValues(Ops, dl);
25420b57cec5SDimitry Andric   }
25430b57cec5SDimitry Andric }
25440b57cec5SDimitry Andric 
25450b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
25460b57cec5SDimitry Andric   EVT VT = Op.getValueType();
25470b57cec5SDimitry Andric 
25480b57cec5SDimitry Andric   if (VT == MVT::f32)
25490b57cec5SDimitry Andric     return LowerFROUND32(Op, DAG);
25500b57cec5SDimitry Andric 
25510b57cec5SDimitry Andric   if (VT == MVT::f64)
25520b57cec5SDimitry Andric     return LowerFROUND64(Op, DAG);
25530b57cec5SDimitry Andric 
25540b57cec5SDimitry Andric   llvm_unreachable("unhandled type");
25550b57cec5SDimitry Andric }
25560b57cec5SDimitry Andric 
25570b57cec5SDimitry Andric // This is the the rounding method used in CUDA libdevice in C like code:
25580b57cec5SDimitry Andric // float roundf(float A)
25590b57cec5SDimitry Andric // {
25600b57cec5SDimitry Andric //   float RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f));
25610b57cec5SDimitry Andric //   RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA;
25620b57cec5SDimitry Andric //   return abs(A) < 0.5 ? (float)(int)A : RoundedA;
25630b57cec5SDimitry Andric // }
25640b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerFROUND32(SDValue Op,
25650b57cec5SDimitry Andric                                            SelectionDAG &DAG) const {
25660b57cec5SDimitry Andric   SDLoc SL(Op);
25670b57cec5SDimitry Andric   SDValue A = Op.getOperand(0);
25680b57cec5SDimitry Andric   EVT VT = Op.getValueType();
25690b57cec5SDimitry Andric 
25700b57cec5SDimitry Andric   SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A);
25710b57cec5SDimitry Andric 
25720b57cec5SDimitry Andric   // RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f))
25730b57cec5SDimitry Andric   SDValue Bitcast  = DAG.getNode(ISD::BITCAST, SL, MVT::i32, A);
25740b57cec5SDimitry Andric   const int SignBitMask = 0x80000000;
25750b57cec5SDimitry Andric   SDValue Sign = DAG.getNode(ISD::AND, SL, MVT::i32, Bitcast,
25760b57cec5SDimitry Andric                              DAG.getConstant(SignBitMask, SL, MVT::i32));
25770b57cec5SDimitry Andric   const int PointFiveInBits = 0x3F000000;
25780b57cec5SDimitry Andric   SDValue PointFiveWithSignRaw =
25790b57cec5SDimitry Andric       DAG.getNode(ISD::OR, SL, MVT::i32, Sign,
25800b57cec5SDimitry Andric                   DAG.getConstant(PointFiveInBits, SL, MVT::i32));
25810b57cec5SDimitry Andric   SDValue PointFiveWithSign =
25820b57cec5SDimitry Andric       DAG.getNode(ISD::BITCAST, SL, VT, PointFiveWithSignRaw);
25830b57cec5SDimitry Andric   SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, A, PointFiveWithSign);
25840b57cec5SDimitry Andric   SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA);
25850b57cec5SDimitry Andric 
25860b57cec5SDimitry Andric   // RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA;
25870b57cec5SDimitry Andric   EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
25880b57cec5SDimitry Andric   SDValue IsLarge =
25890b57cec5SDimitry Andric       DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 23.0), SL, VT),
25900b57cec5SDimitry Andric                    ISD::SETOGT);
25910b57cec5SDimitry Andric   RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA);
25920b57cec5SDimitry Andric 
25930b57cec5SDimitry Andric   // return abs(A) < 0.5 ? (float)(int)A : RoundedA;
25940b57cec5SDimitry Andric   SDValue IsSmall =DAG.getSetCC(SL, SetCCVT, AbsA,
25950b57cec5SDimitry Andric                                 DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT);
25960b57cec5SDimitry Andric   SDValue RoundedAForSmallA = DAG.getNode(ISD::FTRUNC, SL, VT, A);
25970b57cec5SDimitry Andric   return DAG.getNode(ISD::SELECT, SL, VT, IsSmall, RoundedAForSmallA, RoundedA);
25980b57cec5SDimitry Andric }
25990b57cec5SDimitry Andric 
26000b57cec5SDimitry Andric // The implementation of round(double) is similar to that of round(float) in
26010b57cec5SDimitry Andric // that they both separate the value range into three regions and use a method
26020b57cec5SDimitry Andric // specific to the region to round the values. However, round(double) first
26030b57cec5SDimitry Andric // calculates the round of the absolute value and then adds the sign back while
26040b57cec5SDimitry Andric // round(float) directly rounds the value with sign.
26050b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerFROUND64(SDValue Op,
26060b57cec5SDimitry Andric                                            SelectionDAG &DAG) const {
26070b57cec5SDimitry Andric   SDLoc SL(Op);
26080b57cec5SDimitry Andric   SDValue A = Op.getOperand(0);
26090b57cec5SDimitry Andric   EVT VT = Op.getValueType();
26100b57cec5SDimitry Andric 
26110b57cec5SDimitry Andric   SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A);
26120b57cec5SDimitry Andric 
26130b57cec5SDimitry Andric   // double RoundedA = (double) (int) (abs(A) + 0.5f);
26140b57cec5SDimitry Andric   SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, AbsA,
26150b57cec5SDimitry Andric                                   DAG.getConstantFP(0.5, SL, VT));
26160b57cec5SDimitry Andric   SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA);
26170b57cec5SDimitry Andric 
26180b57cec5SDimitry Andric   // RoundedA = abs(A) < 0.5 ? (double)0 : RoundedA;
26190b57cec5SDimitry Andric   EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
26200b57cec5SDimitry Andric   SDValue IsSmall =DAG.getSetCC(SL, SetCCVT, AbsA,
26210b57cec5SDimitry Andric                                 DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT);
26220b57cec5SDimitry Andric   RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsSmall,
26230b57cec5SDimitry Andric                          DAG.getConstantFP(0, SL, VT),
26240b57cec5SDimitry Andric                          RoundedA);
26250b57cec5SDimitry Andric 
26260b57cec5SDimitry Andric   // Add sign to rounded_A
26270b57cec5SDimitry Andric   RoundedA = DAG.getNode(ISD::FCOPYSIGN, SL, VT, RoundedA, A);
26280b57cec5SDimitry Andric   DAG.getNode(ISD::FTRUNC, SL, VT, A);
26290b57cec5SDimitry Andric 
26300b57cec5SDimitry Andric   // RoundedA = abs(A) > 0x1.0p52 ? A : RoundedA;
26310b57cec5SDimitry Andric   SDValue IsLarge =
26320b57cec5SDimitry Andric       DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 52.0), SL, VT),
26330b57cec5SDimitry Andric                    ISD::SETOGT);
26340b57cec5SDimitry Andric   return DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA);
26350b57cec5SDimitry Andric }
26360b57cec5SDimitry Andric 
26375f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerINT_TO_FP(SDValue Op,
26385f757f3fSDimitry Andric                                             SelectionDAG &DAG) const {
26395f757f3fSDimitry Andric   assert(STI.getSmVersion() < 90 || STI.getPTXVersion() < 78);
26400b57cec5SDimitry Andric 
26415f757f3fSDimitry Andric   if (Op.getValueType() == MVT::bf16) {
26425f757f3fSDimitry Andric     SDLoc Loc(Op);
26435f757f3fSDimitry Andric     return DAG.getNode(
26445f757f3fSDimitry Andric         ISD::FP_ROUND, Loc, MVT::bf16,
26455f757f3fSDimitry Andric         DAG.getNode(Op.getOpcode(), Loc, MVT::f32, Op.getOperand(0)),
26465f757f3fSDimitry Andric         DAG.getIntPtrConstant(0, Loc));
26475f757f3fSDimitry Andric   }
26485f757f3fSDimitry Andric 
26495f757f3fSDimitry Andric   // Everything else is considered legal.
26505f757f3fSDimitry Andric   return Op;
26515f757f3fSDimitry Andric }
26525f757f3fSDimitry Andric 
26535f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerFP_TO_INT(SDValue Op,
26545f757f3fSDimitry Andric                                             SelectionDAG &DAG) const {
26555f757f3fSDimitry Andric   assert(STI.getSmVersion() < 90 || STI.getPTXVersion() < 78);
26565f757f3fSDimitry Andric 
26575f757f3fSDimitry Andric   if (Op.getOperand(0).getValueType() == MVT::bf16) {
26585f757f3fSDimitry Andric     SDLoc Loc(Op);
26595f757f3fSDimitry Andric     return DAG.getNode(
26605f757f3fSDimitry Andric         Op.getOpcode(), Loc, Op.getValueType(),
26615f757f3fSDimitry Andric         DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f32, Op.getOperand(0)));
26625f757f3fSDimitry Andric   }
26635f757f3fSDimitry Andric 
26645f757f3fSDimitry Andric   // Everything else is considered legal.
26655f757f3fSDimitry Andric   return Op;
26665f757f3fSDimitry Andric }
26675f757f3fSDimitry Andric 
26680fca6ea1SDimitry Andric SDValue NVPTXTargetLowering::LowerFP_ROUND(SDValue Op,
26690fca6ea1SDimitry Andric                                            SelectionDAG &DAG) const {
26700fca6ea1SDimitry Andric   EVT NarrowVT = Op.getValueType();
26710fca6ea1SDimitry Andric   SDValue Wide = Op.getOperand(0);
26720fca6ea1SDimitry Andric   EVT WideVT = Wide.getValueType();
26730fca6ea1SDimitry Andric   if (NarrowVT.getScalarType() == MVT::bf16) {
26740fca6ea1SDimitry Andric     const TargetLowering *TLI = STI.getTargetLowering();
26750fca6ea1SDimitry Andric     if (STI.getSmVersion() < 80 || STI.getPTXVersion() < 70) {
26760fca6ea1SDimitry Andric       return TLI->expandFP_ROUND(Op.getNode(), DAG);
26770fca6ea1SDimitry Andric     }
26780fca6ea1SDimitry Andric     if (STI.getSmVersion() < 90 || STI.getPTXVersion() < 78) {
26790fca6ea1SDimitry Andric       // This combination was the first to support f32 -> bf16.
26800fca6ea1SDimitry Andric       if (STI.getSmVersion() >= 80 && STI.getPTXVersion() >= 70) {
26810fca6ea1SDimitry Andric         if (WideVT.getScalarType() == MVT::f32) {
26820fca6ea1SDimitry Andric           return Op;
26830fca6ea1SDimitry Andric         }
26840fca6ea1SDimitry Andric         if (WideVT.getScalarType() == MVT::f64) {
26850fca6ea1SDimitry Andric           SDLoc Loc(Op);
26860fca6ea1SDimitry Andric           // Round-inexact-to-odd f64 to f32, then do the final rounding using
26870fca6ea1SDimitry Andric           // the hardware f32 -> bf16 instruction.
26880fca6ea1SDimitry Andric           SDValue rod = TLI->expandRoundInexactToOdd(
26890fca6ea1SDimitry Andric               WideVT.isVector() ? WideVT.changeVectorElementType(MVT::f32)
26900fca6ea1SDimitry Andric                                 : MVT::f32,
26910fca6ea1SDimitry Andric               Wide, Loc, DAG);
26920fca6ea1SDimitry Andric           return DAG.getFPExtendOrRound(rod, Loc, NarrowVT);
26930fca6ea1SDimitry Andric         }
26940fca6ea1SDimitry Andric       }
26950fca6ea1SDimitry Andric       return TLI->expandFP_ROUND(Op.getNode(), DAG);
26960fca6ea1SDimitry Andric     }
26970fca6ea1SDimitry Andric   }
26980fca6ea1SDimitry Andric 
26990fca6ea1SDimitry Andric   // Everything else is considered legal.
27000fca6ea1SDimitry Andric   return Op;
27010fca6ea1SDimitry Andric }
27020fca6ea1SDimitry Andric 
27030fca6ea1SDimitry Andric SDValue NVPTXTargetLowering::LowerFP_EXTEND(SDValue Op,
27040fca6ea1SDimitry Andric                                             SelectionDAG &DAG) const {
27050fca6ea1SDimitry Andric   SDValue Narrow = Op.getOperand(0);
27060fca6ea1SDimitry Andric   EVT NarrowVT = Narrow.getValueType();
27070fca6ea1SDimitry Andric   EVT WideVT = Op.getValueType();
27080fca6ea1SDimitry Andric   if (NarrowVT.getScalarType() == MVT::bf16) {
27090fca6ea1SDimitry Andric     if (WideVT.getScalarType() == MVT::f32 &&
27100fca6ea1SDimitry Andric         (STI.getSmVersion() < 80 || STI.getPTXVersion() < 71)) {
27110fca6ea1SDimitry Andric       SDLoc Loc(Op);
27120fca6ea1SDimitry Andric       return DAG.getNode(ISD::BF16_TO_FP, Loc, WideVT, Narrow);
27130fca6ea1SDimitry Andric     }
27140fca6ea1SDimitry Andric     if (WideVT.getScalarType() == MVT::f64 &&
27150fca6ea1SDimitry Andric         (STI.getSmVersion() < 90 || STI.getPTXVersion() < 78)) {
27160fca6ea1SDimitry Andric       EVT F32 = NarrowVT.isVector() ? NarrowVT.changeVectorElementType(MVT::f32)
27170fca6ea1SDimitry Andric                                     : MVT::f32;
27180fca6ea1SDimitry Andric       SDLoc Loc(Op);
27190fca6ea1SDimitry Andric       if (STI.getSmVersion() >= 80 && STI.getPTXVersion() >= 71) {
27200fca6ea1SDimitry Andric         Op = DAG.getNode(ISD::FP_EXTEND, Loc, F32, Narrow);
27210fca6ea1SDimitry Andric       } else {
27220fca6ea1SDimitry Andric         Op = DAG.getNode(ISD::BF16_TO_FP, Loc, F32, Narrow);
27230fca6ea1SDimitry Andric       }
27240fca6ea1SDimitry Andric       return DAG.getNode(ISD::FP_EXTEND, Loc, WideVT, Op);
27250fca6ea1SDimitry Andric     }
27260fca6ea1SDimitry Andric   }
27270fca6ea1SDimitry Andric 
27280fca6ea1SDimitry Andric   // Everything else is considered legal.
27290fca6ea1SDimitry Andric   return Op;
27300fca6ea1SDimitry Andric }
27310fca6ea1SDimitry Andric 
27325f757f3fSDimitry Andric static SDValue LowerVectorArith(SDValue Op, SelectionDAG &DAG) {
27335f757f3fSDimitry Andric   SDLoc DL(Op);
27345f757f3fSDimitry Andric   if (Op.getValueType() != MVT::v2i16)
27355f757f3fSDimitry Andric     return Op;
27365f757f3fSDimitry Andric   EVT EltVT = Op.getValueType().getVectorElementType();
27375f757f3fSDimitry Andric   SmallVector<SDValue> VecElements;
27385f757f3fSDimitry Andric   for (int I = 0, E = Op.getValueType().getVectorNumElements(); I < E; I++) {
27395f757f3fSDimitry Andric     SmallVector<SDValue> ScalarArgs;
27405f757f3fSDimitry Andric     llvm::transform(Op->ops(), std::back_inserter(ScalarArgs),
27415f757f3fSDimitry Andric                     [&](const SDUse &O) {
27425f757f3fSDimitry Andric                       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
27435f757f3fSDimitry Andric                                          O.get(), DAG.getIntPtrConstant(I, DL));
27445f757f3fSDimitry Andric                     });
27455f757f3fSDimitry Andric     VecElements.push_back(DAG.getNode(Op.getOpcode(), DL, EltVT, ScalarArgs));
27465f757f3fSDimitry Andric   }
27475f757f3fSDimitry Andric   SDValue V =
27485f757f3fSDimitry Andric       DAG.getNode(ISD::BUILD_VECTOR, DL, Op.getValueType(), VecElements);
27495f757f3fSDimitry Andric   return V;
27505f757f3fSDimitry Andric }
27510b57cec5SDimitry Andric 
27520b57cec5SDimitry Andric SDValue
27530b57cec5SDimitry Andric NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
27540b57cec5SDimitry Andric   switch (Op.getOpcode()) {
27550b57cec5SDimitry Andric   case ISD::RETURNADDR:
27560b57cec5SDimitry Andric     return SDValue();
27570b57cec5SDimitry Andric   case ISD::FRAMEADDR:
27580b57cec5SDimitry Andric     return SDValue();
27590b57cec5SDimitry Andric   case ISD::GlobalAddress:
27600b57cec5SDimitry Andric     return LowerGlobalAddress(Op, DAG);
27610b57cec5SDimitry Andric   case ISD::INTRINSIC_W_CHAIN:
27620b57cec5SDimitry Andric     return Op;
27630b57cec5SDimitry Andric   case ISD::BUILD_VECTOR:
27640b57cec5SDimitry Andric     return LowerBUILD_VECTOR(Op, DAG);
27650b57cec5SDimitry Andric   case ISD::EXTRACT_SUBVECTOR:
27660b57cec5SDimitry Andric     return Op;
27670b57cec5SDimitry Andric   case ISD::EXTRACT_VECTOR_ELT:
27680b57cec5SDimitry Andric     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
27695f757f3fSDimitry Andric   case ISD::INSERT_VECTOR_ELT:
27705f757f3fSDimitry Andric     return LowerINSERT_VECTOR_ELT(Op, DAG);
27715f757f3fSDimitry Andric   case ISD::VECTOR_SHUFFLE:
27725f757f3fSDimitry Andric     return LowerVECTOR_SHUFFLE(Op, DAG);
27730b57cec5SDimitry Andric   case ISD::CONCAT_VECTORS:
27740b57cec5SDimitry Andric     return LowerCONCAT_VECTORS(Op, DAG);
27750b57cec5SDimitry Andric   case ISD::STORE:
27760b57cec5SDimitry Andric     return LowerSTORE(Op, DAG);
27770b57cec5SDimitry Andric   case ISD::LOAD:
27780b57cec5SDimitry Andric     return LowerLOAD(Op, DAG);
27790b57cec5SDimitry Andric   case ISD::SHL_PARTS:
27800b57cec5SDimitry Andric     return LowerShiftLeftParts(Op, DAG);
27810b57cec5SDimitry Andric   case ISD::SRA_PARTS:
27820b57cec5SDimitry Andric   case ISD::SRL_PARTS:
27830b57cec5SDimitry Andric     return LowerShiftRightParts(Op, DAG);
27840b57cec5SDimitry Andric   case ISD::SELECT:
27850b57cec5SDimitry Andric     return LowerSelect(Op, DAG);
27860b57cec5SDimitry Andric   case ISD::FROUND:
27870b57cec5SDimitry Andric     return LowerFROUND(Op, DAG);
27885f757f3fSDimitry Andric   case ISD::SINT_TO_FP:
27895f757f3fSDimitry Andric   case ISD::UINT_TO_FP:
27905f757f3fSDimitry Andric     return LowerINT_TO_FP(Op, DAG);
27915f757f3fSDimitry Andric   case ISD::FP_TO_SINT:
27925f757f3fSDimitry Andric   case ISD::FP_TO_UINT:
27935f757f3fSDimitry Andric     return LowerFP_TO_INT(Op, DAG);
27940fca6ea1SDimitry Andric   case ISD::FP_ROUND:
27950fca6ea1SDimitry Andric     return LowerFP_ROUND(Op, DAG);
27960fca6ea1SDimitry Andric   case ISD::FP_EXTEND:
27970fca6ea1SDimitry Andric     return LowerFP_EXTEND(Op, DAG);
2798bdd1243dSDimitry Andric   case ISD::VAARG:
2799bdd1243dSDimitry Andric     return LowerVAARG(Op, DAG);
2800bdd1243dSDimitry Andric   case ISD::VASTART:
2801bdd1243dSDimitry Andric     return LowerVASTART(Op, DAG);
28025f757f3fSDimitry Andric   case ISD::ABS:
28035f757f3fSDimitry Andric   case ISD::SMIN:
28045f757f3fSDimitry Andric   case ISD::SMAX:
28055f757f3fSDimitry Andric   case ISD::UMIN:
28065f757f3fSDimitry Andric   case ISD::UMAX:
28075f757f3fSDimitry Andric   case ISD::ADD:
28085f757f3fSDimitry Andric   case ISD::SUB:
28095f757f3fSDimitry Andric   case ISD::MUL:
28105f757f3fSDimitry Andric   case ISD::SHL:
28115f757f3fSDimitry Andric   case ISD::SREM:
28125f757f3fSDimitry Andric   case ISD::UREM:
28135f757f3fSDimitry Andric     return LowerVectorArith(Op, DAG);
28145f757f3fSDimitry Andric   case ISD::DYNAMIC_STACKALLOC:
28155f757f3fSDimitry Andric     return LowerDYNAMIC_STACKALLOC(Op, DAG);
28160fca6ea1SDimitry Andric   case ISD::CopyToReg:
28170fca6ea1SDimitry Andric     return LowerCopyToReg_128(Op, DAG);
28180b57cec5SDimitry Andric   default:
28190b57cec5SDimitry Andric     llvm_unreachable("Custom lowering not defined for operation");
28200b57cec5SDimitry Andric   }
28210b57cec5SDimitry Andric }
28220b57cec5SDimitry Andric 
2823bdd1243dSDimitry Andric // This function is almost a copy of SelectionDAG::expandVAArg().
2824bdd1243dSDimitry Andric // The only diff is that this one produces loads from local address space.
2825bdd1243dSDimitry Andric SDValue NVPTXTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
2826bdd1243dSDimitry Andric   const TargetLowering *TLI = STI.getTargetLowering();
2827bdd1243dSDimitry Andric   SDLoc DL(Op);
2828bdd1243dSDimitry Andric 
2829bdd1243dSDimitry Andric   SDNode *Node = Op.getNode();
2830bdd1243dSDimitry Andric   const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2831bdd1243dSDimitry Andric   EVT VT = Node->getValueType(0);
2832bdd1243dSDimitry Andric   auto *Ty = VT.getTypeForEVT(*DAG.getContext());
2833bdd1243dSDimitry Andric   SDValue Tmp1 = Node->getOperand(0);
2834bdd1243dSDimitry Andric   SDValue Tmp2 = Node->getOperand(1);
2835bdd1243dSDimitry Andric   const MaybeAlign MA(Node->getConstantOperandVal(3));
2836bdd1243dSDimitry Andric 
2837bdd1243dSDimitry Andric   SDValue VAListLoad = DAG.getLoad(TLI->getPointerTy(DAG.getDataLayout()), DL,
2838bdd1243dSDimitry Andric                                    Tmp1, Tmp2, MachinePointerInfo(V));
2839bdd1243dSDimitry Andric   SDValue VAList = VAListLoad;
2840bdd1243dSDimitry Andric 
2841bdd1243dSDimitry Andric   if (MA && *MA > TLI->getMinStackArgumentAlignment()) {
2842bdd1243dSDimitry Andric     VAList = DAG.getNode(
2843bdd1243dSDimitry Andric         ISD::ADD, DL, VAList.getValueType(), VAList,
2844bdd1243dSDimitry Andric         DAG.getConstant(MA->value() - 1, DL, VAList.getValueType()));
2845bdd1243dSDimitry Andric 
2846bdd1243dSDimitry Andric     VAList = DAG.getNode(
2847bdd1243dSDimitry Andric         ISD::AND, DL, VAList.getValueType(), VAList,
2848bdd1243dSDimitry Andric         DAG.getConstant(-(int64_t)MA->value(), DL, VAList.getValueType()));
2849bdd1243dSDimitry Andric   }
2850bdd1243dSDimitry Andric 
2851bdd1243dSDimitry Andric   // Increment the pointer, VAList, to the next vaarg
2852bdd1243dSDimitry Andric   Tmp1 = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList,
2853bdd1243dSDimitry Andric                      DAG.getConstant(DAG.getDataLayout().getTypeAllocSize(Ty),
2854bdd1243dSDimitry Andric                                      DL, VAList.getValueType()));
2855bdd1243dSDimitry Andric 
2856bdd1243dSDimitry Andric   // Store the incremented VAList to the legalized pointer
2857bdd1243dSDimitry Andric   Tmp1 = DAG.getStore(VAListLoad.getValue(1), DL, Tmp1, Tmp2,
2858bdd1243dSDimitry Andric                       MachinePointerInfo(V));
2859bdd1243dSDimitry Andric 
2860bdd1243dSDimitry Andric   const Value *SrcV =
2861bdd1243dSDimitry Andric       Constant::getNullValue(PointerType::get(Ty, ADDRESS_SPACE_LOCAL));
2862bdd1243dSDimitry Andric 
2863bdd1243dSDimitry Andric   // Load the actual argument out of the pointer VAList
2864bdd1243dSDimitry Andric   return DAG.getLoad(VT, DL, Tmp1, VAList, MachinePointerInfo(SrcV));
2865bdd1243dSDimitry Andric }
2866bdd1243dSDimitry Andric 
2867bdd1243dSDimitry Andric SDValue NVPTXTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
2868bdd1243dSDimitry Andric   const TargetLowering *TLI = STI.getTargetLowering();
2869bdd1243dSDimitry Andric   SDLoc DL(Op);
2870bdd1243dSDimitry Andric   EVT PtrVT = TLI->getPointerTy(DAG.getDataLayout());
2871bdd1243dSDimitry Andric 
2872bdd1243dSDimitry Andric   // Store the address of unsized array <function>_vararg[] in the ap object.
2873bdd1243dSDimitry Andric   SDValue Arg = getParamSymbol(DAG, /* vararg */ -1, PtrVT);
2874bdd1243dSDimitry Andric   SDValue VAReg = DAG.getNode(NVPTXISD::Wrapper, DL, PtrVT, Arg);
2875bdd1243dSDimitry Andric 
2876bdd1243dSDimitry Andric   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2877bdd1243dSDimitry Andric   return DAG.getStore(Op.getOperand(0), DL, VAReg, Op.getOperand(1),
2878bdd1243dSDimitry Andric                       MachinePointerInfo(SV));
2879bdd1243dSDimitry Andric }
2880bdd1243dSDimitry Andric 
28810b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
28820b57cec5SDimitry Andric   SDValue Op0 = Op->getOperand(0);
28830b57cec5SDimitry Andric   SDValue Op1 = Op->getOperand(1);
28840b57cec5SDimitry Andric   SDValue Op2 = Op->getOperand(2);
28850b57cec5SDimitry Andric   SDLoc DL(Op.getNode());
28860b57cec5SDimitry Andric 
28870b57cec5SDimitry Andric   assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
28880b57cec5SDimitry Andric 
28890b57cec5SDimitry Andric   Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
28900b57cec5SDimitry Andric   Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
28910b57cec5SDimitry Andric   SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
28920b57cec5SDimitry Andric   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);
28930b57cec5SDimitry Andric 
28940b57cec5SDimitry Andric   return Trunc;
28950b57cec5SDimitry Andric }
28960b57cec5SDimitry Andric 
28970b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
28980b57cec5SDimitry Andric   if (Op.getValueType() == MVT::i1)
28990b57cec5SDimitry Andric     return LowerLOADi1(Op, DAG);
29000b57cec5SDimitry Andric 
29015f757f3fSDimitry Andric   // v2f16/v2bf16/v2i16/v4i8 are legal, so we can't rely on legalizer to handle
29025f757f3fSDimitry Andric   // unaligned loads and have to handle it here.
29035f757f3fSDimitry Andric   EVT VT = Op.getValueType();
29045f757f3fSDimitry Andric   if (Isv2x16VT(VT) || VT == MVT::v4i8) {
29050b57cec5SDimitry Andric     LoadSDNode *Load = cast<LoadSDNode>(Op);
29060b57cec5SDimitry Andric     EVT MemVT = Load->getMemoryVT();
29078bcb0991SDimitry Andric     if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
29088bcb0991SDimitry Andric                                         MemVT, *Load->getMemOperand())) {
29090b57cec5SDimitry Andric       SDValue Ops[2];
29100b57cec5SDimitry Andric       std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
29110b57cec5SDimitry Andric       return DAG.getMergeValues(Ops, SDLoc(Op));
29120b57cec5SDimitry Andric     }
29130b57cec5SDimitry Andric   }
29140b57cec5SDimitry Andric 
29150b57cec5SDimitry Andric   return SDValue();
29160b57cec5SDimitry Andric }
29170b57cec5SDimitry Andric 
29180b57cec5SDimitry Andric // v = ld i1* addr
29190b57cec5SDimitry Andric //   =>
29200b57cec5SDimitry Andric // v1 = ld i8* addr (-> i16)
29210b57cec5SDimitry Andric // v = trunc i16 to i1
29220b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
29230b57cec5SDimitry Andric   SDNode *Node = Op.getNode();
29240b57cec5SDimitry Andric   LoadSDNode *LD = cast<LoadSDNode>(Node);
29250b57cec5SDimitry Andric   SDLoc dl(Node);
29260b57cec5SDimitry Andric   assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
29270b57cec5SDimitry Andric   assert(Node->getValueType(0) == MVT::i1 &&
29280b57cec5SDimitry Andric          "Custom lowering for i1 load only");
29290fca6ea1SDimitry Andric   SDValue newLD = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i16, LD->getChain(),
29300fca6ea1SDimitry Andric                                  LD->getBasePtr(), LD->getPointerInfo(),
29310fca6ea1SDimitry Andric                                  MVT::i8, LD->getAlign(),
29320b57cec5SDimitry Andric                                  LD->getMemOperand()->getFlags());
29330b57cec5SDimitry Andric   SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
29340b57cec5SDimitry Andric   // The legalizer (the caller) is expecting two values from the legalized
29350b57cec5SDimitry Andric   // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
29360b57cec5SDimitry Andric   // in LegalizeDAG.cpp which also uses MergeValues.
29370b57cec5SDimitry Andric   SDValue Ops[] = { result, LD->getChain() };
29380b57cec5SDimitry Andric   return DAG.getMergeValues(Ops, dl);
29390b57cec5SDimitry Andric }
29400b57cec5SDimitry Andric 
29410b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
29420b57cec5SDimitry Andric   StoreSDNode *Store = cast<StoreSDNode>(Op);
29430b57cec5SDimitry Andric   EVT VT = Store->getMemoryVT();
29440b57cec5SDimitry Andric 
29450b57cec5SDimitry Andric   if (VT == MVT::i1)
29460b57cec5SDimitry Andric     return LowerSTOREi1(Op, DAG);
29470b57cec5SDimitry Andric 
29480b57cec5SDimitry Andric   // v2f16 is legal, so we can't rely on legalizer to handle unaligned
29490b57cec5SDimitry Andric   // stores and have to handle it here.
29505f757f3fSDimitry Andric   if ((Isv2x16VT(VT) || VT == MVT::v4i8) &&
29518bcb0991SDimitry Andric       !allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
29528bcb0991SDimitry Andric                                       VT, *Store->getMemOperand()))
29530b57cec5SDimitry Andric     return expandUnalignedStore(Store, DAG);
29540b57cec5SDimitry Andric 
29555f757f3fSDimitry Andric   // v2f16, v2bf16 and v2i16 don't need special handling.
29565f757f3fSDimitry Andric   if (Isv2x16VT(VT) || VT == MVT::v4i8)
295706c3fb27SDimitry Andric     return SDValue();
295806c3fb27SDimitry Andric 
29590b57cec5SDimitry Andric   if (VT.isVector())
29600b57cec5SDimitry Andric     return LowerSTOREVector(Op, DAG);
29610b57cec5SDimitry Andric 
29620b57cec5SDimitry Andric   return SDValue();
29630b57cec5SDimitry Andric }
29640b57cec5SDimitry Andric 
29650b57cec5SDimitry Andric SDValue
29660b57cec5SDimitry Andric NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
29670b57cec5SDimitry Andric   SDNode *N = Op.getNode();
29680b57cec5SDimitry Andric   SDValue Val = N->getOperand(1);
29690b57cec5SDimitry Andric   SDLoc DL(N);
29700b57cec5SDimitry Andric   EVT ValVT = Val.getValueType();
29710b57cec5SDimitry Andric 
29720b57cec5SDimitry Andric   if (ValVT.isVector()) {
29730b57cec5SDimitry Andric     // We only handle "native" vector sizes for now, e.g. <4 x double> is not
29740b57cec5SDimitry Andric     // legal.  We can (and should) split that into 2 stores of <2 x double> here
29750b57cec5SDimitry Andric     // but I'm leaving that as a TODO for now.
29760b57cec5SDimitry Andric     if (!ValVT.isSimple())
29770b57cec5SDimitry Andric       return SDValue();
29780b57cec5SDimitry Andric     switch (ValVT.getSimpleVT().SimpleTy) {
29790b57cec5SDimitry Andric     default:
29800b57cec5SDimitry Andric       return SDValue();
29810b57cec5SDimitry Andric     case MVT::v2i8:
29820b57cec5SDimitry Andric     case MVT::v2i16:
29830b57cec5SDimitry Andric     case MVT::v2i32:
29840b57cec5SDimitry Andric     case MVT::v2i64:
29850b57cec5SDimitry Andric     case MVT::v2f16:
2986bdd1243dSDimitry Andric     case MVT::v2bf16:
29870b57cec5SDimitry Andric     case MVT::v2f32:
29880b57cec5SDimitry Andric     case MVT::v2f64:
29890b57cec5SDimitry Andric     case MVT::v4i8:
29900b57cec5SDimitry Andric     case MVT::v4i16:
29910b57cec5SDimitry Andric     case MVT::v4i32:
29920b57cec5SDimitry Andric     case MVT::v4f16:
2993bdd1243dSDimitry Andric     case MVT::v4bf16:
29940b57cec5SDimitry Andric     case MVT::v4f32:
29950b57cec5SDimitry Andric     case MVT::v8f16: // <4 x f16x2>
2996bdd1243dSDimitry Andric     case MVT::v8bf16: // <4 x bf16x2>
29975f757f3fSDimitry Andric     case MVT::v8i16:  // <4 x i16x2>
29980b57cec5SDimitry Andric       // This is a "native" vector type
29990b57cec5SDimitry Andric       break;
30000b57cec5SDimitry Andric     }
30010b57cec5SDimitry Andric 
30020b57cec5SDimitry Andric     MemSDNode *MemSD = cast<MemSDNode>(N);
30030b57cec5SDimitry Andric     const DataLayout &TD = DAG.getDataLayout();
30040b57cec5SDimitry Andric 
30055ffd83dbSDimitry Andric     Align Alignment = MemSD->getAlign();
30065ffd83dbSDimitry Andric     Align PrefAlign =
30075ffd83dbSDimitry Andric         TD.getPrefTypeAlign(ValVT.getTypeForEVT(*DAG.getContext()));
30085ffd83dbSDimitry Andric     if (Alignment < PrefAlign) {
30090b57cec5SDimitry Andric       // This store is not sufficiently aligned, so bail out and let this vector
30100b57cec5SDimitry Andric       // store be scalarized.  Note that we may still be able to emit smaller
30110b57cec5SDimitry Andric       // vector stores.  For example, if we are storing a <4 x float> with an
30120b57cec5SDimitry Andric       // alignment of 8, this check will fail but the legalizer will try again
30130b57cec5SDimitry Andric       // with 2 x <2 x float>, which will succeed with an alignment of 8.
30140b57cec5SDimitry Andric       return SDValue();
30150b57cec5SDimitry Andric     }
30160b57cec5SDimitry Andric 
30170b57cec5SDimitry Andric     unsigned Opcode = 0;
30180b57cec5SDimitry Andric     EVT EltVT = ValVT.getVectorElementType();
30190b57cec5SDimitry Andric     unsigned NumElts = ValVT.getVectorNumElements();
30200b57cec5SDimitry Andric 
30210b57cec5SDimitry Andric     // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
30220b57cec5SDimitry Andric     // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
30230b57cec5SDimitry Andric     // stored type to i16 and propagate the "real" type as the memory type.
30240b57cec5SDimitry Andric     bool NeedExt = false;
30250b57cec5SDimitry Andric     if (EltVT.getSizeInBits() < 16)
30260b57cec5SDimitry Andric       NeedExt = true;
30270b57cec5SDimitry Andric 
30280b57cec5SDimitry Andric     bool StoreF16x2 = false;
30290b57cec5SDimitry Andric     switch (NumElts) {
30300b57cec5SDimitry Andric     default:
30310b57cec5SDimitry Andric       return SDValue();
30320b57cec5SDimitry Andric     case 2:
30330b57cec5SDimitry Andric       Opcode = NVPTXISD::StoreV2;
30340b57cec5SDimitry Andric       break;
30350b57cec5SDimitry Andric     case 4:
30360b57cec5SDimitry Andric       Opcode = NVPTXISD::StoreV4;
30370b57cec5SDimitry Andric       break;
30380b57cec5SDimitry Andric     case 8:
30390b57cec5SDimitry Andric       // v8f16 is a special case. PTX doesn't have st.v8.f16
30400b57cec5SDimitry Andric       // instruction. Instead, we split the vector into v2f16 chunks and
30410b57cec5SDimitry Andric       // store them with st.v4.b32.
30425f757f3fSDimitry Andric       assert(Is16bitsType(EltVT.getSimpleVT()) && "Wrong type for the vector.");
30430b57cec5SDimitry Andric       Opcode = NVPTXISD::StoreV4;
30440b57cec5SDimitry Andric       StoreF16x2 = true;
30450b57cec5SDimitry Andric       break;
30460b57cec5SDimitry Andric     }
30470b57cec5SDimitry Andric 
30480b57cec5SDimitry Andric     SmallVector<SDValue, 8> Ops;
30490b57cec5SDimitry Andric 
30500b57cec5SDimitry Andric     // First is the chain
30510b57cec5SDimitry Andric     Ops.push_back(N->getOperand(0));
30520b57cec5SDimitry Andric 
30530b57cec5SDimitry Andric     if (StoreF16x2) {
30540b57cec5SDimitry Andric       // Combine f16,f16 -> v2f16
30550b57cec5SDimitry Andric       NumElts /= 2;
30560b57cec5SDimitry Andric       for (unsigned i = 0; i < NumElts; ++i) {
305706c3fb27SDimitry Andric         SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
30580b57cec5SDimitry Andric                                  DAG.getIntPtrConstant(i * 2, DL));
305906c3fb27SDimitry Andric         SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
30600b57cec5SDimitry Andric                                  DAG.getIntPtrConstant(i * 2 + 1, DL));
306106c3fb27SDimitry Andric         EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, 2);
306206c3fb27SDimitry Andric         SDValue V2 = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, E0, E1);
30630b57cec5SDimitry Andric         Ops.push_back(V2);
30640b57cec5SDimitry Andric       }
30650b57cec5SDimitry Andric     } else {
30660b57cec5SDimitry Andric       // Then the split values
30670b57cec5SDimitry Andric       for (unsigned i = 0; i < NumElts; ++i) {
30680b57cec5SDimitry Andric         SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
30690b57cec5SDimitry Andric                                      DAG.getIntPtrConstant(i, DL));
30700b57cec5SDimitry Andric         if (NeedExt)
30710b57cec5SDimitry Andric           ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
30720b57cec5SDimitry Andric         Ops.push_back(ExtVal);
30730b57cec5SDimitry Andric       }
30740b57cec5SDimitry Andric     }
30750b57cec5SDimitry Andric 
30760b57cec5SDimitry Andric     // Then any remaining arguments
30770b57cec5SDimitry Andric     Ops.append(N->op_begin() + 2, N->op_end());
30780b57cec5SDimitry Andric 
30790b57cec5SDimitry Andric     SDValue NewSt =
30800b57cec5SDimitry Andric         DAG.getMemIntrinsicNode(Opcode, DL, DAG.getVTList(MVT::Other), Ops,
30810b57cec5SDimitry Andric                                 MemSD->getMemoryVT(), MemSD->getMemOperand());
30820b57cec5SDimitry Andric 
30830b57cec5SDimitry Andric     // return DCI.CombineTo(N, NewSt, true);
30840b57cec5SDimitry Andric     return NewSt;
30850b57cec5SDimitry Andric   }
30860b57cec5SDimitry Andric 
30870b57cec5SDimitry Andric   return SDValue();
30880b57cec5SDimitry Andric }
30890b57cec5SDimitry Andric 
30900b57cec5SDimitry Andric // st i1 v, addr
30910b57cec5SDimitry Andric //    =>
30920b57cec5SDimitry Andric // v1 = zxt v to i16
30930b57cec5SDimitry Andric // st.u8 i16, addr
30940b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
30950b57cec5SDimitry Andric   SDNode *Node = Op.getNode();
30960b57cec5SDimitry Andric   SDLoc dl(Node);
30970b57cec5SDimitry Andric   StoreSDNode *ST = cast<StoreSDNode>(Node);
30980b57cec5SDimitry Andric   SDValue Tmp1 = ST->getChain();
30990b57cec5SDimitry Andric   SDValue Tmp2 = ST->getBasePtr();
31000b57cec5SDimitry Andric   SDValue Tmp3 = ST->getValue();
31010b57cec5SDimitry Andric   assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
31020b57cec5SDimitry Andric   Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3);
31030b57cec5SDimitry Andric   SDValue Result =
31040b57cec5SDimitry Andric       DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8,
310581ad6265SDimitry Andric                         ST->getAlign(), ST->getMemOperand()->getFlags());
31060b57cec5SDimitry Andric   return Result;
31070b57cec5SDimitry Andric }
31080b57cec5SDimitry Andric 
31090fca6ea1SDimitry Andric SDValue NVPTXTargetLowering::LowerCopyToReg_128(SDValue Op,
31100fca6ea1SDimitry Andric                                                 SelectionDAG &DAG) const {
31110fca6ea1SDimitry Andric   // Change the CopyToReg to take in two 64-bit operands instead of a 128-bit
31120fca6ea1SDimitry Andric   // operand so that it can pass the legalization.
31130fca6ea1SDimitry Andric 
31140fca6ea1SDimitry Andric   assert(Op.getOperand(1).getValueType() == MVT::i128 &&
31150fca6ea1SDimitry Andric          "Custom lowering for 128-bit CopyToReg only");
31160fca6ea1SDimitry Andric 
31170fca6ea1SDimitry Andric   SDNode *Node = Op.getNode();
31180fca6ea1SDimitry Andric   SDLoc DL(Node);
31190fca6ea1SDimitry Andric 
31200fca6ea1SDimitry Andric   SDValue Cast = DAG.getBitcast(MVT::v2i64, Op->getOperand(2));
31210fca6ea1SDimitry Andric   SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Cast,
31220fca6ea1SDimitry Andric                            DAG.getIntPtrConstant(0, DL));
31230fca6ea1SDimitry Andric   SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Cast,
31240fca6ea1SDimitry Andric                            DAG.getIntPtrConstant(1, DL));
31250fca6ea1SDimitry Andric 
31260fca6ea1SDimitry Andric   SmallVector<SDValue, 5> NewOps(Op->getNumOperands() + 1);
31270fca6ea1SDimitry Andric   SmallVector<EVT, 3> ResultsType(Node->values());
31280fca6ea1SDimitry Andric 
31290fca6ea1SDimitry Andric   NewOps[0] = Op->getOperand(0); // Chain
31300fca6ea1SDimitry Andric   NewOps[1] = Op->getOperand(1); // Dst Reg
31310fca6ea1SDimitry Andric   NewOps[2] = Lo;                // Lower 64-bit
31320fca6ea1SDimitry Andric   NewOps[3] = Hi;                // Higher 64-bit
31330fca6ea1SDimitry Andric   if (Op.getNumOperands() == 4)
31340fca6ea1SDimitry Andric     NewOps[4] = Op->getOperand(3); // Glue if exists
31350fca6ea1SDimitry Andric 
31360fca6ea1SDimitry Andric   return DAG.getNode(ISD::CopyToReg, DL, ResultsType, NewOps);
31370fca6ea1SDimitry Andric }
31380fca6ea1SDimitry Andric 
31390fca6ea1SDimitry Andric unsigned NVPTXTargetLowering::getNumRegisters(
31400fca6ea1SDimitry Andric     LLVMContext &Context, EVT VT,
31410fca6ea1SDimitry Andric     std::optional<MVT> RegisterVT = std::nullopt) const {
31420fca6ea1SDimitry Andric   if (VT == MVT::i128 && RegisterVT == MVT::i128)
31430fca6ea1SDimitry Andric     return 1;
31440fca6ea1SDimitry Andric   return TargetLoweringBase::getNumRegisters(Context, VT, RegisterVT);
31450fca6ea1SDimitry Andric }
31460fca6ea1SDimitry Andric 
31470fca6ea1SDimitry Andric bool NVPTXTargetLowering::splitValueIntoRegisterParts(
31480fca6ea1SDimitry Andric     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
31490fca6ea1SDimitry Andric     unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
31500fca6ea1SDimitry Andric   if (Val.getValueType() == MVT::i128 && NumParts == 1) {
31510fca6ea1SDimitry Andric     Parts[0] = Val;
31520fca6ea1SDimitry Andric     return true;
31530fca6ea1SDimitry Andric   }
31540fca6ea1SDimitry Andric   return false;
31550fca6ea1SDimitry Andric }
31560fca6ea1SDimitry Andric 
3157bdd1243dSDimitry Andric // This creates target external symbol for a function parameter.
3158bdd1243dSDimitry Andric // Name of the symbol is composed from its index and the function name.
3159bdd1243dSDimitry Andric // Negative index corresponds to special parameter (unsized array) used for
3160bdd1243dSDimitry Andric // passing variable arguments.
3161bdd1243dSDimitry Andric SDValue NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx,
3162bdd1243dSDimitry Andric                                             EVT v) const {
316306c3fb27SDimitry Andric   StringRef SavedStr = nvTM->getStrPool().save(
316406c3fb27SDimitry Andric       getParamName(&DAG.getMachineFunction().getFunction(), idx));
3165bdd1243dSDimitry Andric   return DAG.getTargetExternalSymbol(SavedStr.data(), v);
31660b57cec5SDimitry Andric }
31670b57cec5SDimitry Andric 
31680b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerFormalArguments(
31690b57cec5SDimitry Andric     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
31700b57cec5SDimitry Andric     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
31710b57cec5SDimitry Andric     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
31720b57cec5SDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
31730b57cec5SDimitry Andric   const DataLayout &DL = DAG.getDataLayout();
31740b57cec5SDimitry Andric   auto PtrVT = getPointerTy(DAG.getDataLayout());
31750b57cec5SDimitry Andric 
31760b57cec5SDimitry Andric   const Function *F = &MF.getFunction();
31770b57cec5SDimitry Andric   const AttributeList &PAL = F->getAttributes();
31780b57cec5SDimitry Andric   const TargetLowering *TLI = STI.getTargetLowering();
31790b57cec5SDimitry Andric 
31800b57cec5SDimitry Andric   SDValue Root = DAG.getRoot();
31810b57cec5SDimitry Andric   std::vector<SDValue> OutChains;
31820b57cec5SDimitry Andric 
31830b57cec5SDimitry Andric   bool isABI = (STI.getSmVersion() >= 20);
31840b57cec5SDimitry Andric   assert(isABI && "Non-ABI compilation is not supported");
31850b57cec5SDimitry Andric   if (!isABI)
31860b57cec5SDimitry Andric     return Chain;
31870b57cec5SDimitry Andric 
31880b57cec5SDimitry Andric   std::vector<Type *> argTypes;
31890b57cec5SDimitry Andric   std::vector<const Argument *> theArgs;
31900b57cec5SDimitry Andric   for (const Argument &I : F->args()) {
31910b57cec5SDimitry Andric     theArgs.push_back(&I);
31920b57cec5SDimitry Andric     argTypes.push_back(I.getType());
31930b57cec5SDimitry Andric   }
31940b57cec5SDimitry Andric   // argTypes.size() (or theArgs.size()) and Ins.size() need not match.
31950b57cec5SDimitry Andric   // Ins.size() will be larger
31960b57cec5SDimitry Andric   //   * if there is an aggregate argument with multiple fields (each field
31970b57cec5SDimitry Andric   //     showing up separately in Ins)
31980b57cec5SDimitry Andric   //   * if there is a vector argument with more than typical vector-length
31990b57cec5SDimitry Andric   //     elements (generally if more than 4) where each vector element is
32000b57cec5SDimitry Andric   //     individually present in Ins.
32010b57cec5SDimitry Andric   // So a different index should be used for indexing into Ins.
32020b57cec5SDimitry Andric   // See similar issue in LowerCall.
32030b57cec5SDimitry Andric   unsigned InsIdx = 0;
32040b57cec5SDimitry Andric 
32050fca6ea1SDimitry Andric   for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++InsIdx) {
32060b57cec5SDimitry Andric     Type *Ty = argTypes[i];
32070b57cec5SDimitry Andric 
32080b57cec5SDimitry Andric     if (theArgs[i]->use_empty()) {
32090b57cec5SDimitry Andric       // argument is dead
321006c3fb27SDimitry Andric       if (IsTypePassedAsArray(Ty) && !Ty->isVectorTy()) {
32110b57cec5SDimitry Andric         SmallVector<EVT, 16> vtparts;
32120b57cec5SDimitry Andric 
32130b57cec5SDimitry Andric         ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts);
321406c3fb27SDimitry Andric         if (vtparts.empty())
321506c3fb27SDimitry Andric           report_fatal_error("Empty parameter types are not supported");
321606c3fb27SDimitry Andric 
32170b57cec5SDimitry Andric         for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
32180b57cec5SDimitry Andric              ++parti) {
32190b57cec5SDimitry Andric           InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
32200b57cec5SDimitry Andric           ++InsIdx;
32210b57cec5SDimitry Andric         }
32220b57cec5SDimitry Andric         if (vtparts.size() > 0)
32230b57cec5SDimitry Andric           --InsIdx;
32240b57cec5SDimitry Andric         continue;
32250b57cec5SDimitry Andric       }
32260b57cec5SDimitry Andric       if (Ty->isVectorTy()) {
32270b57cec5SDimitry Andric         EVT ObjectVT = getValueType(DL, Ty);
32280b57cec5SDimitry Andric         unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
32290b57cec5SDimitry Andric         for (unsigned parti = 0; parti < NumRegs; ++parti) {
32300b57cec5SDimitry Andric           InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
32310b57cec5SDimitry Andric           ++InsIdx;
32320b57cec5SDimitry Andric         }
32330b57cec5SDimitry Andric         if (NumRegs > 0)
32340b57cec5SDimitry Andric           --InsIdx;
32350b57cec5SDimitry Andric         continue;
32360b57cec5SDimitry Andric       }
32370b57cec5SDimitry Andric       InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
32380b57cec5SDimitry Andric       continue;
32390b57cec5SDimitry Andric     }
32400b57cec5SDimitry Andric 
32410fca6ea1SDimitry Andric     // In the following cases, assign a node order of "i+1"
32420b57cec5SDimitry Andric     // to newly created nodes. The SDNodes for params have to
32430b57cec5SDimitry Andric     // appear in the same order as their order of appearance
32440fca6ea1SDimitry Andric     // in the original function. "i+1" holds that order.
3245349cc55cSDimitry Andric     if (!PAL.hasParamAttr(i, Attribute::ByVal)) {
32460b57cec5SDimitry Andric       bool aggregateIsPacked = false;
32470b57cec5SDimitry Andric       if (StructType *STy = dyn_cast<StructType>(Ty))
32480b57cec5SDimitry Andric         aggregateIsPacked = STy->isPacked();
32490b57cec5SDimitry Andric 
32500b57cec5SDimitry Andric       SmallVector<EVT, 16> VTs;
32510b57cec5SDimitry Andric       SmallVector<uint64_t, 16> Offsets;
32520b57cec5SDimitry Andric       ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0);
325306c3fb27SDimitry Andric       if (VTs.empty())
325406c3fb27SDimitry Andric         report_fatal_error("Empty parameter types are not supported");
325506c3fb27SDimitry Andric 
32560fca6ea1SDimitry Andric       Align ArgAlign = getFunctionArgumentAlignment(
32570fca6ea1SDimitry Andric           F, Ty, i + AttributeList::FirstArgIndex, DL);
32580fca6ea1SDimitry Andric       auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign);
32590b57cec5SDimitry Andric 
32600fca6ea1SDimitry Andric       SDValue Arg = getParamSymbol(DAG, i, PtrVT);
32610b57cec5SDimitry Andric       int VecIdx = -1; // Index of the first element of the current vector.
32620b57cec5SDimitry Andric       for (unsigned parti = 0, parte = VTs.size(); parti != parte; ++parti) {
32630b57cec5SDimitry Andric         if (VectorInfo[parti] & PVF_FIRST) {
32640b57cec5SDimitry Andric           assert(VecIdx == -1 && "Orphaned vector.");
32650b57cec5SDimitry Andric           VecIdx = parti;
32660b57cec5SDimitry Andric         }
32670b57cec5SDimitry Andric 
32680b57cec5SDimitry Andric         // That's the last element of this store op.
32690b57cec5SDimitry Andric         if (VectorInfo[parti] & PVF_LAST) {
32700b57cec5SDimitry Andric           unsigned NumElts = parti - VecIdx + 1;
32710b57cec5SDimitry Andric           EVT EltVT = VTs[parti];
32720b57cec5SDimitry Andric           // i1 is loaded/stored as i8.
32730b57cec5SDimitry Andric           EVT LoadVT = EltVT;
32740b57cec5SDimitry Andric           if (EltVT == MVT::i1)
32750b57cec5SDimitry Andric             LoadVT = MVT::i8;
32765f757f3fSDimitry Andric           else if (Isv2x16VT(EltVT) || EltVT == MVT::v4i8)
32770b57cec5SDimitry Andric             // getLoad needs a vector type, but it can't handle
327806c3fb27SDimitry Andric             // vectors which contain v2f16 or v2bf16 elements. So we must load
32790b57cec5SDimitry Andric             // using i32 here and then bitcast back.
32800b57cec5SDimitry Andric             LoadVT = MVT::i32;
32810b57cec5SDimitry Andric 
32820b57cec5SDimitry Andric           EVT VecVT = EVT::getVectorVT(F->getContext(), LoadVT, NumElts);
32830b57cec5SDimitry Andric           SDValue VecAddr =
32840b57cec5SDimitry Andric               DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
32850b57cec5SDimitry Andric                           DAG.getConstant(Offsets[VecIdx], dl, PtrVT));
32860b57cec5SDimitry Andric           Value *srcValue = Constant::getNullValue(PointerType::get(
32870b57cec5SDimitry Andric               EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
32880fca6ea1SDimitry Andric 
32890fca6ea1SDimitry Andric           const MaybeAlign PartAlign = [&]() -> MaybeAlign {
32900fca6ea1SDimitry Andric             if (aggregateIsPacked)
32910fca6ea1SDimitry Andric               return Align(1);
32920fca6ea1SDimitry Andric             if (NumElts != 1)
32930fca6ea1SDimitry Andric               return std::nullopt;
32940fca6ea1SDimitry Andric             Align PartAlign =
32950fca6ea1SDimitry Andric                 DL.getABITypeAlign(EltVT.getTypeForEVT(F->getContext()));
32960fca6ea1SDimitry Andric             return commonAlignment(PartAlign, Offsets[parti]);
32970fca6ea1SDimitry Andric           }();
3298bdd1243dSDimitry Andric           SDValue P = DAG.getLoad(VecVT, dl, Root, VecAddr,
32990fca6ea1SDimitry Andric                                   MachinePointerInfo(srcValue), PartAlign,
33000b57cec5SDimitry Andric                                   MachineMemOperand::MODereferenceable |
33010b57cec5SDimitry Andric                                       MachineMemOperand::MOInvariant);
33020b57cec5SDimitry Andric           if (P.getNode())
33030fca6ea1SDimitry Andric             P.getNode()->setIROrder(i + 1);
33040b57cec5SDimitry Andric           for (unsigned j = 0; j < NumElts; ++j) {
33050b57cec5SDimitry Andric             SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LoadVT, P,
33060b57cec5SDimitry Andric                                       DAG.getIntPtrConstant(j, dl));
33070b57cec5SDimitry Andric             // We've loaded i1 as an i8 and now must truncate it back to i1
33080b57cec5SDimitry Andric             if (EltVT == MVT::i1)
33090b57cec5SDimitry Andric               Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Elt);
33100b57cec5SDimitry Andric             // v2f16 was loaded as an i32. Now we must bitcast it back.
33115f757f3fSDimitry Andric             else if (EltVT != LoadVT)
331206c3fb27SDimitry Andric               Elt = DAG.getNode(ISD::BITCAST, dl, EltVT, Elt);
3313fcaf7f86SDimitry Andric 
3314fcaf7f86SDimitry Andric             // If a promoted integer type is used, truncate down to the original
3315fcaf7f86SDimitry Andric             MVT PromotedVT;
3316fcaf7f86SDimitry Andric             if (PromoteScalarIntegerPTX(EltVT, &PromotedVT)) {
3317fcaf7f86SDimitry Andric               Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
3318fcaf7f86SDimitry Andric             }
3319fcaf7f86SDimitry Andric 
33200b57cec5SDimitry Andric             // Extend the element if necessary (e.g. an i8 is loaded
33210b57cec5SDimitry Andric             // into an i16 register)
33220b57cec5SDimitry Andric             if (Ins[InsIdx].VT.isInteger() &&
3323e8d8bef9SDimitry Andric                 Ins[InsIdx].VT.getFixedSizeInBits() >
3324e8d8bef9SDimitry Andric                     LoadVT.getFixedSizeInBits()) {
33250b57cec5SDimitry Andric               unsigned Extend = Ins[InsIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
33260b57cec5SDimitry Andric                                                            : ISD::ZERO_EXTEND;
33270b57cec5SDimitry Andric               Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt);
33280b57cec5SDimitry Andric             }
33290b57cec5SDimitry Andric             InVals.push_back(Elt);
33300b57cec5SDimitry Andric           }
33310b57cec5SDimitry Andric 
33320b57cec5SDimitry Andric           // Reset vector tracking state.
33330b57cec5SDimitry Andric           VecIdx = -1;
33340b57cec5SDimitry Andric         }
33350b57cec5SDimitry Andric         ++InsIdx;
33360b57cec5SDimitry Andric       }
33370b57cec5SDimitry Andric       if (VTs.size() > 0)
33380b57cec5SDimitry Andric         --InsIdx;
33390b57cec5SDimitry Andric       continue;
33400b57cec5SDimitry Andric     }
33410b57cec5SDimitry Andric 
33420b57cec5SDimitry Andric     // Param has ByVal attribute
33430b57cec5SDimitry Andric     // Return MoveParam(param symbol).
33440b57cec5SDimitry Andric     // Ideally, the param symbol can be returned directly,
33450b57cec5SDimitry Andric     // but when SDNode builder decides to use it in a CopyToReg(),
33460b57cec5SDimitry Andric     // machine instruction fails because TargetExternalSymbol
33470b57cec5SDimitry Andric     // (not lowered) is target dependent, and CopyToReg assumes
33480b57cec5SDimitry Andric     // the source is lowered.
33490b57cec5SDimitry Andric     EVT ObjectVT = getValueType(DL, Ty);
33500b57cec5SDimitry Andric     assert(ObjectVT == Ins[InsIdx].VT &&
33510b57cec5SDimitry Andric            "Ins type did not match function type");
33520fca6ea1SDimitry Andric     SDValue Arg = getParamSymbol(DAG, i, PtrVT);
33530b57cec5SDimitry Andric     SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
33540b57cec5SDimitry Andric     if (p.getNode())
33550fca6ea1SDimitry Andric       p.getNode()->setIROrder(i + 1);
33560b57cec5SDimitry Andric     InVals.push_back(p);
33570b57cec5SDimitry Andric   }
33580b57cec5SDimitry Andric 
33590b57cec5SDimitry Andric   if (!OutChains.empty())
33600b57cec5SDimitry Andric     DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));
33610b57cec5SDimitry Andric 
33620b57cec5SDimitry Andric   return Chain;
33630b57cec5SDimitry Andric }
33640b57cec5SDimitry Andric 
33650fca6ea1SDimitry Andric // Use byte-store when the param adress of the return value is unaligned.
33660fca6ea1SDimitry Andric // This may happen when the return value is a field of a packed structure.
33670fca6ea1SDimitry Andric static SDValue LowerUnalignedStoreRet(SelectionDAG &DAG, SDValue Chain,
33680fca6ea1SDimitry Andric                                       uint64_t Offset, EVT ElementType,
33690fca6ea1SDimitry Andric                                       SDValue RetVal, const SDLoc &dl) {
33700fca6ea1SDimitry Andric   // Bit logic only works on integer types
33710fca6ea1SDimitry Andric   if (adjustElementType(ElementType))
33720fca6ea1SDimitry Andric     RetVal = DAG.getNode(ISD::BITCAST, dl, ElementType, RetVal);
33730fca6ea1SDimitry Andric 
33740fca6ea1SDimitry Andric   // Store each byte
33750fca6ea1SDimitry Andric   for (unsigned i = 0, n = ElementType.getSizeInBits() / 8; i < n; i++) {
33760fca6ea1SDimitry Andric     // Shift the byte to the last byte position
33770fca6ea1SDimitry Andric     SDValue ShiftVal = DAG.getNode(ISD::SRL, dl, ElementType, RetVal,
33780fca6ea1SDimitry Andric                                    DAG.getConstant(i * 8, dl, MVT::i32));
33790fca6ea1SDimitry Andric     SDValue StoreOperands[] = {Chain, DAG.getConstant(Offset + i, dl, MVT::i32),
33800fca6ea1SDimitry Andric                                ShiftVal};
33810fca6ea1SDimitry Andric     // Trunc store only the last byte by using
33820fca6ea1SDimitry Andric     //     st.param.b8
33830fca6ea1SDimitry Andric     // The register type can be larger than b8.
33840fca6ea1SDimitry Andric     Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
33850fca6ea1SDimitry Andric                                     DAG.getVTList(MVT::Other), StoreOperands,
33860fca6ea1SDimitry Andric                                     MVT::i8, MachinePointerInfo(), std::nullopt,
33870fca6ea1SDimitry Andric                                     MachineMemOperand::MOStore);
33880fca6ea1SDimitry Andric   }
33890fca6ea1SDimitry Andric   return Chain;
33900fca6ea1SDimitry Andric }
33910fca6ea1SDimitry Andric 
33920b57cec5SDimitry Andric SDValue
33930b57cec5SDimitry Andric NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
33940b57cec5SDimitry Andric                                  bool isVarArg,
33950b57cec5SDimitry Andric                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
33960b57cec5SDimitry Andric                                  const SmallVectorImpl<SDValue> &OutVals,
33970b57cec5SDimitry Andric                                  const SDLoc &dl, SelectionDAG &DAG) const {
339881ad6265SDimitry Andric   const MachineFunction &MF = DAG.getMachineFunction();
339981ad6265SDimitry Andric   const Function &F = MF.getFunction();
34000b57cec5SDimitry Andric   Type *RetTy = MF.getFunction().getReturnType();
34010b57cec5SDimitry Andric 
34020b57cec5SDimitry Andric   bool isABI = (STI.getSmVersion() >= 20);
34030b57cec5SDimitry Andric   assert(isABI && "Non-ABI compilation is not supported");
34040b57cec5SDimitry Andric   if (!isABI)
34050b57cec5SDimitry Andric     return Chain;
34060b57cec5SDimitry Andric 
3407fe6060f1SDimitry Andric   const DataLayout &DL = DAG.getDataLayout();
3408fcaf7f86SDimitry Andric   SmallVector<SDValue, 16> PromotedOutVals;
34090b57cec5SDimitry Andric   SmallVector<EVT, 16> VTs;
34100b57cec5SDimitry Andric   SmallVector<uint64_t, 16> Offsets;
34110b57cec5SDimitry Andric   ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets);
34120b57cec5SDimitry Andric   assert(VTs.size() == OutVals.size() && "Bad return value decomposition");
34130b57cec5SDimitry Andric 
3414fcaf7f86SDimitry Andric   for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
3415fcaf7f86SDimitry Andric     SDValue PromotedOutVal = OutVals[i];
3416fcaf7f86SDimitry Andric     MVT PromotedVT;
3417fcaf7f86SDimitry Andric     if (PromoteScalarIntegerPTX(VTs[i], &PromotedVT)) {
3418fcaf7f86SDimitry Andric       VTs[i] = EVT(PromotedVT);
3419fcaf7f86SDimitry Andric     }
3420fcaf7f86SDimitry Andric     if (PromoteScalarIntegerPTX(PromotedOutVal.getValueType(), &PromotedVT)) {
3421fcaf7f86SDimitry Andric       llvm::ISD::NodeType Ext =
3422fcaf7f86SDimitry Andric           Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3423fcaf7f86SDimitry Andric       PromotedOutVal = DAG.getNode(Ext, dl, PromotedVT, PromotedOutVal);
3424fcaf7f86SDimitry Andric     }
3425fcaf7f86SDimitry Andric     PromotedOutVals.push_back(PromotedOutVal);
3426fcaf7f86SDimitry Andric   }
3427fcaf7f86SDimitry Andric 
34280b57cec5SDimitry Andric   auto VectorInfo = VectorizePTXValueVTs(
342981ad6265SDimitry Andric       VTs, Offsets,
343081ad6265SDimitry Andric       RetTy->isSized() ? getFunctionParamOptimizedAlign(&F, RetTy, DL)
343181ad6265SDimitry Andric                        : Align(1));
34320b57cec5SDimitry Andric 
34330b57cec5SDimitry Andric   // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
34340b57cec5SDimitry Andric   // 32-bits are sign extended or zero extended, depending on whether
34350b57cec5SDimitry Andric   // they are signed or unsigned types.
34360b57cec5SDimitry Andric   bool ExtendIntegerRetVal =
34370b57cec5SDimitry Andric       RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
34380b57cec5SDimitry Andric 
34390b57cec5SDimitry Andric   SmallVector<SDValue, 6> StoreOperands;
34400b57cec5SDimitry Andric   for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
3441fcaf7f86SDimitry Andric     SDValue OutVal = OutVals[i];
3442fcaf7f86SDimitry Andric     SDValue RetVal = PromotedOutVals[i];
3443fcaf7f86SDimitry Andric 
34440b57cec5SDimitry Andric     if (ExtendIntegerRetVal) {
34450b57cec5SDimitry Andric       RetVal = DAG.getNode(Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND
34460b57cec5SDimitry Andric                                                   : ISD::ZERO_EXTEND,
34470b57cec5SDimitry Andric                            dl, MVT::i32, RetVal);
3448fcaf7f86SDimitry Andric     } else if (OutVal.getValueSizeInBits() < 16) {
34490b57cec5SDimitry Andric       // Use 16-bit registers for small load-stores as it's the
34500b57cec5SDimitry Andric       // smallest general purpose register size supported by NVPTX.
34510b57cec5SDimitry Andric       RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal);
34520b57cec5SDimitry Andric     }
34530b57cec5SDimitry Andric 
34540fca6ea1SDimitry Andric     // If we have a PVF_SCALAR entry, it may not even be sufficiently aligned
34550fca6ea1SDimitry Andric     // for a scalar store. In such cases, fall back to byte stores.
34560fca6ea1SDimitry Andric     if (VectorInfo[i] == PVF_SCALAR && RetTy->isAggregateType()) {
34570fca6ea1SDimitry Andric       EVT ElementType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];
34580fca6ea1SDimitry Andric       Align ElementTypeAlign =
34590fca6ea1SDimitry Andric           DL.getABITypeAlign(ElementType.getTypeForEVT(RetTy->getContext()));
34600fca6ea1SDimitry Andric       Align ElementAlign =
34610fca6ea1SDimitry Andric           commonAlignment(DL.getABITypeAlign(RetTy), Offsets[i]);
34620fca6ea1SDimitry Andric       if (ElementAlign < ElementTypeAlign) {
34630fca6ea1SDimitry Andric         assert(StoreOperands.empty() && "Orphaned operand list.");
34640fca6ea1SDimitry Andric         Chain = LowerUnalignedStoreRet(DAG, Chain, Offsets[i], ElementType,
34650fca6ea1SDimitry Andric                                        RetVal, dl);
34660fca6ea1SDimitry Andric 
34670fca6ea1SDimitry Andric         // The call to LowerUnalignedStoreRet inserted the necessary SDAG nodes
34680fca6ea1SDimitry Andric         // into the graph, so just move on to the next element.
34690fca6ea1SDimitry Andric         continue;
34700fca6ea1SDimitry Andric       }
34710fca6ea1SDimitry Andric     }
34720fca6ea1SDimitry Andric 
34730fca6ea1SDimitry Andric     // New load/store. Record chain and offset operands.
34740fca6ea1SDimitry Andric     if (VectorInfo[i] & PVF_FIRST) {
34750fca6ea1SDimitry Andric       assert(StoreOperands.empty() && "Orphaned operand list.");
34760fca6ea1SDimitry Andric       StoreOperands.push_back(Chain);
34770fca6ea1SDimitry Andric       StoreOperands.push_back(DAG.getConstant(Offsets[i], dl, MVT::i32));
34780fca6ea1SDimitry Andric     }
34790fca6ea1SDimitry Andric 
34800b57cec5SDimitry Andric     // Record the value to return.
34810b57cec5SDimitry Andric     StoreOperands.push_back(RetVal);
34820b57cec5SDimitry Andric 
34830b57cec5SDimitry Andric     // That's the last element of this store op.
34840b57cec5SDimitry Andric     if (VectorInfo[i] & PVF_LAST) {
34850b57cec5SDimitry Andric       NVPTXISD::NodeType Op;
34860b57cec5SDimitry Andric       unsigned NumElts = StoreOperands.size() - 2;
34870b57cec5SDimitry Andric       switch (NumElts) {
34880b57cec5SDimitry Andric       case 1:
34890b57cec5SDimitry Andric         Op = NVPTXISD::StoreRetval;
34900b57cec5SDimitry Andric         break;
34910b57cec5SDimitry Andric       case 2:
34920b57cec5SDimitry Andric         Op = NVPTXISD::StoreRetvalV2;
34930b57cec5SDimitry Andric         break;
34940b57cec5SDimitry Andric       case 4:
34950b57cec5SDimitry Andric         Op = NVPTXISD::StoreRetvalV4;
34960b57cec5SDimitry Andric         break;
34970b57cec5SDimitry Andric       default:
34980b57cec5SDimitry Andric         llvm_unreachable("Invalid vector info.");
34990b57cec5SDimitry Andric       }
35000b57cec5SDimitry Andric 
35010b57cec5SDimitry Andric       // Adjust type of load/store op if we've extended the scalar
35020b57cec5SDimitry Andric       // return value.
35030b57cec5SDimitry Andric       EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];
35045ffd83dbSDimitry Andric       Chain = DAG.getMemIntrinsicNode(
35055ffd83dbSDimitry Andric           Op, dl, DAG.getVTList(MVT::Other), StoreOperands, TheStoreType,
35065ffd83dbSDimitry Andric           MachinePointerInfo(), Align(1), MachineMemOperand::MOStore);
35070b57cec5SDimitry Andric       // Cleanup vector state.
35080b57cec5SDimitry Andric       StoreOperands.clear();
35090b57cec5SDimitry Andric     }
35100b57cec5SDimitry Andric   }
35110b57cec5SDimitry Andric 
351206c3fb27SDimitry Andric   return DAG.getNode(NVPTXISD::RET_GLUE, dl, MVT::Other, Chain);
35130b57cec5SDimitry Andric }
35140b57cec5SDimitry Andric 
35150b57cec5SDimitry Andric void NVPTXTargetLowering::LowerAsmOperandForConstraint(
35165f757f3fSDimitry Andric     SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
35170b57cec5SDimitry Andric     SelectionDAG &DAG) const {
35185f757f3fSDimitry Andric   if (Constraint.size() > 1)
35190b57cec5SDimitry Andric     return;
35200b57cec5SDimitry Andric   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
35210b57cec5SDimitry Andric }
35220b57cec5SDimitry Andric 
35230b57cec5SDimitry Andric static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
35240b57cec5SDimitry Andric   switch (Intrinsic) {
35250b57cec5SDimitry Andric   default:
35260b57cec5SDimitry Andric     return 0;
35270b57cec5SDimitry Andric 
35280b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4f32_s32:
35290b57cec5SDimitry Andric     return NVPTXISD::Tex1DFloatS32;
35300b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4f32_f32:
35310b57cec5SDimitry Andric     return NVPTXISD::Tex1DFloatFloat;
35320b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
35330b57cec5SDimitry Andric     return NVPTXISD::Tex1DFloatFloatLevel;
35340b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
35350b57cec5SDimitry Andric     return NVPTXISD::Tex1DFloatFloatGrad;
35360b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4s32_s32:
35370b57cec5SDimitry Andric     return NVPTXISD::Tex1DS32S32;
35380b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4s32_f32:
35390b57cec5SDimitry Andric     return NVPTXISD::Tex1DS32Float;
35400b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
35410b57cec5SDimitry Andric     return NVPTXISD::Tex1DS32FloatLevel;
35420b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
35430b57cec5SDimitry Andric     return NVPTXISD::Tex1DS32FloatGrad;
35440b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4u32_s32:
35450b57cec5SDimitry Andric     return NVPTXISD::Tex1DU32S32;
35460b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4u32_f32:
35470b57cec5SDimitry Andric     return NVPTXISD::Tex1DU32Float;
35480b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
35490b57cec5SDimitry Andric     return NVPTXISD::Tex1DU32FloatLevel;
35500b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
35510b57cec5SDimitry Andric     return NVPTXISD::Tex1DU32FloatGrad;
35520b57cec5SDimitry Andric 
35530b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
35540b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayFloatS32;
35550b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
35560b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayFloatFloat;
35570b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
35580b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayFloatFloatLevel;
35590b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
35600b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayFloatFloatGrad;
35610b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
35620b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayS32S32;
35630b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
35640b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayS32Float;
35650b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
35660b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayS32FloatLevel;
35670b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
35680b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayS32FloatGrad;
35690b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
35700b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayU32S32;
35710b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
35720b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayU32Float;
35730b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
35740b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayU32FloatLevel;
35750b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
35760b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayU32FloatGrad;
35770b57cec5SDimitry Andric 
35780b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4f32_s32:
35790b57cec5SDimitry Andric     return NVPTXISD::Tex2DFloatS32;
35800b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4f32_f32:
35810b57cec5SDimitry Andric     return NVPTXISD::Tex2DFloatFloat;
35820b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
35830b57cec5SDimitry Andric     return NVPTXISD::Tex2DFloatFloatLevel;
35840b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
35850b57cec5SDimitry Andric     return NVPTXISD::Tex2DFloatFloatGrad;
35860b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4s32_s32:
35870b57cec5SDimitry Andric     return NVPTXISD::Tex2DS32S32;
35880b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4s32_f32:
35890b57cec5SDimitry Andric     return NVPTXISD::Tex2DS32Float;
35900b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
35910b57cec5SDimitry Andric     return NVPTXISD::Tex2DS32FloatLevel;
35920b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
35930b57cec5SDimitry Andric     return NVPTXISD::Tex2DS32FloatGrad;
35940b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4u32_s32:
35950b57cec5SDimitry Andric     return NVPTXISD::Tex2DU32S32;
35960b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4u32_f32:
35970b57cec5SDimitry Andric     return NVPTXISD::Tex2DU32Float;
35980b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
35990b57cec5SDimitry Andric     return NVPTXISD::Tex2DU32FloatLevel;
36000b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
36010b57cec5SDimitry Andric     return NVPTXISD::Tex2DU32FloatGrad;
36020b57cec5SDimitry Andric 
36030b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
36040b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayFloatS32;
36050b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
36060b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayFloatFloat;
36070b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
36080b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayFloatFloatLevel;
36090b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
36100b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayFloatFloatGrad;
36110b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
36120b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayS32S32;
36130b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
36140b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayS32Float;
36150b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
36160b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayS32FloatLevel;
36170b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
36180b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayS32FloatGrad;
36190b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
36200b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayU32S32;
36210b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
36220b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayU32Float;
36230b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
36240b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayU32FloatLevel;
36250b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
36260b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayU32FloatGrad;
36270b57cec5SDimitry Andric 
36280b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4f32_s32:
36290b57cec5SDimitry Andric     return NVPTXISD::Tex3DFloatS32;
36300b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4f32_f32:
36310b57cec5SDimitry Andric     return NVPTXISD::Tex3DFloatFloat;
36320b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
36330b57cec5SDimitry Andric     return NVPTXISD::Tex3DFloatFloatLevel;
36340b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
36350b57cec5SDimitry Andric     return NVPTXISD::Tex3DFloatFloatGrad;
36360b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4s32_s32:
36370b57cec5SDimitry Andric     return NVPTXISD::Tex3DS32S32;
36380b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4s32_f32:
36390b57cec5SDimitry Andric     return NVPTXISD::Tex3DS32Float;
36400b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
36410b57cec5SDimitry Andric     return NVPTXISD::Tex3DS32FloatLevel;
36420b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
36430b57cec5SDimitry Andric     return NVPTXISD::Tex3DS32FloatGrad;
36440b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4u32_s32:
36450b57cec5SDimitry Andric     return NVPTXISD::Tex3DU32S32;
36460b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4u32_f32:
36470b57cec5SDimitry Andric     return NVPTXISD::Tex3DU32Float;
36480b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
36490b57cec5SDimitry Andric     return NVPTXISD::Tex3DU32FloatLevel;
36500b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
36510b57cec5SDimitry Andric     return NVPTXISD::Tex3DU32FloatGrad;
36520b57cec5SDimitry Andric 
36530b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_v4f32_f32:
36540b57cec5SDimitry Andric     return NVPTXISD::TexCubeFloatFloat;
36550b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
36560b57cec5SDimitry Andric     return NVPTXISD::TexCubeFloatFloatLevel;
36570b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_v4s32_f32:
36580b57cec5SDimitry Andric     return NVPTXISD::TexCubeS32Float;
36590b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
36600b57cec5SDimitry Andric     return NVPTXISD::TexCubeS32FloatLevel;
36610b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_v4u32_f32:
36620b57cec5SDimitry Andric     return NVPTXISD::TexCubeU32Float;
36630b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
36640b57cec5SDimitry Andric     return NVPTXISD::TexCubeU32FloatLevel;
36650b57cec5SDimitry Andric 
36660b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
36670b57cec5SDimitry Andric     return NVPTXISD::TexCubeArrayFloatFloat;
36680b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
36690b57cec5SDimitry Andric     return NVPTXISD::TexCubeArrayFloatFloatLevel;
36700b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
36710b57cec5SDimitry Andric     return NVPTXISD::TexCubeArrayS32Float;
36720b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
36730b57cec5SDimitry Andric     return NVPTXISD::TexCubeArrayS32FloatLevel;
36740b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
36750b57cec5SDimitry Andric     return NVPTXISD::TexCubeArrayU32Float;
36760b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
36770b57cec5SDimitry Andric     return NVPTXISD::TexCubeArrayU32FloatLevel;
36780b57cec5SDimitry Andric 
36790b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
36800b57cec5SDimitry Andric     return NVPTXISD::Tld4R2DFloatFloat;
36810b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
36820b57cec5SDimitry Andric     return NVPTXISD::Tld4G2DFloatFloat;
36830b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
36840b57cec5SDimitry Andric     return NVPTXISD::Tld4B2DFloatFloat;
36850b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
36860b57cec5SDimitry Andric     return NVPTXISD::Tld4A2DFloatFloat;
36870b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
36880b57cec5SDimitry Andric     return NVPTXISD::Tld4R2DS64Float;
36890b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
36900b57cec5SDimitry Andric     return NVPTXISD::Tld4G2DS64Float;
36910b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
36920b57cec5SDimitry Andric     return NVPTXISD::Tld4B2DS64Float;
36930b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
36940b57cec5SDimitry Andric     return NVPTXISD::Tld4A2DS64Float;
36950b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
36960b57cec5SDimitry Andric     return NVPTXISD::Tld4R2DU64Float;
36970b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
36980b57cec5SDimitry Andric     return NVPTXISD::Tld4G2DU64Float;
36990b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
37000b57cec5SDimitry Andric     return NVPTXISD::Tld4B2DU64Float;
37010b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
37020b57cec5SDimitry Andric     return NVPTXISD::Tld4A2DU64Float;
37030b57cec5SDimitry Andric 
37040b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
37050b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DFloatS32;
37060b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
37070b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DFloatFloat;
37080b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
37090b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DFloatFloatLevel;
37100b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
37110b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DFloatFloatGrad;
37120b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
37130b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DS32S32;
37140b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
37150b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DS32Float;
37160b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
37170b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DS32FloatLevel;
37180b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
37190b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DS32FloatGrad;
37200b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
37210b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DU32S32;
37220b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
37230b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DU32Float;
37240b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
37250b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DU32FloatLevel;
37260b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
37270b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DU32FloatGrad;
37280b57cec5SDimitry Andric 
37290b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
37300b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayFloatS32;
37310b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
37320b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayFloatFloat;
37330b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
37340b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayFloatFloatLevel;
37350b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
37360b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayFloatFloatGrad;
37370b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
37380b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayS32S32;
37390b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
37400b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayS32Float;
37410b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
37420b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayS32FloatLevel;
37430b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
37440b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayS32FloatGrad;
37450b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
37460b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayU32S32;
37470b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
37480b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayU32Float;
37490b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
37500b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayU32FloatLevel;
37510b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
37520b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayU32FloatGrad;
37530b57cec5SDimitry Andric 
37540b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
37550b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DFloatS32;
37560b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
37570b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DFloatFloat;
37580b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
37590b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DFloatFloatLevel;
37600b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
37610b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DFloatFloatGrad;
37620b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
37630b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DS32S32;
37640b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
37650b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DS32Float;
37660b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
37670b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DS32FloatLevel;
37680b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
37690b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DS32FloatGrad;
37700b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
37710b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DU32S32;
37720b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
37730b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DU32Float;
37740b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
37750b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DU32FloatLevel;
37760b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
37770b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DU32FloatGrad;
37780b57cec5SDimitry Andric 
37790b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
37800b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayFloatS32;
37810b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
37820b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayFloatFloat;
37830b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
37840b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayFloatFloatLevel;
37850b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
37860b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayFloatFloatGrad;
37870b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
37880b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayS32S32;
37890b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
37900b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayS32Float;
37910b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
37920b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayS32FloatLevel;
37930b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
37940b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayS32FloatGrad;
37950b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
37960b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayU32S32;
37970b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
37980b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayU32Float;
37990b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
38000b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayU32FloatLevel;
38010b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
38020b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayU32FloatGrad;
38030b57cec5SDimitry Andric 
38040b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
38050b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DFloatS32;
38060b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
38070b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DFloatFloat;
38080b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
38090b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DFloatFloatLevel;
38100b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
38110b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DFloatFloatGrad;
38120b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
38130b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DS32S32;
38140b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
38150b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DS32Float;
38160b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
38170b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DS32FloatLevel;
38180b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
38190b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DS32FloatGrad;
38200b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
38210b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DU32S32;
38220b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
38230b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DU32Float;
38240b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
38250b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DU32FloatLevel;
38260b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
38270b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DU32FloatGrad;
38280b57cec5SDimitry Andric 
38290b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
38300b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeFloatFloat;
38310b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
38320b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeFloatFloatLevel;
38330b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
38340b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeS32Float;
38350b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
38360b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeS32FloatLevel;
38370b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
38380b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeU32Float;
38390b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
38400b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeU32FloatLevel;
38410b57cec5SDimitry Andric 
38420b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
38430b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeArrayFloatFloat;
38440b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
38450b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel;
38460b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
38470b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeArrayS32Float;
38480b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
38490b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel;
38500b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
38510b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeArrayU32Float;
38520b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
38530b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;
38540b57cec5SDimitry Andric 
38557a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_grad_v4f32_f32:
38567a6dacacSDimitry Andric     return NVPTXISD::TexUnifiedCubeFloatFloatGrad;
38577a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_grad_v4s32_f32:
38587a6dacacSDimitry Andric     return NVPTXISD::TexUnifiedCubeS32FloatGrad;
38597a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_grad_v4u32_f32:
38607a6dacacSDimitry Andric     return NVPTXISD::TexUnifiedCubeU32FloatGrad;
38617a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_grad_v4f32_f32:
38627a6dacacSDimitry Andric     return NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad;
38637a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_grad_v4s32_f32:
38647a6dacacSDimitry Andric     return NVPTXISD::TexUnifiedCubeArrayS32FloatGrad;
38657a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_grad_v4u32_f32:
38667a6dacacSDimitry Andric     return NVPTXISD::TexUnifiedCubeArrayU32FloatGrad;
38677a6dacacSDimitry Andric 
38680b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
38690b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedR2DFloatFloat;
38700b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
38710b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedG2DFloatFloat;
38720b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
38730b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedB2DFloatFloat;
38740b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
38750b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedA2DFloatFloat;
38760b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
38770b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedR2DS64Float;
38780b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
38790b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedG2DS64Float;
38800b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
38810b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedB2DS64Float;
38820b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
38830b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedA2DS64Float;
38840b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
38850b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedR2DU64Float;
38860b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
38870b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedG2DU64Float;
38880b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
38890b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedB2DU64Float;
38900b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
38910b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedA2DU64Float;
38920b57cec5SDimitry Andric   }
38930b57cec5SDimitry Andric }
38940b57cec5SDimitry Andric 
38950b57cec5SDimitry Andric static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
38960b57cec5SDimitry Andric   switch (Intrinsic) {
38970b57cec5SDimitry Andric   default:
38980b57cec5SDimitry Andric     return 0;
38990b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i8_clamp:
39000b57cec5SDimitry Andric     return NVPTXISD::Suld1DI8Clamp;
39010b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i16_clamp:
39020b57cec5SDimitry Andric     return NVPTXISD::Suld1DI16Clamp;
39030b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i32_clamp:
39040b57cec5SDimitry Andric     return NVPTXISD::Suld1DI32Clamp;
39050b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i64_clamp:
39060b57cec5SDimitry Andric     return NVPTXISD::Suld1DI64Clamp;
39070b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i8_clamp:
39080b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I8Clamp;
39090b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i16_clamp:
39100b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I16Clamp;
39110b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i32_clamp:
39120b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I32Clamp;
39130b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i64_clamp:
39140b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I64Clamp;
39150b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i8_clamp:
39160b57cec5SDimitry Andric     return NVPTXISD::Suld1DV4I8Clamp;
39170b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i16_clamp:
39180b57cec5SDimitry Andric     return NVPTXISD::Suld1DV4I16Clamp;
39190b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i32_clamp:
39200b57cec5SDimitry Andric     return NVPTXISD::Suld1DV4I32Clamp;
39210b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i8_clamp:
39220b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI8Clamp;
39230b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i16_clamp:
39240b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI16Clamp;
39250b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i32_clamp:
39260b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI32Clamp;
39270b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i64_clamp:
39280b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI64Clamp;
39290b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
39300b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I8Clamp;
39310b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
39320b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I16Clamp;
39330b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
39340b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I32Clamp;
39350b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
39360b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I64Clamp;
39370b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
39380b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV4I8Clamp;
39390b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
39400b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV4I16Clamp;
39410b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
39420b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV4I32Clamp;
39430b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i8_clamp:
39440b57cec5SDimitry Andric     return NVPTXISD::Suld2DI8Clamp;
39450b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i16_clamp:
39460b57cec5SDimitry Andric     return NVPTXISD::Suld2DI16Clamp;
39470b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i32_clamp:
39480b57cec5SDimitry Andric     return NVPTXISD::Suld2DI32Clamp;
39490b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i64_clamp:
39500b57cec5SDimitry Andric     return NVPTXISD::Suld2DI64Clamp;
39510b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i8_clamp:
39520b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I8Clamp;
39530b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i16_clamp:
39540b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I16Clamp;
39550b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i32_clamp:
39560b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I32Clamp;
39570b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i64_clamp:
39580b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I64Clamp;
39590b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i8_clamp:
39600b57cec5SDimitry Andric     return NVPTXISD::Suld2DV4I8Clamp;
39610b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i16_clamp:
39620b57cec5SDimitry Andric     return NVPTXISD::Suld2DV4I16Clamp;
39630b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i32_clamp:
39640b57cec5SDimitry Andric     return NVPTXISD::Suld2DV4I32Clamp;
39650b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i8_clamp:
39660b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI8Clamp;
39670b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i16_clamp:
39680b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI16Clamp;
39690b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i32_clamp:
39700b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI32Clamp;
39710b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i64_clamp:
39720b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI64Clamp;
39730b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
39740b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I8Clamp;
39750b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
39760b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I16Clamp;
39770b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
39780b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I32Clamp;
39790b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
39800b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I64Clamp;
39810b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
39820b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV4I8Clamp;
39830b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
39840b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV4I16Clamp;
39850b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
39860b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV4I32Clamp;
39870b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i8_clamp:
39880b57cec5SDimitry Andric     return NVPTXISD::Suld3DI8Clamp;
39890b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i16_clamp:
39900b57cec5SDimitry Andric     return NVPTXISD::Suld3DI16Clamp;
39910b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i32_clamp:
39920b57cec5SDimitry Andric     return NVPTXISD::Suld3DI32Clamp;
39930b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i64_clamp:
39940b57cec5SDimitry Andric     return NVPTXISD::Suld3DI64Clamp;
39950b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i8_clamp:
39960b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I8Clamp;
39970b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i16_clamp:
39980b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I16Clamp;
39990b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i32_clamp:
40000b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I32Clamp;
40010b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i64_clamp:
40020b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I64Clamp;
40030b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i8_clamp:
40040b57cec5SDimitry Andric     return NVPTXISD::Suld3DV4I8Clamp;
40050b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i16_clamp:
40060b57cec5SDimitry Andric     return NVPTXISD::Suld3DV4I16Clamp;
40070b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i32_clamp:
40080b57cec5SDimitry Andric     return NVPTXISD::Suld3DV4I32Clamp;
40090b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i8_trap:
40100b57cec5SDimitry Andric     return NVPTXISD::Suld1DI8Trap;
40110b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i16_trap:
40120b57cec5SDimitry Andric     return NVPTXISD::Suld1DI16Trap;
40130b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i32_trap:
40140b57cec5SDimitry Andric     return NVPTXISD::Suld1DI32Trap;
40150b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i64_trap:
40160b57cec5SDimitry Andric     return NVPTXISD::Suld1DI64Trap;
40170b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i8_trap:
40180b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I8Trap;
40190b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i16_trap:
40200b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I16Trap;
40210b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i32_trap:
40220b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I32Trap;
40230b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i64_trap:
40240b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I64Trap;
40250b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i8_trap:
40260b57cec5SDimitry Andric     return NVPTXISD::Suld1DV4I8Trap;
40270b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i16_trap:
40280b57cec5SDimitry Andric     return NVPTXISD::Suld1DV4I16Trap;
40290b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i32_trap:
40300b57cec5SDimitry Andric     return NVPTXISD::Suld1DV4I32Trap;
40310b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i8_trap:
40320b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI8Trap;
40330b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i16_trap:
40340b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI16Trap;
40350b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i32_trap:
40360b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI32Trap;
40370b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i64_trap:
40380b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI64Trap;
40390b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
40400b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I8Trap;
40410b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
40420b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I16Trap;
40430b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
40440b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I32Trap;
40450b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
40460b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I64Trap;
40470b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
40480b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV4I8Trap;
40490b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
40500b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV4I16Trap;
40510b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
40520b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV4I32Trap;
40530b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i8_trap:
40540b57cec5SDimitry Andric     return NVPTXISD::Suld2DI8Trap;
40550b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i16_trap:
40560b57cec5SDimitry Andric     return NVPTXISD::Suld2DI16Trap;
40570b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i32_trap:
40580b57cec5SDimitry Andric     return NVPTXISD::Suld2DI32Trap;
40590b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i64_trap:
40600b57cec5SDimitry Andric     return NVPTXISD::Suld2DI64Trap;
40610b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i8_trap:
40620b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I8Trap;
40630b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i16_trap:
40640b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I16Trap;
40650b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i32_trap:
40660b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I32Trap;
40670b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i64_trap:
40680b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I64Trap;
40690b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i8_trap:
40700b57cec5SDimitry Andric     return NVPTXISD::Suld2DV4I8Trap;
40710b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i16_trap:
40720b57cec5SDimitry Andric     return NVPTXISD::Suld2DV4I16Trap;
40730b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i32_trap:
40740b57cec5SDimitry Andric     return NVPTXISD::Suld2DV4I32Trap;
40750b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i8_trap:
40760b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI8Trap;
40770b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i16_trap:
40780b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI16Trap;
40790b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i32_trap:
40800b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI32Trap;
40810b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i64_trap:
40820b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI64Trap;
40830b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
40840b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I8Trap;
40850b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
40860b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I16Trap;
40870b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
40880b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I32Trap;
40890b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
40900b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I64Trap;
40910b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
40920b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV4I8Trap;
40930b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
40940b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV4I16Trap;
40950b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
40960b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV4I32Trap;
40970b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i8_trap:
40980b57cec5SDimitry Andric     return NVPTXISD::Suld3DI8Trap;
40990b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i16_trap:
41000b57cec5SDimitry Andric     return NVPTXISD::Suld3DI16Trap;
41010b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i32_trap:
41020b57cec5SDimitry Andric     return NVPTXISD::Suld3DI32Trap;
41030b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i64_trap:
41040b57cec5SDimitry Andric     return NVPTXISD::Suld3DI64Trap;
41050b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i8_trap:
41060b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I8Trap;
41070b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i16_trap:
41080b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I16Trap;
41090b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i32_trap:
41100b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I32Trap;
41110b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i64_trap:
41120b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I64Trap;
41130b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i8_trap:
41140b57cec5SDimitry Andric     return NVPTXISD::Suld3DV4I8Trap;
41150b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i16_trap:
41160b57cec5SDimitry Andric     return NVPTXISD::Suld3DV4I16Trap;
41170b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i32_trap:
41180b57cec5SDimitry Andric     return NVPTXISD::Suld3DV4I32Trap;
41190b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i8_zero:
41200b57cec5SDimitry Andric     return NVPTXISD::Suld1DI8Zero;
41210b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i16_zero:
41220b57cec5SDimitry Andric     return NVPTXISD::Suld1DI16Zero;
41230b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i32_zero:
41240b57cec5SDimitry Andric     return NVPTXISD::Suld1DI32Zero;
41250b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i64_zero:
41260b57cec5SDimitry Andric     return NVPTXISD::Suld1DI64Zero;
41270b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i8_zero:
41280b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I8Zero;
41290b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i16_zero:
41300b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I16Zero;
41310b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i32_zero:
41320b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I32Zero;
41330b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i64_zero:
41340b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I64Zero;
41350b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i8_zero:
41360b57cec5SDimitry Andric     return NVPTXISD::Suld1DV4I8Zero;
41370b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i16_zero:
41380b57cec5SDimitry Andric     return NVPTXISD::Suld1DV4I16Zero;
41390b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i32_zero:
41400b57cec5SDimitry Andric     return NVPTXISD::Suld1DV4I32Zero;
41410b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i8_zero:
41420b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI8Zero;
41430b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i16_zero:
41440b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI16Zero;
41450b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i32_zero:
41460b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI32Zero;
41470b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i64_zero:
41480b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI64Zero;
41490b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
41500b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I8Zero;
41510b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
41520b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I16Zero;
41530b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
41540b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I32Zero;
41550b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
41560b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I64Zero;
41570b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
41580b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV4I8Zero;
41590b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
41600b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV4I16Zero;
41610b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
41620b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV4I32Zero;
41630b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i8_zero:
41640b57cec5SDimitry Andric     return NVPTXISD::Suld2DI8Zero;
41650b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i16_zero:
41660b57cec5SDimitry Andric     return NVPTXISD::Suld2DI16Zero;
41670b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i32_zero:
41680b57cec5SDimitry Andric     return NVPTXISD::Suld2DI32Zero;
41690b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i64_zero:
41700b57cec5SDimitry Andric     return NVPTXISD::Suld2DI64Zero;
41710b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i8_zero:
41720b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I8Zero;
41730b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i16_zero:
41740b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I16Zero;
41750b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i32_zero:
41760b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I32Zero;
41770b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i64_zero:
41780b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I64Zero;
41790b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i8_zero:
41800b57cec5SDimitry Andric     return NVPTXISD::Suld2DV4I8Zero;
41810b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i16_zero:
41820b57cec5SDimitry Andric     return NVPTXISD::Suld2DV4I16Zero;
41830b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i32_zero:
41840b57cec5SDimitry Andric     return NVPTXISD::Suld2DV4I32Zero;
41850b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i8_zero:
41860b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI8Zero;
41870b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i16_zero:
41880b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI16Zero;
41890b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i32_zero:
41900b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI32Zero;
41910b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i64_zero:
41920b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI64Zero;
41930b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
41940b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I8Zero;
41950b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
41960b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I16Zero;
41970b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
41980b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I32Zero;
41990b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
42000b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I64Zero;
42010b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
42020b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV4I8Zero;
42030b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
42040b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV4I16Zero;
42050b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
42060b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV4I32Zero;
42070b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i8_zero:
42080b57cec5SDimitry Andric     return NVPTXISD::Suld3DI8Zero;
42090b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i16_zero:
42100b57cec5SDimitry Andric     return NVPTXISD::Suld3DI16Zero;
42110b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i32_zero:
42120b57cec5SDimitry Andric     return NVPTXISD::Suld3DI32Zero;
42130b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i64_zero:
42140b57cec5SDimitry Andric     return NVPTXISD::Suld3DI64Zero;
42150b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i8_zero:
42160b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I8Zero;
42170b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i16_zero:
42180b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I16Zero;
42190b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i32_zero:
42200b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I32Zero;
42210b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i64_zero:
42220b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I64Zero;
42230b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i8_zero:
42240b57cec5SDimitry Andric     return NVPTXISD::Suld3DV4I8Zero;
42250b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i16_zero:
42260b57cec5SDimitry Andric     return NVPTXISD::Suld3DV4I16Zero;
42270b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i32_zero:
42280b57cec5SDimitry Andric     return NVPTXISD::Suld3DV4I32Zero;
42290b57cec5SDimitry Andric   }
42300b57cec5SDimitry Andric }
42310b57cec5SDimitry Andric 
// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
// TgtMemIntrinsic because we need information that is only available in the
// "Value" type of the destination pointer -- in particular, the address space
// information.
42370b57cec5SDimitry Andric bool NVPTXTargetLowering::getTgtMemIntrinsic(
42380b57cec5SDimitry Andric     IntrinsicInfo &Info, const CallInst &I,
42390b57cec5SDimitry Andric     MachineFunction &MF, unsigned Intrinsic) const {
42400b57cec5SDimitry Andric   switch (Intrinsic) {
42410b57cec5SDimitry Andric   default:
42420b57cec5SDimitry Andric     return false;
42430b57cec5SDimitry Andric   case Intrinsic::nvvm_match_all_sync_i32p:
42440b57cec5SDimitry Andric   case Intrinsic::nvvm_match_all_sync_i64p:
42450b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
42460b57cec5SDimitry Andric     // memVT is bogus. These intrinsics have IntrInaccessibleMemOnly attribute
42470b57cec5SDimitry Andric     // in order to model data exchange with other threads, but perform no real
42480b57cec5SDimitry Andric     // memory accesses.
42490b57cec5SDimitry Andric     Info.memVT = MVT::i1;
42500b57cec5SDimitry Andric 
42510b57cec5SDimitry Andric     // Our result depends on both our and other thread's arguments.
42520b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
42530b57cec5SDimitry Andric     return true;
42540b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col:
42550b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row:
42560b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride:
42570b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride:
42580b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col:
42590b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row:
42600b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride:
42610b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride:
42620b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col:
42630b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row:
42640b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride:
42650b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride:
42660b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col:
42670b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row:
42680b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride:
42690b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride:
42700b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col:
42710b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row:
42720b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride:
42730b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride:
42740b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col:
42750b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row:
42760b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride:
42770b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride: {
42780b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
42790b57cec5SDimitry Andric     Info.memVT = MVT::v8f16;
42800b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
42810b57cec5SDimitry Andric     Info.offset = 0;
42820b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
42838bcb0991SDimitry Andric     Info.align = Align(16);
42840b57cec5SDimitry Andric     return true;
42850b57cec5SDimitry Andric   }
42860b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col:
42870b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col_stride:
42880b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col_stride:
42890b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col:
42900b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row:
42910b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row_stride:
42920b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row_stride:
42930b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row:
4294fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col:
4295fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col_stride:
4296fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row:
4297fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row_stride:
42980b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col:
42990b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col_stride:
43000b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col_stride:
43010b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col:
43020b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row:
43030b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row_stride:
43040b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row_stride:
4305fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row:
4306fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col:
4307fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col_stride:
4308fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row:
4309fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row_stride: {
43100b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
43110b57cec5SDimitry Andric     Info.memVT = MVT::v2i32;
43120b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
43130b57cec5SDimitry Andric     Info.offset = 0;
43140b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
43158bcb0991SDimitry Andric     Info.align = Align(8);
43160b57cec5SDimitry Andric     return true;
43170b57cec5SDimitry Andric   }
43180b57cec5SDimitry Andric 
43190b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col:
43200b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col_stride:
43210b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col_stride:
43220b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col:
43230b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row:
43240b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row_stride:
43250b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row_stride:
43260b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row:
4327fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col:
4328fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col_stride:
4329fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row:
4330fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row_stride:
4331fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col:
4332fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col_stride:
4333fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row:
4334fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row_stride:
43350b57cec5SDimitry Andric 
43360b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col:
43370b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col_stride:
43380b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col_stride:
43390b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col:
43400b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row:
43410b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row_stride:
43420b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row_stride:
4343fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row:
4344fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col:
4345fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col_stride:
4346fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row:
4347fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row_stride:
4348fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col:
4349fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col_stride:
4350fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row:
4351349cc55cSDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row_stride:
4352349cc55cSDimitry Andric   case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_b16:
4353349cc55cSDimitry Andric   case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_trans_b16: {
43540b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
43550b57cec5SDimitry Andric     Info.memVT = MVT::v4i32;
43560b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
43570b57cec5SDimitry Andric     Info.offset = 0;
43580b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
43598bcb0991SDimitry Andric     Info.align = Align(16);
43600b57cec5SDimitry Andric     return true;
43610b57cec5SDimitry Andric   }
43620b57cec5SDimitry Andric 
43630b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col:
43640b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col_stride:
43650b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col_stride:
43660b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col:
43670b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row:
43680b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row_stride:
43690b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row_stride:
43700b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row:
43710b57cec5SDimitry Andric 
43720b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col:
43730b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col_stride:
43740b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col_stride:
43750b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col:
43760b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row:
43770b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row_stride:
43780b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row_stride:
43790b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row:
43800b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row:
43810b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row_stride:
43820b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col:
43830b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col_stride:
43840b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row:
43850b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row_stride:
43860b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row_stride:
43870b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row:
43880b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col:
43890b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col_stride:
43900b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col_stride:
4391349cc55cSDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col:
4392349cc55cSDimitry Andric   case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_b16:
4393349cc55cSDimitry Andric   case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_trans_b16: {
43940b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
43950b57cec5SDimitry Andric     Info.memVT = MVT::i32;
43960b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
43970b57cec5SDimitry Andric     Info.offset = 0;
43980b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
43998bcb0991SDimitry Andric     Info.align = Align(4);
44000b57cec5SDimitry Andric     return true;
44010b57cec5SDimitry Andric   }
44020b57cec5SDimitry Andric 
44030b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col:
44040b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row:
44050b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride:
44060b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride:
44070b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col:
44080b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row:
44090b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride:
44100b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride:
44110b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col:
44120b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row:
44130b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride:
44140b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride: {
44150b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
44160b57cec5SDimitry Andric     Info.memVT = MVT::v4f16;
44170b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
44180b57cec5SDimitry Andric     Info.offset = 0;
44190b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
44208bcb0991SDimitry Andric     Info.align = Align(16);
44210b57cec5SDimitry Andric     return true;
44220b57cec5SDimitry Andric   }
44230b57cec5SDimitry Andric 
44240b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col:
44250b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row:
44260b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride:
44270b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride:
44280b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col:
44290b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row:
44300b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride:
44310b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride:
44320b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col:
44330b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row:
44340b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride:
4435fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride:
4436fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col:
4437fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row:
4438fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col_stride:
4439fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row_stride: {
44400b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
44410b57cec5SDimitry Andric     Info.memVT = MVT::v8f32;
44420b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
44430b57cec5SDimitry Andric     Info.offset = 0;
44440b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
44458bcb0991SDimitry Andric     Info.align = Align(16);
44460b57cec5SDimitry Andric     return true;
44470b57cec5SDimitry Andric   }
44480b57cec5SDimitry Andric 
4449fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col:
4450fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col_stride:
4451fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row:
4452fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row_stride:
4453fe6060f1SDimitry Andric 
4454fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col:
4455fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col_stride:
4456fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row:
4457fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row_stride:
4458fe6060f1SDimitry Andric 
44590b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col:
44600b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col_stride:
44610b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row:
44620b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row_stride:
44630b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col:
44640b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col_stride:
44650b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row:
44660b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row_stride:
44670b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col:
44680b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col_stride:
44690b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row:
44700b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row_stride: {
44710b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
44720b57cec5SDimitry Andric     Info.memVT = MVT::v8i32;
44730b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
44740b57cec5SDimitry Andric     Info.offset = 0;
44750b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
44768bcb0991SDimitry Andric     Info.align = Align(16);
44770b57cec5SDimitry Andric     return true;
44780b57cec5SDimitry Andric   }
44790b57cec5SDimitry Andric 
44800b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col:
44810b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col_stride:
44820b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row:
44830b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row_stride:
44840b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col:
44850b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col_stride:
44860b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row:
4487349cc55cSDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row_stride:
4488349cc55cSDimitry Andric   case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_b16:
4489349cc55cSDimitry Andric   case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_trans_b16: {
44900b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
44910b57cec5SDimitry Andric     Info.memVT = MVT::v2i32;
44920b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
44930b57cec5SDimitry Andric     Info.offset = 0;
44940b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
44958bcb0991SDimitry Andric     Info.align = Align(8);
44960b57cec5SDimitry Andric     return true;
44970b57cec5SDimitry Andric   }
44980b57cec5SDimitry Andric 
4499fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col:
4500fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col_stride:
4501fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row:
4502fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row_stride:
4503fe6060f1SDimitry Andric 
4504fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col:
4505fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col_stride:
4506fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row:
4507fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row_stride: {
4508fe6060f1SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
4509fe6060f1SDimitry Andric     Info.memVT = MVT::f64;
4510fe6060f1SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
4511fe6060f1SDimitry Andric     Info.offset = 0;
4512fe6060f1SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
4513fe6060f1SDimitry Andric     Info.align = Align(8);
4514fe6060f1SDimitry Andric     return true;
4515fe6060f1SDimitry Andric   }
4516fe6060f1SDimitry Andric 
4517fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col:
4518fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col_stride:
4519fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row:
4520fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row_stride: {
4521fe6060f1SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
4522fe6060f1SDimitry Andric     Info.memVT = MVT::v2f64;
4523fe6060f1SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
4524fe6060f1SDimitry Andric     Info.offset = 0;
4525fe6060f1SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
4526fe6060f1SDimitry Andric     Info.align = Align(16);
4527fe6060f1SDimitry Andric     return true;
4528fe6060f1SDimitry Andric   }
4529fe6060f1SDimitry Andric 
45300b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col:
45310b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row:
45320b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride:
45330b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride:
45340b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col:
45350b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row:
45360b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride:
45370b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride:
45380b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col:
45390b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row:
45400b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride:
45410b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride: {
45420b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_VOID;
45430b57cec5SDimitry Andric     Info.memVT = MVT::v4f16;
45440b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
45450b57cec5SDimitry Andric     Info.offset = 0;
45460b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOStore;
45478bcb0991SDimitry Andric     Info.align = Align(16);
45480b57cec5SDimitry Andric     return true;
45490b57cec5SDimitry Andric   }
45500b57cec5SDimitry Andric 
45510b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col:
45520b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row:
45530b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride:
45540b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride:
45550b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col:
45560b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row:
45570b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride:
45580b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride:
45590b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col:
45600b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row:
45610b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride:
4562fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride:
4563fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col:
4564fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row:
4565fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col_stride:
4566fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row_stride: {
45670b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_VOID;
45680b57cec5SDimitry Andric     Info.memVT = MVT::v8f32;
45690b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
45700b57cec5SDimitry Andric     Info.offset = 0;
45710b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOStore;
45728bcb0991SDimitry Andric     Info.align = Align(16);
45730b57cec5SDimitry Andric     return true;
45740b57cec5SDimitry Andric   }
45750b57cec5SDimitry Andric 
45760b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col:
45770b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col_stride:
45780b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row:
45790b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row_stride:
45800b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col:
45810b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col_stride:
45820b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row:
45830b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row_stride:
45840b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col:
45850b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col_stride:
45860b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row:
45870b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row_stride: {
45880b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_VOID;
45890b57cec5SDimitry Andric     Info.memVT = MVT::v8i32;
45900b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
45910b57cec5SDimitry Andric     Info.offset = 0;
45920b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOStore;
45938bcb0991SDimitry Andric     Info.align = Align(16);
45940b57cec5SDimitry Andric     return true;
45950b57cec5SDimitry Andric   }
45960b57cec5SDimitry Andric 
45970b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col:
45980b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col_stride:
45990b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row:
46000b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row_stride:
46010b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col:
46020b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col_stride:
46030b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row:
46040b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row_stride: {
46050b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_VOID;
46060b57cec5SDimitry Andric     Info.memVT = MVT::v2i32;
46070b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
46080b57cec5SDimitry Andric     Info.offset = 0;
46090b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOStore;
46108bcb0991SDimitry Andric     Info.align = Align(8);
46110b57cec5SDimitry Andric     return true;
46120b57cec5SDimitry Andric   }
46130b57cec5SDimitry Andric 
4614fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col:
4615fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col_stride:
4616fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row:
4617fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row_stride: {
4618fe6060f1SDimitry Andric     Info.opc = ISD::INTRINSIC_VOID;
4619fe6060f1SDimitry Andric     Info.memVT = MVT::v2f64;
4620fe6060f1SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
4621fe6060f1SDimitry Andric     Info.offset = 0;
4622fe6060f1SDimitry Andric     Info.flags = MachineMemOperand::MOStore;
4623fe6060f1SDimitry Andric     Info.align = Align(16);
4624fe6060f1SDimitry Andric     return true;
4625fe6060f1SDimitry Andric   }
4626fe6060f1SDimitry Andric 
46270b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_load_inc_32:
46280b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_load_dec_32:
46290b57cec5SDimitry Andric 
46300b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_add_gen_f_cta:
46310b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_add_gen_f_sys:
46320b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_add_gen_i_cta:
46330b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_add_gen_i_sys:
46340b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_and_gen_i_cta:
46350b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_and_gen_i_sys:
46360b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_cas_gen_i_cta:
46370b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_cas_gen_i_sys:
46380b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_dec_gen_i_cta:
46390b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_dec_gen_i_sys:
46400b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_inc_gen_i_cta:
46410b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_inc_gen_i_sys:
46420b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_max_gen_i_cta:
46430b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_max_gen_i_sys:
46440b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_min_gen_i_cta:
46450b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_min_gen_i_sys:
46460b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_or_gen_i_cta:
46470b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_or_gen_i_sys:
46480b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_exch_gen_i_cta:
46490b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_exch_gen_i_sys:
46500b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_xor_gen_i_cta:
46510b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_xor_gen_i_sys: {
46520fca6ea1SDimitry Andric     auto &DL = I.getDataLayout();
46530b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
46540b57cec5SDimitry Andric     Info.memVT = getValueType(DL, I.getType());
46550b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
46560b57cec5SDimitry Andric     Info.offset = 0;
46570b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
46588bcb0991SDimitry Andric     Info.align.reset();
46590b57cec5SDimitry Andric     return true;
46600b57cec5SDimitry Andric   }
46610b57cec5SDimitry Andric 
46620b57cec5SDimitry Andric   case Intrinsic::nvvm_ldu_global_i:
46630b57cec5SDimitry Andric   case Intrinsic::nvvm_ldu_global_f:
46640b57cec5SDimitry Andric   case Intrinsic::nvvm_ldu_global_p: {
46650fca6ea1SDimitry Andric     auto &DL = I.getDataLayout();
46660b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
46670b57cec5SDimitry Andric     if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
46680b57cec5SDimitry Andric       Info.memVT = getValueType(DL, I.getType());
46690b57cec5SDimitry Andric     else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
46700b57cec5SDimitry Andric       Info.memVT = getPointerTy(DL);
46710b57cec5SDimitry Andric     else
46720b57cec5SDimitry Andric       Info.memVT = getValueType(DL, I.getType());
46730b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
46740b57cec5SDimitry Andric     Info.offset = 0;
46750b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
46765ffd83dbSDimitry Andric     Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
46770b57cec5SDimitry Andric 
46780b57cec5SDimitry Andric     return true;
46790b57cec5SDimitry Andric   }
46800b57cec5SDimitry Andric   case Intrinsic::nvvm_ldg_global_i:
46810b57cec5SDimitry Andric   case Intrinsic::nvvm_ldg_global_f:
46820b57cec5SDimitry Andric   case Intrinsic::nvvm_ldg_global_p: {
46830fca6ea1SDimitry Andric     auto &DL = I.getDataLayout();
46840b57cec5SDimitry Andric 
46850b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
46860b57cec5SDimitry Andric     if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
46870b57cec5SDimitry Andric       Info.memVT = getValueType(DL, I.getType());
46880b57cec5SDimitry Andric     else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
46890b57cec5SDimitry Andric       Info.memVT = getPointerTy(DL);
46900b57cec5SDimitry Andric     else
46910b57cec5SDimitry Andric       Info.memVT = getValueType(DL, I.getType());
46920b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
46930b57cec5SDimitry Andric     Info.offset = 0;
46940b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
46955ffd83dbSDimitry Andric     Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
46960b57cec5SDimitry Andric 
46970b57cec5SDimitry Andric     return true;
46980b57cec5SDimitry Andric   }
46990b57cec5SDimitry Andric 
47000b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4f32_s32:
47010b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4f32_f32:
47020b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
47030b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
47040b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
47050b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
47060b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
47070b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
47080b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4f32_s32:
47090b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4f32_f32:
47100b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
47110b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
47120b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
47130b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
47140b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
47150b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
47160b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4f32_s32:
47170b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4f32_f32:
47180b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
47190b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
47200b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_v4f32_f32:
47210b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
47220b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
47230b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
47240b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
47250b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
47260b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
47270b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
47280b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
47290b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
47300b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
47310b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
47320b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
47330b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
47340b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
47350b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
47360b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
47370b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
47380b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
47390b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
47400b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
47410b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
47420b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
47430b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
47440b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
47450b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
47460b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
47470b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
47480b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
47490b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
47500b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
47510b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
47527a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_grad_v4f32_f32:
47537a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_grad_v4f32_f32:
47540b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
47550b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
47560b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
47570b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
47580b57cec5SDimitry Andric     Info.opc = getOpcForTextureInstr(Intrinsic);
47590b57cec5SDimitry Andric     Info.memVT = MVT::v4f32;
47600b57cec5SDimitry Andric     Info.ptrVal = nullptr;
47610b57cec5SDimitry Andric     Info.offset = 0;
47620b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
47638bcb0991SDimitry Andric     Info.align = Align(16);
47640b57cec5SDimitry Andric     return true;
47650b57cec5SDimitry Andric 
47660b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4s32_s32:
47670b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4s32_f32:
47680b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
47690b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
47700b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
47710b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
47720b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
47730b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
47740b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4s32_s32:
47750b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4s32_f32:
47760b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
47770b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
47780b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
47790b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
47800b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
47810b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
47820b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4s32_s32:
47830b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4s32_f32:
47840b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
47850b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
47860b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_v4s32_f32:
47870b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
47880b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
47890b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
47900b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_v4u32_f32:
47910b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
47920b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
47930b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
47940b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4u32_s32:
47950b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4u32_f32:
47960b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
47970b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
47980b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
47990b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
48000b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
48010b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
48020b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4u32_s32:
48030b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4u32_f32:
48040b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
48050b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
48060b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
48070b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
48080b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
48090b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
48100b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4u32_s32:
48110b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4u32_f32:
48120b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
48130b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
48140b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
48150b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
48160b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
48170b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
48180b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
48190b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
48200b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
48210b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
48220b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
48230b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
48240b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
48250b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
48260b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
48270b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
48280b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
48290b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
48300b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
48310b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
48320b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
48330b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
48340b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
48350b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
48360b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
48370b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
48380b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
48390b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
48400b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
48410b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
48420b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
48430b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
48440b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
48450b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
48460b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
48470b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
48480b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
48490b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
48500b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
48510b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
48520b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
48530b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
48540b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
48550b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
48560b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
48570b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
48580b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
48590b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
48600b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
48610b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
48620b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
48630b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
48640b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
48650b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
48660b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
48670b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
48680b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
48690b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
48707a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_grad_v4s32_f32:
48717a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_grad_v4u32_f32:
48727a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_grad_v4s32_f32:
48737a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_grad_v4u32_f32:
48740b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
48750b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
48760b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
48770b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
48780b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
48790b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
48800b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
48810b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
48820b57cec5SDimitry Andric     Info.opc = getOpcForTextureInstr(Intrinsic);
48830b57cec5SDimitry Andric     Info.memVT = MVT::v4i32;
48840b57cec5SDimitry Andric     Info.ptrVal = nullptr;
48850b57cec5SDimitry Andric     Info.offset = 0;
48860b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
48878bcb0991SDimitry Andric     Info.align = Align(16);
48880b57cec5SDimitry Andric     return true;
48890b57cec5SDimitry Andric 
48900b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i8_clamp:
48910b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i8_clamp:
48920b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i8_clamp:
48930b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i8_clamp:
48940b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
48950b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
48960b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i8_clamp:
48970b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i8_clamp:
48980b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i8_clamp:
48990b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i8_clamp:
49000b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
49010b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
49020b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i8_clamp:
49030b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i8_clamp:
49040b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i8_clamp:
49050b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i8_trap:
49060b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i8_trap:
49070b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i8_trap:
49080b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i8_trap:
49090b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
49100b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
49110b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i8_trap:
49120b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i8_trap:
49130b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i8_trap:
49140b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i8_trap:
49150b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
49160b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
49170b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i8_trap:
49180b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i8_trap:
49190b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i8_trap:
49200b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i8_zero:
49210b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i8_zero:
49220b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i8_zero:
49230b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i8_zero:
49240b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
49250b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
49260b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i8_zero:
49270b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i8_zero:
49280b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i8_zero:
49290b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i8_zero:
49300b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
49310b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
49320b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i8_zero:
49330b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i8_zero:
49340b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i8_zero:
49350b57cec5SDimitry Andric     Info.opc = getOpcForSurfaceInstr(Intrinsic);
49360b57cec5SDimitry Andric     Info.memVT = MVT::i8;
49370b57cec5SDimitry Andric     Info.ptrVal = nullptr;
49380b57cec5SDimitry Andric     Info.offset = 0;
49390b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
49408bcb0991SDimitry Andric     Info.align = Align(16);
49410b57cec5SDimitry Andric     return true;
49420b57cec5SDimitry Andric 
49430b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i16_clamp:
49440b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i16_clamp:
49450b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i16_clamp:
49460b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i16_clamp:
49470b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
49480b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
49490b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i16_clamp:
49500b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i16_clamp:
49510b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i16_clamp:
49520b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i16_clamp:
49530b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
49540b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
49550b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i16_clamp:
49560b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i16_clamp:
49570b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i16_clamp:
49580b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i16_trap:
49590b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i16_trap:
49600b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i16_trap:
49610b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i16_trap:
49620b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
49630b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
49640b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i16_trap:
49650b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i16_trap:
49660b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i16_trap:
49670b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i16_trap:
49680b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
49690b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
49700b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i16_trap:
49710b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i16_trap:
49720b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i16_trap:
49730b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i16_zero:
49740b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i16_zero:
49750b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i16_zero:
49760b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i16_zero:
49770b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
49780b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
49790b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i16_zero:
49800b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i16_zero:
49810b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i16_zero:
49820b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i16_zero:
49830b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
49840b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
49850b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i16_zero:
49860b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i16_zero:
49870b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i16_zero:
49880b57cec5SDimitry Andric     Info.opc = getOpcForSurfaceInstr(Intrinsic);
49890b57cec5SDimitry Andric     Info.memVT = MVT::i16;
49900b57cec5SDimitry Andric     Info.ptrVal = nullptr;
49910b57cec5SDimitry Andric     Info.offset = 0;
49920b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
49938bcb0991SDimitry Andric     Info.align = Align(16);
49940b57cec5SDimitry Andric     return true;
49950b57cec5SDimitry Andric 
49960b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i32_clamp:
49970b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i32_clamp:
49980b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i32_clamp:
49990b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i32_clamp:
50000b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
50010b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
50020b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i32_clamp:
50030b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i32_clamp:
50040b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i32_clamp:
50050b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i32_clamp:
50060b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
50070b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
50080b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i32_clamp:
50090b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i32_clamp:
50100b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i32_clamp:
50110b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i32_trap:
50120b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i32_trap:
50130b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i32_trap:
50140b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i32_trap:
50150b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
50160b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
50170b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i32_trap:
50180b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i32_trap:
50190b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i32_trap:
50200b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i32_trap:
50210b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
50220b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
50230b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i32_trap:
50240b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i32_trap:
50250b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i32_trap:
50260b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i32_zero:
50270b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i32_zero:
50280b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i32_zero:
50290b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i32_zero:
50300b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
50310b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
50320b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i32_zero:
50330b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i32_zero:
50340b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i32_zero:
50350b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i32_zero:
50360b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
50370b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
50380b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i32_zero:
50390b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i32_zero:
50400b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i32_zero:
50410b57cec5SDimitry Andric     Info.opc = getOpcForSurfaceInstr(Intrinsic);
50420b57cec5SDimitry Andric     Info.memVT = MVT::i32;
50430b57cec5SDimitry Andric     Info.ptrVal = nullptr;
50440b57cec5SDimitry Andric     Info.offset = 0;
50450b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
50468bcb0991SDimitry Andric     Info.align = Align(16);
50470b57cec5SDimitry Andric     return true;
50480b57cec5SDimitry Andric 
50490b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i64_clamp:
50500b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i64_clamp:
50510b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i64_clamp:
50520b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
50530b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i64_clamp:
50540b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i64_clamp:
50550b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i64_clamp:
50560b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
50570b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i64_clamp:
50580b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i64_clamp:
50590b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i64_trap:
50600b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i64_trap:
50610b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i64_trap:
50620b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
50630b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i64_trap:
50640b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i64_trap:
50650b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i64_trap:
50660b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
50670b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i64_trap:
50680b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i64_trap:
50690b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i64_zero:
50700b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i64_zero:
50710b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i64_zero:
50720b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
50730b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i64_zero:
50740b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i64_zero:
50750b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i64_zero:
50760b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
50770b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i64_zero:
50780b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i64_zero:
50790b57cec5SDimitry Andric     Info.opc = getOpcForSurfaceInstr(Intrinsic);
50800b57cec5SDimitry Andric     Info.memVT = MVT::i64;
50810b57cec5SDimitry Andric     Info.ptrVal = nullptr;
50820b57cec5SDimitry Andric     Info.offset = 0;
50830b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
50848bcb0991SDimitry Andric     Info.align = Align(16);
50850b57cec5SDimitry Andric     return true;
50860b57cec5SDimitry Andric   }
50870b57cec5SDimitry Andric   return false;
50880b57cec5SDimitry Andric }
50890b57cec5SDimitry Andric 
509081ad6265SDimitry Andric /// getFunctionParamOptimizedAlign - since function arguments are passed via
509181ad6265SDimitry Andric /// .param space, we may want to increase their alignment in a way that
509281ad6265SDimitry Andric /// ensures that we can effectively vectorize their loads & stores. We can
509381ad6265SDimitry Andric /// increase alignment only if the function has internal or has private
509481ad6265SDimitry Andric /// linkage as for other linkage types callers may already rely on default
509581ad6265SDimitry Andric /// alignment. To allow using 128-bit vectorized loads/stores, this function
509681ad6265SDimitry Andric /// ensures that alignment is 16 or greater.
509781ad6265SDimitry Andric Align NVPTXTargetLowering::getFunctionParamOptimizedAlign(
509881ad6265SDimitry Andric     const Function *F, Type *ArgTy, const DataLayout &DL) const {
50990fca6ea1SDimitry Andric   // Capping the alignment to 128 bytes as that is the maximum alignment
51000fca6ea1SDimitry Andric   // supported by PTX.
51010fca6ea1SDimitry Andric   const Align ABITypeAlign = std::min(Align(128), DL.getABITypeAlign(ArgTy));
510281ad6265SDimitry Andric 
510381ad6265SDimitry Andric   // If a function has linkage different from internal or private, we
5104bdd1243dSDimitry Andric   // must use default ABI alignment as external users rely on it. Same
5105bdd1243dSDimitry Andric   // for a function that may be called from a function pointer.
5106bdd1243dSDimitry Andric   if (!F || !F->hasLocalLinkage() ||
5107bdd1243dSDimitry Andric       F->hasAddressTaken(/*Users=*/nullptr,
5108bdd1243dSDimitry Andric                          /*IgnoreCallbackUses=*/false,
5109bdd1243dSDimitry Andric                          /*IgnoreAssumeLikeCalls=*/true,
5110bdd1243dSDimitry Andric                          /*IgnoreLLVMUsed=*/true))
51110fca6ea1SDimitry Andric     return ABITypeAlign;
511281ad6265SDimitry Andric 
511381ad6265SDimitry Andric   assert(!isKernelFunction(*F) && "Expect kernels to have non-local linkage");
51140fca6ea1SDimitry Andric   return std::max(Align(16), ABITypeAlign);
511581ad6265SDimitry Andric }
511681ad6265SDimitry Andric 
5117bdd1243dSDimitry Andric /// Helper for computing alignment of a device function byval parameter.
5118bdd1243dSDimitry Andric Align NVPTXTargetLowering::getFunctionByValParamAlign(
5119bdd1243dSDimitry Andric     const Function *F, Type *ArgTy, Align InitialAlign,
5120bdd1243dSDimitry Andric     const DataLayout &DL) const {
5121bdd1243dSDimitry Andric   Align ArgAlign = InitialAlign;
5122bdd1243dSDimitry Andric   // Try to increase alignment to enhance vectorization options.
5123bdd1243dSDimitry Andric   if (F)
5124bdd1243dSDimitry Andric     ArgAlign = std::max(ArgAlign, getFunctionParamOptimizedAlign(F, ArgTy, DL));
5125bdd1243dSDimitry Andric 
512606c3fb27SDimitry Andric   // Old ptx versions have a bug. When PTX code takes address of
5127bdd1243dSDimitry Andric   // byval parameter with alignment < 4, ptxas generates code to
5128bdd1243dSDimitry Andric   // spill argument into memory. Alas on sm_50+ ptxas generates
5129bdd1243dSDimitry Andric   // SASS code that fails with misaligned access. To work around
5130bdd1243dSDimitry Andric   // the problem, make sure that we align byval parameters by at
513106c3fb27SDimitry Andric   // least 4. This bug seems to be fixed at least starting from
513206c3fb27SDimitry Andric   // ptxas > 9.0.
513306c3fb27SDimitry Andric   // TODO: remove this after verifying the bug is not reproduced
513406c3fb27SDimitry Andric   // on non-deprecated ptxas versions.
513506c3fb27SDimitry Andric   if (ForceMinByValParamAlign)
5136bdd1243dSDimitry Andric     ArgAlign = std::max(ArgAlign, Align(4));
5137bdd1243dSDimitry Andric 
5138bdd1243dSDimitry Andric   return ArgAlign;
5139bdd1243dSDimitry Andric }
5140bdd1243dSDimitry Andric 
514106c3fb27SDimitry Andric // Helper for getting a function parameter name. Name is composed from
514206c3fb27SDimitry Andric // its index and the function name. Negative index corresponds to special
514306c3fb27SDimitry Andric // parameter (unsized array) used for passing variable arguments.
514406c3fb27SDimitry Andric std::string NVPTXTargetLowering::getParamName(const Function *F,
514506c3fb27SDimitry Andric                                               int Idx) const {
514606c3fb27SDimitry Andric   std::string ParamName;
514706c3fb27SDimitry Andric   raw_string_ostream ParamStr(ParamName);
514806c3fb27SDimitry Andric 
514906c3fb27SDimitry Andric   ParamStr << getTargetMachine().getSymbol(F)->getName();
515006c3fb27SDimitry Andric   if (Idx < 0)
515106c3fb27SDimitry Andric     ParamStr << "_vararg";
515206c3fb27SDimitry Andric   else
515306c3fb27SDimitry Andric     ParamStr << "_param_" << Idx;
515406c3fb27SDimitry Andric 
515506c3fb27SDimitry Andric   return ParamName;
515606c3fb27SDimitry Andric }
515706c3fb27SDimitry Andric 
51580b57cec5SDimitry Andric /// isLegalAddressingMode - Return true if the addressing mode represented
51590b57cec5SDimitry Andric /// by AM is legal for this target, for a load/store of the specified type.
51600b57cec5SDimitry Andric /// Used to guide target specific optimizations, like loop strength reduction
51610b57cec5SDimitry Andric /// (LoopStrengthReduce.cpp) and memory optimization for address mode
51620b57cec5SDimitry Andric /// (CodeGenPrepare.cpp)
51630b57cec5SDimitry Andric bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
51640b57cec5SDimitry Andric                                                 const AddrMode &AM, Type *Ty,
51650b57cec5SDimitry Andric                                                 unsigned AS, Instruction *I) const {
51660b57cec5SDimitry Andric   // AddrMode - This represents an addressing mode of:
51670b57cec5SDimitry Andric   //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
51680b57cec5SDimitry Andric   //
51690b57cec5SDimitry Andric   // The legal address modes are
51700b57cec5SDimitry Andric   // - [avar]
51710b57cec5SDimitry Andric   // - [areg]
51720b57cec5SDimitry Andric   // - [areg+immoff]
51730b57cec5SDimitry Andric   // - [immAddr]
51740b57cec5SDimitry Andric 
51750fca6ea1SDimitry Andric   // immoff must fit in a signed 32-bit int
51760fca6ea1SDimitry Andric   if (!APInt(64, AM.BaseOffs).isSignedIntN(32))
51770fca6ea1SDimitry Andric     return false;
51780fca6ea1SDimitry Andric 
51790fca6ea1SDimitry Andric   if (AM.BaseGV)
51800b57cec5SDimitry Andric     return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale;
51810b57cec5SDimitry Andric 
51820b57cec5SDimitry Andric   switch (AM.Scale) {
51830b57cec5SDimitry Andric   case 0: // "r", "r+i" or "i" is allowed
51840b57cec5SDimitry Andric     break;
51850b57cec5SDimitry Andric   case 1:
51860b57cec5SDimitry Andric     if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
51870b57cec5SDimitry Andric       return false;
51880b57cec5SDimitry Andric     // Otherwise we have r+i.
51890b57cec5SDimitry Andric     break;
51900b57cec5SDimitry Andric   default:
51910b57cec5SDimitry Andric     // No scale > 1 is allowed
51920b57cec5SDimitry Andric     return false;
51930b57cec5SDimitry Andric   }
51940b57cec5SDimitry Andric   return true;
51950b57cec5SDimitry Andric }
51960b57cec5SDimitry Andric 
51970b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
51980b57cec5SDimitry Andric //                         NVPTX Inline Assembly Support
51990b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
52000b57cec5SDimitry Andric 
52010b57cec5SDimitry Andric /// getConstraintType - Given a constraint letter, return the type of
52020b57cec5SDimitry Andric /// constraint it is for this target.
52030b57cec5SDimitry Andric NVPTXTargetLowering::ConstraintType
52040b57cec5SDimitry Andric NVPTXTargetLowering::getConstraintType(StringRef Constraint) const {
52050b57cec5SDimitry Andric   if (Constraint.size() == 1) {
52060b57cec5SDimitry Andric     switch (Constraint[0]) {
52070b57cec5SDimitry Andric     default:
52080b57cec5SDimitry Andric       break;
52090b57cec5SDimitry Andric     case 'b':
52100b57cec5SDimitry Andric     case 'r':
52110b57cec5SDimitry Andric     case 'h':
52120b57cec5SDimitry Andric     case 'c':
52130b57cec5SDimitry Andric     case 'l':
52140b57cec5SDimitry Andric     case 'f':
52150b57cec5SDimitry Andric     case 'd':
52160fca6ea1SDimitry Andric     case 'q':
52170b57cec5SDimitry Andric     case '0':
52180b57cec5SDimitry Andric     case 'N':
52190b57cec5SDimitry Andric       return C_RegisterClass;
52200b57cec5SDimitry Andric     }
52210b57cec5SDimitry Andric   }
52220b57cec5SDimitry Andric   return TargetLowering::getConstraintType(Constraint);
52230b57cec5SDimitry Andric }
52240b57cec5SDimitry Andric 
52250b57cec5SDimitry Andric std::pair<unsigned, const TargetRegisterClass *>
52260b57cec5SDimitry Andric NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
52270b57cec5SDimitry Andric                                                   StringRef Constraint,
52280b57cec5SDimitry Andric                                                   MVT VT) const {
52290b57cec5SDimitry Andric   if (Constraint.size() == 1) {
52300b57cec5SDimitry Andric     switch (Constraint[0]) {
52310b57cec5SDimitry Andric     case 'b':
52320b57cec5SDimitry Andric       return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
52330b57cec5SDimitry Andric     case 'c':
52340b57cec5SDimitry Andric       return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
52350b57cec5SDimitry Andric     case 'h':
52360b57cec5SDimitry Andric       return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
52370b57cec5SDimitry Andric     case 'r':
52380b57cec5SDimitry Andric       return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
52390b57cec5SDimitry Andric     case 'l':
52400b57cec5SDimitry Andric     case 'N':
52410b57cec5SDimitry Andric       return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
52420fca6ea1SDimitry Andric     case 'q': {
52430fca6ea1SDimitry Andric       if (STI.getSmVersion() < 70)
52440fca6ea1SDimitry Andric         report_fatal_error("Inline asm with 128 bit operands is only "
52450fca6ea1SDimitry Andric                            "supported for sm_70 and higher!");
52460fca6ea1SDimitry Andric       return std::make_pair(0U, &NVPTX::Int128RegsRegClass);
52470fca6ea1SDimitry Andric     }
52480b57cec5SDimitry Andric     case 'f':
52490b57cec5SDimitry Andric       return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
52500b57cec5SDimitry Andric     case 'd':
52510b57cec5SDimitry Andric       return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
52520b57cec5SDimitry Andric     }
52530b57cec5SDimitry Andric   }
52540b57cec5SDimitry Andric   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
52550b57cec5SDimitry Andric }
52560b57cec5SDimitry Andric 
52570b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
52580b57cec5SDimitry Andric //                         NVPTX DAG Combining
52590b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
52600b57cec5SDimitry Andric 
52610b57cec5SDimitry Andric bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
52625f757f3fSDimitry Andric                                    CodeGenOptLevel OptLevel) const {
52630b57cec5SDimitry Andric   // Always honor command-line argument
52640b57cec5SDimitry Andric   if (FMAContractLevelOpt.getNumOccurrences() > 0)
52650b57cec5SDimitry Andric     return FMAContractLevelOpt > 0;
52660b57cec5SDimitry Andric 
52670b57cec5SDimitry Andric   // Do not contract if we're not optimizing the code.
52685f757f3fSDimitry Andric   if (OptLevel == CodeGenOptLevel::None)
52690b57cec5SDimitry Andric     return false;
52700b57cec5SDimitry Andric 
52710b57cec5SDimitry Andric   // Honor TargetOptions flags that explicitly say fusion is okay.
52720b57cec5SDimitry Andric   if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast)
52730b57cec5SDimitry Andric     return true;
52740b57cec5SDimitry Andric 
52750b57cec5SDimitry Andric   return allowUnsafeFPMath(MF);
52760b57cec5SDimitry Andric }
52770b57cec5SDimitry Andric 
52780b57cec5SDimitry Andric bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const {
52790b57cec5SDimitry Andric   // Honor TargetOptions flags that explicitly say unsafe math is okay.
52800b57cec5SDimitry Andric   if (MF.getTarget().Options.UnsafeFPMath)
52810b57cec5SDimitry Andric     return true;
52820b57cec5SDimitry Andric 
52830b57cec5SDimitry Andric   // Allow unsafe math if unsafe-fp-math attribute explicitly says so.
52840b57cec5SDimitry Andric   const Function &F = MF.getFunction();
5285fe6060f1SDimitry Andric   return F.getFnAttribute("unsafe-fp-math").getValueAsBool();
52860b57cec5SDimitry Andric }
52870b57cec5SDimitry Andric 
52880fca6ea1SDimitry Andric static bool isConstZero(const SDValue &Operand) {
52890fca6ea1SDimitry Andric   const auto *Const = dyn_cast<ConstantSDNode>(Operand);
52900fca6ea1SDimitry Andric   return Const && Const->getZExtValue() == 0;
52910fca6ea1SDimitry Andric }
52920fca6ea1SDimitry Andric 
52930b57cec5SDimitry Andric /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
52940b57cec5SDimitry Andric /// operands N0 and N1.  This is a helper for PerformADDCombine that is
52950b57cec5SDimitry Andric /// called with the default operands, and if that fails, with commuted
52960b57cec5SDimitry Andric /// operands.
52970fca6ea1SDimitry Andric static SDValue
52980fca6ea1SDimitry Andric PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
52990fca6ea1SDimitry Andric                               TargetLowering::DAGCombinerInfo &DCI) {
53000b57cec5SDimitry Andric   EVT VT = N0.getValueType();
53010fca6ea1SDimitry Andric 
53020fca6ea1SDimitry Andric   // Since integer multiply-add costs the same as integer multiply
53030fca6ea1SDimitry Andric   // but is more costly than integer add, do the fusion only when
53040fca6ea1SDimitry Andric   // the mul is only used in the add.
53050fca6ea1SDimitry Andric   // TODO: this may not be true for later architectures, consider relaxing this
53060fca6ea1SDimitry Andric   if (!N0.getNode()->hasOneUse())
53070b57cec5SDimitry Andric     return SDValue();
53080b57cec5SDimitry Andric 
53090b57cec5SDimitry Andric   // fold (add (mul a, b), c) -> (mad a, b, c)
53100b57cec5SDimitry Andric   //
53110fca6ea1SDimitry Andric   if (N0.getOpcode() == ISD::MUL)
53120fca6ea1SDimitry Andric     return DCI.DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT, N0.getOperand(0),
53130fca6ea1SDimitry Andric                            N0.getOperand(1), N1);
53140fca6ea1SDimitry Andric 
53150fca6ea1SDimitry Andric   // fold (add (select cond, 0, (mul a, b)), c)
53160fca6ea1SDimitry Andric   //   -> (select cond, c, (mad a, b, c))
53170fca6ea1SDimitry Andric   //
53180fca6ea1SDimitry Andric   if (N0.getOpcode() == ISD::SELECT) {
53190fca6ea1SDimitry Andric     unsigned ZeroOpNum;
53200fca6ea1SDimitry Andric     if (isConstZero(N0->getOperand(1)))
53210fca6ea1SDimitry Andric       ZeroOpNum = 1;
53220fca6ea1SDimitry Andric     else if (isConstZero(N0->getOperand(2)))
53230fca6ea1SDimitry Andric       ZeroOpNum = 2;
53240fca6ea1SDimitry Andric     else
53250b57cec5SDimitry Andric       return SDValue();
53260b57cec5SDimitry Andric 
53270fca6ea1SDimitry Andric     SDValue M = N0->getOperand((ZeroOpNum == 1) ? 2 : 1);
53280fca6ea1SDimitry Andric     if (M->getOpcode() != ISD::MUL || !M.getNode()->hasOneUse())
53290fca6ea1SDimitry Andric       return SDValue();
53300fca6ea1SDimitry Andric 
53310fca6ea1SDimitry Andric     SDValue MAD = DCI.DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
53320fca6ea1SDimitry Andric                                   M->getOperand(0), M->getOperand(1), N1);
53330fca6ea1SDimitry Andric     return DCI.DAG.getSelect(SDLoc(N), VT, N0->getOperand(0),
53340fca6ea1SDimitry Andric                              ((ZeroOpNum == 1) ? N1 : MAD),
53350fca6ea1SDimitry Andric                              ((ZeroOpNum == 1) ? MAD : N1));
53360b57cec5SDimitry Andric   }
53370fca6ea1SDimitry Andric 
53380fca6ea1SDimitry Andric   return SDValue();
53390fca6ea1SDimitry Andric }
53400fca6ea1SDimitry Andric 
53410fca6ea1SDimitry Andric static SDValue
53420fca6ea1SDimitry Andric PerformFADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
53430fca6ea1SDimitry Andric                                TargetLowering::DAGCombinerInfo &DCI,
53440fca6ea1SDimitry Andric                                CodeGenOptLevel OptLevel) {
53450fca6ea1SDimitry Andric   EVT VT = N0.getValueType();
53460fca6ea1SDimitry Andric   if (N0.getOpcode() == ISD::FMUL) {
53470b57cec5SDimitry Andric     const auto *TLI = static_cast<const NVPTXTargetLowering *>(
53480fca6ea1SDimitry Andric         &DCI.DAG.getTargetLoweringInfo());
53490fca6ea1SDimitry Andric     if (!TLI->allowFMA(DCI.DAG.getMachineFunction(), OptLevel))
53500b57cec5SDimitry Andric       return SDValue();
53510b57cec5SDimitry Andric 
53520b57cec5SDimitry Andric     // For floating point:
53530b57cec5SDimitry Andric     // Do the fusion only when the mul has less than 5 uses and all
53540b57cec5SDimitry Andric     // are add.
53550b57cec5SDimitry Andric     // The heuristic is that if a use is not an add, then that use
53560b57cec5SDimitry Andric     // cannot be fused into fma, therefore mul is still needed anyway.
53570b57cec5SDimitry Andric     // If there are more than 4 uses, even if they are all add, fusing
53580b57cec5SDimitry Andric     // them will increase register pressue.
53590b57cec5SDimitry Andric     //
53600b57cec5SDimitry Andric     int numUses = 0;
53610b57cec5SDimitry Andric     int nonAddCount = 0;
5362349cc55cSDimitry Andric     for (const SDNode *User : N0.getNode()->uses()) {
53630b57cec5SDimitry Andric       numUses++;
53640b57cec5SDimitry Andric       if (User->getOpcode() != ISD::FADD)
53650b57cec5SDimitry Andric         ++nonAddCount;
53660b57cec5SDimitry Andric       if (numUses >= 5)
53670b57cec5SDimitry Andric         return SDValue();
53680fca6ea1SDimitry Andric     }
53690b57cec5SDimitry Andric     if (nonAddCount) {
53700b57cec5SDimitry Andric       int orderNo = N->getIROrder();
53710b57cec5SDimitry Andric       int orderNo2 = N0.getNode()->getIROrder();
53720b57cec5SDimitry Andric       // simple heuristics here for considering potential register
53730b57cec5SDimitry Andric       // pressure, the logics here is that the differnce are used
53740b57cec5SDimitry Andric       // to measure the distance between def and use, the longer distance
53750b57cec5SDimitry Andric       // more likely cause register pressure.
53760b57cec5SDimitry Andric       if (orderNo - orderNo2 < 500)
53770b57cec5SDimitry Andric         return SDValue();
53780b57cec5SDimitry Andric 
53790fca6ea1SDimitry Andric       // Now, check if at least one of the FMUL's operands is live beyond the
53800fca6ea1SDimitry Andric       // node N, which guarantees that the FMA will not increase register
53810fca6ea1SDimitry Andric       // pressure at node N.
53820b57cec5SDimitry Andric       bool opIsLive = false;
53830b57cec5SDimitry Andric       const SDNode *left = N0.getOperand(0).getNode();
53840b57cec5SDimitry Andric       const SDNode *right = N0.getOperand(1).getNode();
53850b57cec5SDimitry Andric 
53860b57cec5SDimitry Andric       if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
53870b57cec5SDimitry Andric         opIsLive = true;
53880b57cec5SDimitry Andric 
53890b57cec5SDimitry Andric       if (!opIsLive)
5390349cc55cSDimitry Andric         for (const SDNode *User : left->uses()) {
53910b57cec5SDimitry Andric           int orderNo3 = User->getIROrder();
53920b57cec5SDimitry Andric           if (orderNo3 > orderNo) {
53930b57cec5SDimitry Andric             opIsLive = true;
53940b57cec5SDimitry Andric             break;
53950b57cec5SDimitry Andric           }
53960b57cec5SDimitry Andric         }
53970b57cec5SDimitry Andric 
53980b57cec5SDimitry Andric       if (!opIsLive)
5399349cc55cSDimitry Andric         for (const SDNode *User : right->uses()) {
54000b57cec5SDimitry Andric           int orderNo3 = User->getIROrder();
54010b57cec5SDimitry Andric           if (orderNo3 > orderNo) {
54020b57cec5SDimitry Andric             opIsLive = true;
54030b57cec5SDimitry Andric             break;
54040b57cec5SDimitry Andric           }
54050b57cec5SDimitry Andric         }
54060b57cec5SDimitry Andric 
54070b57cec5SDimitry Andric       if (!opIsLive)
54080b57cec5SDimitry Andric         return SDValue();
54090b57cec5SDimitry Andric     }
54100b57cec5SDimitry Andric 
54110fca6ea1SDimitry Andric     return DCI.DAG.getNode(ISD::FMA, SDLoc(N), VT, N0.getOperand(0),
54120fca6ea1SDimitry Andric                            N0.getOperand(1), N1);
54130b57cec5SDimitry Andric   }
54140b57cec5SDimitry Andric 
54150b57cec5SDimitry Andric   return SDValue();
54160b57cec5SDimitry Andric }
54170b57cec5SDimitry Andric 
54180fca6ea1SDimitry Andric static SDValue PerformStoreCombineHelper(SDNode *N, std::size_t Front,
54190fca6ea1SDimitry Andric                                          std::size_t Back) {
54200fca6ea1SDimitry Andric   if (all_of(N->ops().drop_front(Front).drop_back(Back),
54210fca6ea1SDimitry Andric              [](const SDUse &U) { return U.get()->isUndef(); }))
54220fca6ea1SDimitry Andric     // Operand 0 is the previous value in the chain. Cannot return EntryToken
54230fca6ea1SDimitry Andric     // as the previous value will become unused and eliminated later.
54240fca6ea1SDimitry Andric     return N->getOperand(0);
54250fca6ea1SDimitry Andric 
54260fca6ea1SDimitry Andric   return SDValue();
54270fca6ea1SDimitry Andric }
54280fca6ea1SDimitry Andric 
54290fca6ea1SDimitry Andric static SDValue PerformStoreParamCombine(SDNode *N) {
54300fca6ea1SDimitry Andric   // Operands from the 3rd to the 2nd last one are the values to be stored.
54310fca6ea1SDimitry Andric   //   {Chain, ArgID, Offset, Val, Glue}
54320fca6ea1SDimitry Andric   return PerformStoreCombineHelper(N, 3, 1);
54330fca6ea1SDimitry Andric }
54340fca6ea1SDimitry Andric 
543581ad6265SDimitry Andric static SDValue PerformStoreRetvalCombine(SDNode *N) {
543681ad6265SDimitry Andric   // Operands from the 2nd to the last one are the values to be stored
54370fca6ea1SDimitry Andric   return PerformStoreCombineHelper(N, 2, 0);
543881ad6265SDimitry Andric }
543981ad6265SDimitry Andric 
54400b57cec5SDimitry Andric /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
54410b57cec5SDimitry Andric ///
54420b57cec5SDimitry Andric static SDValue PerformADDCombine(SDNode *N,
54430b57cec5SDimitry Andric                                  TargetLowering::DAGCombinerInfo &DCI,
54440fca6ea1SDimitry Andric                                  CodeGenOptLevel OptLevel) {
54450fca6ea1SDimitry Andric   if (OptLevel == CodeGenOptLevel::None)
54460fca6ea1SDimitry Andric     return SDValue();
54470fca6ea1SDimitry Andric 
54480fca6ea1SDimitry Andric   SDValue N0 = N->getOperand(0);
54490fca6ea1SDimitry Andric   SDValue N1 = N->getOperand(1);
54500fca6ea1SDimitry Andric 
54510fca6ea1SDimitry Andric   // Skip non-integer, non-scalar case
54520fca6ea1SDimitry Andric   EVT VT = N0.getValueType();
54530fca6ea1SDimitry Andric   if (VT.isVector() || VT != MVT::i32)
54540fca6ea1SDimitry Andric     return SDValue();
54550fca6ea1SDimitry Andric 
54560fca6ea1SDimitry Andric   // First try with the default operand order.
54570fca6ea1SDimitry Andric   if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI))
54580fca6ea1SDimitry Andric     return Result;
54590fca6ea1SDimitry Andric 
54600fca6ea1SDimitry Andric   // If that didn't work, try again with the operands commuted.
54610fca6ea1SDimitry Andric   return PerformADDCombineWithOperands(N, N1, N0, DCI);
54620fca6ea1SDimitry Andric }
54630fca6ea1SDimitry Andric 
54640fca6ea1SDimitry Andric /// PerformFADDCombine - Target-specific dag combine xforms for ISD::FADD.
54650fca6ea1SDimitry Andric ///
54660fca6ea1SDimitry Andric static SDValue PerformFADDCombine(SDNode *N,
54670fca6ea1SDimitry Andric                                  TargetLowering::DAGCombinerInfo &DCI,
54685f757f3fSDimitry Andric                                  CodeGenOptLevel OptLevel) {
54690b57cec5SDimitry Andric   SDValue N0 = N->getOperand(0);
54700b57cec5SDimitry Andric   SDValue N1 = N->getOperand(1);
54710b57cec5SDimitry Andric 
54720fca6ea1SDimitry Andric   EVT VT = N0.getValueType();
54730fca6ea1SDimitry Andric   if (VT.isVector() || !(VT == MVT::f32 || VT == MVT::f64))
54740fca6ea1SDimitry Andric     return SDValue();
54750fca6ea1SDimitry Andric 
54760b57cec5SDimitry Andric   // First try with the default operand order.
54770fca6ea1SDimitry Andric   if (SDValue Result = PerformFADDCombineWithOperands(N, N0, N1, DCI, OptLevel))
54780b57cec5SDimitry Andric     return Result;
54790b57cec5SDimitry Andric 
54800b57cec5SDimitry Andric   // If that didn't work, try again with the operands commuted.
54810fca6ea1SDimitry Andric   return PerformFADDCombineWithOperands(N, N1, N0, DCI, OptLevel);
54820b57cec5SDimitry Andric }
54830b57cec5SDimitry Andric 
54840b57cec5SDimitry Andric static SDValue PerformANDCombine(SDNode *N,
54850b57cec5SDimitry Andric                                  TargetLowering::DAGCombinerInfo &DCI) {
54860b57cec5SDimitry Andric   // The type legalizer turns a vector load of i8 values into a zextload to i16
54870b57cec5SDimitry Andric   // registers, optionally ANY_EXTENDs it (if target type is integer),
54880b57cec5SDimitry Andric   // and ANDs off the high 8 bits. Since we turn this load into a
54890b57cec5SDimitry Andric   // target-specific DAG node, the DAG combiner fails to eliminate these AND
54900b57cec5SDimitry Andric   // nodes. Do that here.
54910b57cec5SDimitry Andric   SDValue Val = N->getOperand(0);
54920b57cec5SDimitry Andric   SDValue Mask = N->getOperand(1);
54930b57cec5SDimitry Andric 
54940b57cec5SDimitry Andric   if (isa<ConstantSDNode>(Val)) {
54950b57cec5SDimitry Andric     std::swap(Val, Mask);
54960b57cec5SDimitry Andric   }
54970b57cec5SDimitry Andric 
54980b57cec5SDimitry Andric   SDValue AExt;
54995f757f3fSDimitry Andric 
55005f757f3fSDimitry Andric   // Convert BFE-> truncate i16 -> and 255
55015f757f3fSDimitry Andric   // To just BFE-> truncate i16, as the value already has all the bits in the
55025f757f3fSDimitry Andric   // right places.
55035f757f3fSDimitry Andric   if (Val.getOpcode() == ISD::TRUNCATE) {
55045f757f3fSDimitry Andric     SDValue BFE = Val.getOperand(0);
55055f757f3fSDimitry Andric     if (BFE.getOpcode() != NVPTXISD::BFE)
55065f757f3fSDimitry Andric       return SDValue();
55075f757f3fSDimitry Andric 
55085f757f3fSDimitry Andric     ConstantSDNode *BFEBits = dyn_cast<ConstantSDNode>(BFE.getOperand(0));
55095f757f3fSDimitry Andric     if (!BFEBits)
55105f757f3fSDimitry Andric       return SDValue();
55115f757f3fSDimitry Andric     uint64_t BFEBitsVal = BFEBits->getZExtValue();
55125f757f3fSDimitry Andric 
55135f757f3fSDimitry Andric     ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
55145f757f3fSDimitry Andric     if (!MaskCnst) {
55155f757f3fSDimitry Andric       // Not an AND with a constant
55165f757f3fSDimitry Andric       return SDValue();
55175f757f3fSDimitry Andric     }
55185f757f3fSDimitry Andric     uint64_t MaskVal = MaskCnst->getZExtValue();
55195f757f3fSDimitry Andric 
55205f757f3fSDimitry Andric     if (MaskVal != (uint64_t(1) << BFEBitsVal) - 1)
55215f757f3fSDimitry Andric       return SDValue();
55225f757f3fSDimitry Andric     // If we get here, the AND is unnecessary.  Just replace it with the trunc
55235f757f3fSDimitry Andric     DCI.CombineTo(N, Val, false);
55245f757f3fSDimitry Andric   }
55250b57cec5SDimitry Andric   // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
55260b57cec5SDimitry Andric   if (Val.getOpcode() == ISD::ANY_EXTEND) {
55270b57cec5SDimitry Andric     AExt = Val;
55280b57cec5SDimitry Andric     Val = Val->getOperand(0);
55290b57cec5SDimitry Andric   }
55300b57cec5SDimitry Andric 
55310b57cec5SDimitry Andric   if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) {
55320b57cec5SDimitry Andric     Val = Val->getOperand(0);
55330b57cec5SDimitry Andric   }
55340b57cec5SDimitry Andric 
55350b57cec5SDimitry Andric   if (Val->getOpcode() == NVPTXISD::LoadV2 ||
55360b57cec5SDimitry Andric       Val->getOpcode() == NVPTXISD::LoadV4) {
55370b57cec5SDimitry Andric     ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
55380b57cec5SDimitry Andric     if (!MaskCnst) {
55390b57cec5SDimitry Andric       // Not an AND with a constant
55400b57cec5SDimitry Andric       return SDValue();
55410b57cec5SDimitry Andric     }
55420b57cec5SDimitry Andric 
55430b57cec5SDimitry Andric     uint64_t MaskVal = MaskCnst->getZExtValue();
55440b57cec5SDimitry Andric     if (MaskVal != 0xff) {
55450b57cec5SDimitry Andric       // Not an AND that chops off top 8 bits
55460b57cec5SDimitry Andric       return SDValue();
55470b57cec5SDimitry Andric     }
55480b57cec5SDimitry Andric 
55490b57cec5SDimitry Andric     MemSDNode *Mem = dyn_cast<MemSDNode>(Val);
55500b57cec5SDimitry Andric     if (!Mem) {
55510b57cec5SDimitry Andric       // Not a MemSDNode?!?
55520b57cec5SDimitry Andric       return SDValue();
55530b57cec5SDimitry Andric     }
55540b57cec5SDimitry Andric 
55550b57cec5SDimitry Andric     EVT MemVT = Mem->getMemoryVT();
55560b57cec5SDimitry Andric     if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) {
55570b57cec5SDimitry Andric       // We only handle the i8 case
55580b57cec5SDimitry Andric       return SDValue();
55590b57cec5SDimitry Andric     }
55600b57cec5SDimitry Andric 
55617a6dacacSDimitry Andric     unsigned ExtType = Val->getConstantOperandVal(Val->getNumOperands() - 1);
55620b57cec5SDimitry Andric     if (ExtType == ISD::SEXTLOAD) {
55630b57cec5SDimitry Andric       // If for some reason the load is a sextload, the and is needed to zero
55640b57cec5SDimitry Andric       // out the high 8 bits
55650b57cec5SDimitry Andric       return SDValue();
55660b57cec5SDimitry Andric     }
55670b57cec5SDimitry Andric 
55680b57cec5SDimitry Andric     bool AddTo = false;
55690b57cec5SDimitry Andric     if (AExt.getNode() != nullptr) {
55700b57cec5SDimitry Andric       // Re-insert the ext as a zext.
55710b57cec5SDimitry Andric       Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
55720b57cec5SDimitry Andric                             AExt.getValueType(), Val);
55730b57cec5SDimitry Andric       AddTo = true;
55740b57cec5SDimitry Andric     }
55750b57cec5SDimitry Andric 
55760b57cec5SDimitry Andric     // If we get here, the AND is unnecessary.  Just replace it with the load
55770b57cec5SDimitry Andric     DCI.CombineTo(N, Val, AddTo);
55780b57cec5SDimitry Andric   }
55790b57cec5SDimitry Andric 
55800b57cec5SDimitry Andric   return SDValue();
55810b57cec5SDimitry Andric }
55820b57cec5SDimitry Andric 
55830b57cec5SDimitry Andric static SDValue PerformREMCombine(SDNode *N,
55840b57cec5SDimitry Andric                                  TargetLowering::DAGCombinerInfo &DCI,
55855f757f3fSDimitry Andric                                  CodeGenOptLevel OptLevel) {
55860b57cec5SDimitry Andric   assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM);
55870b57cec5SDimitry Andric 
55880b57cec5SDimitry Andric   // Don't do anything at less than -O2.
55895f757f3fSDimitry Andric   if (OptLevel < CodeGenOptLevel::Default)
55900b57cec5SDimitry Andric     return SDValue();
55910b57cec5SDimitry Andric 
55920b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
55930b57cec5SDimitry Andric   SDLoc DL(N);
55940b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
55950b57cec5SDimitry Andric   bool IsSigned = N->getOpcode() == ISD::SREM;
55960b57cec5SDimitry Andric   unsigned DivOpc = IsSigned ? ISD::SDIV : ISD::UDIV;
55970b57cec5SDimitry Andric 
55980b57cec5SDimitry Andric   const SDValue &Num = N->getOperand(0);
55990b57cec5SDimitry Andric   const SDValue &Den = N->getOperand(1);
56000b57cec5SDimitry Andric 
56010b57cec5SDimitry Andric   for (const SDNode *U : Num->uses()) {
56020b57cec5SDimitry Andric     if (U->getOpcode() == DivOpc && U->getOperand(0) == Num &&
56030b57cec5SDimitry Andric         U->getOperand(1) == Den) {
56040b57cec5SDimitry Andric       // Num % Den -> Num - (Num / Den) * Den
56050b57cec5SDimitry Andric       return DAG.getNode(ISD::SUB, DL, VT, Num,
56060b57cec5SDimitry Andric                          DAG.getNode(ISD::MUL, DL, VT,
56070b57cec5SDimitry Andric                                      DAG.getNode(DivOpc, DL, VT, Num, Den),
56080b57cec5SDimitry Andric                                      Den));
56090b57cec5SDimitry Andric     }
56100b57cec5SDimitry Andric   }
56110b57cec5SDimitry Andric   return SDValue();
56120b57cec5SDimitry Andric }
56130b57cec5SDimitry Andric 
/// Signedness classification of a mul.wide operand candidate.
enum OperandSignedness {
  Signed,   // Operand is produced by a sign extension.
  Unsigned, // Operand is produced by a zero extension.
  Unknown,  // Signedness could not be determined.
};
56190b57cec5SDimitry Andric 
56200b57cec5SDimitry Andric /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
56210b57cec5SDimitry Andric /// that can be demoted to \p OptSize bits without loss of information. The
56220b57cec5SDimitry Andric /// signedness of the operand, if determinable, is placed in \p S.
56230b57cec5SDimitry Andric static bool IsMulWideOperandDemotable(SDValue Op,
56240b57cec5SDimitry Andric                                       unsigned OptSize,
56250b57cec5SDimitry Andric                                       OperandSignedness &S) {
56260b57cec5SDimitry Andric   S = Unknown;
56270b57cec5SDimitry Andric 
56280b57cec5SDimitry Andric   if (Op.getOpcode() == ISD::SIGN_EXTEND ||
56290b57cec5SDimitry Andric       Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
56300b57cec5SDimitry Andric     EVT OrigVT = Op.getOperand(0).getValueType();
5631e8d8bef9SDimitry Andric     if (OrigVT.getFixedSizeInBits() <= OptSize) {
56320b57cec5SDimitry Andric       S = Signed;
56330b57cec5SDimitry Andric       return true;
56340b57cec5SDimitry Andric     }
56350b57cec5SDimitry Andric   } else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
56360b57cec5SDimitry Andric     EVT OrigVT = Op.getOperand(0).getValueType();
5637e8d8bef9SDimitry Andric     if (OrigVT.getFixedSizeInBits() <= OptSize) {
56380b57cec5SDimitry Andric       S = Unsigned;
56390b57cec5SDimitry Andric       return true;
56400b57cec5SDimitry Andric     }
56410b57cec5SDimitry Andric   }
56420b57cec5SDimitry Andric 
56430b57cec5SDimitry Andric   return false;
56440b57cec5SDimitry Andric }
56450b57cec5SDimitry Andric 
56460b57cec5SDimitry Andric /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
56470b57cec5SDimitry Andric /// be demoted to \p OptSize bits without loss of information. If the operands
56480b57cec5SDimitry Andric /// contain a constant, it should appear as the RHS operand. The signedness of
56490b57cec5SDimitry Andric /// the operands is placed in \p IsSigned.
56500b57cec5SDimitry Andric static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
56510b57cec5SDimitry Andric                                         unsigned OptSize,
56520b57cec5SDimitry Andric                                         bool &IsSigned) {
56530b57cec5SDimitry Andric   OperandSignedness LHSSign;
56540b57cec5SDimitry Andric 
56550b57cec5SDimitry Andric   // The LHS operand must be a demotable op
56560b57cec5SDimitry Andric   if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign))
56570b57cec5SDimitry Andric     return false;
56580b57cec5SDimitry Andric 
56590b57cec5SDimitry Andric   // We should have been able to determine the signedness from the LHS
56600b57cec5SDimitry Andric   if (LHSSign == Unknown)
56610b57cec5SDimitry Andric     return false;
56620b57cec5SDimitry Andric 
56630b57cec5SDimitry Andric   IsSigned = (LHSSign == Signed);
56640b57cec5SDimitry Andric 
56650b57cec5SDimitry Andric   // The RHS can be a demotable op or a constant
56660b57cec5SDimitry Andric   if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
56670b57cec5SDimitry Andric     const APInt &Val = CI->getAPIntValue();
56680b57cec5SDimitry Andric     if (LHSSign == Unsigned) {
56690b57cec5SDimitry Andric       return Val.isIntN(OptSize);
56700b57cec5SDimitry Andric     } else {
56710b57cec5SDimitry Andric       return Val.isSignedIntN(OptSize);
56720b57cec5SDimitry Andric     }
56730b57cec5SDimitry Andric   } else {
56740b57cec5SDimitry Andric     OperandSignedness RHSSign;
56750b57cec5SDimitry Andric     if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
56760b57cec5SDimitry Andric       return false;
56770b57cec5SDimitry Andric 
56780b57cec5SDimitry Andric     return LHSSign == RHSSign;
56790b57cec5SDimitry Andric   }
56800b57cec5SDimitry Andric }
56810b57cec5SDimitry Andric 
56820b57cec5SDimitry Andric /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
56830b57cec5SDimitry Andric /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
56840b57cec5SDimitry Andric /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift
56850b57cec5SDimitry Andric /// amount.
56860b57cec5SDimitry Andric static SDValue TryMULWIDECombine(SDNode *N,
56870b57cec5SDimitry Andric                                  TargetLowering::DAGCombinerInfo &DCI) {
56880b57cec5SDimitry Andric   EVT MulType = N->getValueType(0);
56890b57cec5SDimitry Andric   if (MulType != MVT::i32 && MulType != MVT::i64) {
56900b57cec5SDimitry Andric     return SDValue();
56910b57cec5SDimitry Andric   }
56920b57cec5SDimitry Andric 
56930b57cec5SDimitry Andric   SDLoc DL(N);
56940b57cec5SDimitry Andric   unsigned OptSize = MulType.getSizeInBits() >> 1;
56950b57cec5SDimitry Andric   SDValue LHS = N->getOperand(0);
56960b57cec5SDimitry Andric   SDValue RHS = N->getOperand(1);
56970b57cec5SDimitry Andric 
56980b57cec5SDimitry Andric   // Canonicalize the multiply so the constant (if any) is on the right
56990b57cec5SDimitry Andric   if (N->getOpcode() == ISD::MUL) {
57000b57cec5SDimitry Andric     if (isa<ConstantSDNode>(LHS)) {
57010b57cec5SDimitry Andric       std::swap(LHS, RHS);
57020b57cec5SDimitry Andric     }
57030b57cec5SDimitry Andric   }
57040b57cec5SDimitry Andric 
57050b57cec5SDimitry Andric   // If we have a SHL, determine the actual multiply amount
57060b57cec5SDimitry Andric   if (N->getOpcode() == ISD::SHL) {
57070b57cec5SDimitry Andric     ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS);
57080b57cec5SDimitry Andric     if (!ShlRHS) {
57090b57cec5SDimitry Andric       return SDValue();
57100b57cec5SDimitry Andric     }
57110b57cec5SDimitry Andric 
57120b57cec5SDimitry Andric     APInt ShiftAmt = ShlRHS->getAPIntValue();
57130b57cec5SDimitry Andric     unsigned BitWidth = MulType.getSizeInBits();
57140b57cec5SDimitry Andric     if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) {
57150b57cec5SDimitry Andric       APInt MulVal = APInt(BitWidth, 1) << ShiftAmt;
57160b57cec5SDimitry Andric       RHS = DCI.DAG.getConstant(MulVal, DL, MulType);
57170b57cec5SDimitry Andric     } else {
57180b57cec5SDimitry Andric       return SDValue();
57190b57cec5SDimitry Andric     }
57200b57cec5SDimitry Andric   }
57210b57cec5SDimitry Andric 
57220b57cec5SDimitry Andric   bool Signed;
57230b57cec5SDimitry Andric   // Verify that our operands are demotable
57240b57cec5SDimitry Andric   if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) {
57250b57cec5SDimitry Andric     return SDValue();
57260b57cec5SDimitry Andric   }
57270b57cec5SDimitry Andric 
57280b57cec5SDimitry Andric   EVT DemotedVT;
57290b57cec5SDimitry Andric   if (MulType == MVT::i32) {
57300b57cec5SDimitry Andric     DemotedVT = MVT::i16;
57310b57cec5SDimitry Andric   } else {
57320b57cec5SDimitry Andric     DemotedVT = MVT::i32;
57330b57cec5SDimitry Andric   }
57340b57cec5SDimitry Andric 
57350b57cec5SDimitry Andric   // Truncate the operands to the correct size. Note that these are just for
57360b57cec5SDimitry Andric   // type consistency and will (likely) be eliminated in later phases.
57370b57cec5SDimitry Andric   SDValue TruncLHS =
57380b57cec5SDimitry Andric     DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS);
57390b57cec5SDimitry Andric   SDValue TruncRHS =
57400b57cec5SDimitry Andric     DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS);
57410b57cec5SDimitry Andric 
57420b57cec5SDimitry Andric   unsigned Opc;
57430b57cec5SDimitry Andric   if (Signed) {
57440b57cec5SDimitry Andric     Opc = NVPTXISD::MUL_WIDE_SIGNED;
57450b57cec5SDimitry Andric   } else {
57460b57cec5SDimitry Andric     Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
57470b57cec5SDimitry Andric   }
57480b57cec5SDimitry Andric 
57490b57cec5SDimitry Andric   return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS);
57500b57cec5SDimitry Andric }
57510b57cec5SDimitry Andric 
57520fca6ea1SDimitry Andric static bool isConstOne(const SDValue &Operand) {
57530fca6ea1SDimitry Andric   const auto *Const = dyn_cast<ConstantSDNode>(Operand);
57540fca6ea1SDimitry Andric   return Const && Const->getZExtValue() == 1;
57550fca6ea1SDimitry Andric }
57560fca6ea1SDimitry Andric 
57570fca6ea1SDimitry Andric static SDValue matchMADConstOnePattern(SDValue Add) {
57580fca6ea1SDimitry Andric   if (Add->getOpcode() != ISD::ADD)
57590fca6ea1SDimitry Andric     return SDValue();
57600fca6ea1SDimitry Andric 
57610fca6ea1SDimitry Andric   if (isConstOne(Add->getOperand(0)))
57620fca6ea1SDimitry Andric     return Add->getOperand(1);
57630fca6ea1SDimitry Andric 
57640fca6ea1SDimitry Andric   if (isConstOne(Add->getOperand(1)))
57650fca6ea1SDimitry Andric     return Add->getOperand(0);
57660fca6ea1SDimitry Andric 
57670fca6ea1SDimitry Andric   return SDValue();
57680fca6ea1SDimitry Andric }
57690fca6ea1SDimitry Andric 
57700fca6ea1SDimitry Andric static SDValue combineMADConstOne(SDValue X, SDValue Add, EVT VT, SDLoc DL,
57710fca6ea1SDimitry Andric                                   TargetLowering::DAGCombinerInfo &DCI) {
57720fca6ea1SDimitry Andric 
57730fca6ea1SDimitry Andric   if (SDValue Y = matchMADConstOnePattern(Add))
57740fca6ea1SDimitry Andric     return DCI.DAG.getNode(NVPTXISD::IMAD, DL, VT, X, Y, X);
57750fca6ea1SDimitry Andric 
57760fca6ea1SDimitry Andric   return SDValue();
57770fca6ea1SDimitry Andric }
57780fca6ea1SDimitry Andric 
57790fca6ea1SDimitry Andric static SDValue combineMulSelectConstOne(SDValue X, SDValue Select, EVT VT,
57800fca6ea1SDimitry Andric                                         SDLoc DL,
57810fca6ea1SDimitry Andric                                         TargetLowering::DAGCombinerInfo &DCI) {
57820fca6ea1SDimitry Andric   if (Select->getOpcode() != ISD::SELECT)
57830fca6ea1SDimitry Andric     return SDValue();
57840fca6ea1SDimitry Andric 
57850fca6ea1SDimitry Andric   SDValue Cond = Select->getOperand(0);
57860fca6ea1SDimitry Andric 
57870fca6ea1SDimitry Andric   unsigned ConstOpNo;
57880fca6ea1SDimitry Andric   if (isConstOne(Select->getOperand(1)))
57890fca6ea1SDimitry Andric     ConstOpNo = 1;
57900fca6ea1SDimitry Andric   else if (isConstOne(Select->getOperand(2)))
57910fca6ea1SDimitry Andric     ConstOpNo = 2;
57920fca6ea1SDimitry Andric   else
57930fca6ea1SDimitry Andric     return SDValue();
57940fca6ea1SDimitry Andric 
57950fca6ea1SDimitry Andric   SDValue Y = Select->getOperand((ConstOpNo == 1) ? 2 : 1);
57960fca6ea1SDimitry Andric 
57970fca6ea1SDimitry Andric   // Do not combine if the resulting sequence is not obviously profitable.
57980fca6ea1SDimitry Andric   if (!matchMADConstOnePattern(Y))
57990fca6ea1SDimitry Andric     return SDValue();
58000fca6ea1SDimitry Andric 
58010fca6ea1SDimitry Andric   SDValue NewMul = DCI.DAG.getNode(ISD::MUL, DL, VT, X, Y);
58020fca6ea1SDimitry Andric 
58030fca6ea1SDimitry Andric   return DCI.DAG.getNode(ISD::SELECT, DL, VT, Cond,
58040fca6ea1SDimitry Andric                          (ConstOpNo == 1) ? X : NewMul,
58050fca6ea1SDimitry Andric                          (ConstOpNo == 1) ? NewMul : X);
58060fca6ea1SDimitry Andric }
58070fca6ea1SDimitry Andric 
58080fca6ea1SDimitry Andric static SDValue
58090fca6ea1SDimitry Andric PerformMULCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
58100fca6ea1SDimitry Andric                               TargetLowering::DAGCombinerInfo &DCI) {
58110fca6ea1SDimitry Andric 
58120fca6ea1SDimitry Andric   EVT VT = N0.getValueType();
58130fca6ea1SDimitry Andric   if (VT.isVector())
58140fca6ea1SDimitry Andric     return SDValue();
58150fca6ea1SDimitry Andric 
58160fca6ea1SDimitry Andric   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
58170fca6ea1SDimitry Andric     return SDValue();
58180fca6ea1SDimitry Andric 
58190fca6ea1SDimitry Andric   SDLoc DL(N);
58200fca6ea1SDimitry Andric 
58210fca6ea1SDimitry Andric   // (mul x, (add y, 1)) -> (mad x, y, x)
58220fca6ea1SDimitry Andric   if (SDValue Res = combineMADConstOne(N0, N1, VT, DL, DCI))
58230fca6ea1SDimitry Andric     return Res;
58240fca6ea1SDimitry Andric   if (SDValue Res = combineMADConstOne(N1, N0, VT, DL, DCI))
58250fca6ea1SDimitry Andric     return Res;
58260fca6ea1SDimitry Andric 
58270fca6ea1SDimitry Andric   // (mul x, (select y, 1)) -> (select (mul x, y), x)
58280fca6ea1SDimitry Andric   if (SDValue Res = combineMulSelectConstOne(N0, N1, VT, DL, DCI))
58290fca6ea1SDimitry Andric     return Res;
58300fca6ea1SDimitry Andric   if (SDValue Res = combineMulSelectConstOne(N1, N0, VT, DL, DCI))
58310fca6ea1SDimitry Andric     return Res;
58320fca6ea1SDimitry Andric 
58330fca6ea1SDimitry Andric   return SDValue();
58340fca6ea1SDimitry Andric }
58350fca6ea1SDimitry Andric 
58360b57cec5SDimitry Andric /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
58370b57cec5SDimitry Andric static SDValue PerformMULCombine(SDNode *N,
58380b57cec5SDimitry Andric                                  TargetLowering::DAGCombinerInfo &DCI,
58395f757f3fSDimitry Andric                                  CodeGenOptLevel OptLevel) {
58400fca6ea1SDimitry Andric   if (OptLevel == CodeGenOptLevel::None)
58410fca6ea1SDimitry Andric     return SDValue();
58420fca6ea1SDimitry Andric 
58430b57cec5SDimitry Andric   if (SDValue Ret = TryMULWIDECombine(N, DCI))
58440b57cec5SDimitry Andric     return Ret;
58450b57cec5SDimitry Andric 
58460fca6ea1SDimitry Andric   SDValue N0 = N->getOperand(0);
58470fca6ea1SDimitry Andric   SDValue N1 = N->getOperand(1);
58480fca6ea1SDimitry Andric   return PerformMULCombineWithOperands(N, N0, N1, DCI);
58490b57cec5SDimitry Andric }
58500b57cec5SDimitry Andric 
58510b57cec5SDimitry Andric /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
58520b57cec5SDimitry Andric static SDValue PerformSHLCombine(SDNode *N,
58530b57cec5SDimitry Andric                                  TargetLowering::DAGCombinerInfo &DCI,
58545f757f3fSDimitry Andric                                  CodeGenOptLevel OptLevel) {
58555f757f3fSDimitry Andric   if (OptLevel > CodeGenOptLevel::None) {
58560b57cec5SDimitry Andric     // Try mul.wide combining at OptLevel > 0
58570b57cec5SDimitry Andric     if (SDValue Ret = TryMULWIDECombine(N, DCI))
58580b57cec5SDimitry Andric       return Ret;
58590b57cec5SDimitry Andric   }
58600b57cec5SDimitry Andric 
58610b57cec5SDimitry Andric   return SDValue();
58620b57cec5SDimitry Andric }
58630b57cec5SDimitry Andric 
58640b57cec5SDimitry Andric static SDValue PerformSETCCCombine(SDNode *N,
58655f757f3fSDimitry Andric                                    TargetLowering::DAGCombinerInfo &DCI,
58665f757f3fSDimitry Andric                                    unsigned int SmVersion) {
58670b57cec5SDimitry Andric   EVT CCType = N->getValueType(0);
58680b57cec5SDimitry Andric   SDValue A = N->getOperand(0);
58690b57cec5SDimitry Andric   SDValue B = N->getOperand(1);
58700b57cec5SDimitry Andric 
58715f757f3fSDimitry Andric   EVT AType = A.getValueType();
58725f757f3fSDimitry Andric   if (!(CCType == MVT::v2i1 && (AType == MVT::v2f16 || AType == MVT::v2bf16)))
58735f757f3fSDimitry Andric     return SDValue();
58745f757f3fSDimitry Andric 
58755f757f3fSDimitry Andric   if (A.getValueType() == MVT::v2bf16 && SmVersion < 90)
58760b57cec5SDimitry Andric     return SDValue();
58770b57cec5SDimitry Andric 
58780b57cec5SDimitry Andric   SDLoc DL(N);
58790b57cec5SDimitry Andric   // setp.f16x2 returns two scalar predicates, which we need to
58800b57cec5SDimitry Andric   // convert back to v2i1. The returned result will be scalarized by
58810b57cec5SDimitry Andric   // the legalizer, but the comparison will remain a single vector
58820b57cec5SDimitry Andric   // instruction.
58835f757f3fSDimitry Andric   SDValue CCNode = DCI.DAG.getNode(
58845f757f3fSDimitry Andric       A.getValueType() == MVT::v2f16 ? NVPTXISD::SETP_F16X2
58855f757f3fSDimitry Andric                                      : NVPTXISD::SETP_BF16X2,
58865f757f3fSDimitry Andric       DL, DCI.DAG.getVTList(MVT::i1, MVT::i1), {A, B, N->getOperand(2)});
58870b57cec5SDimitry Andric   return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, CCType, CCNode.getValue(0),
58880b57cec5SDimitry Andric                          CCNode.getValue(1));
58890b57cec5SDimitry Andric }
58900b57cec5SDimitry Andric 
58915f757f3fSDimitry Andric static SDValue PerformEXTRACTCombine(SDNode *N,
58925f757f3fSDimitry Andric                                      TargetLowering::DAGCombinerInfo &DCI) {
58935f757f3fSDimitry Andric   SDValue Vector = N->getOperand(0);
58945f757f3fSDimitry Andric   SDLoc DL(N);
58955f757f3fSDimitry Andric   EVT VectorVT = Vector.getValueType();
58965f757f3fSDimitry Andric   if (Vector->getOpcode() == ISD::LOAD && VectorVT.isSimple() &&
58975f757f3fSDimitry Andric       IsPTXVectorType(VectorVT.getSimpleVT()))
58985f757f3fSDimitry Andric     return SDValue(); // Native vector loads already combine nicely w/
58990fca6ea1SDimitry Andric                       // extract_vector_elt.
59000fca6ea1SDimitry Andric   // Don't mess with singletons or v2*16, v4i8 and v8i8 types, we already
59010fca6ea1SDimitry Andric   // handle them OK.
59025f757f3fSDimitry Andric   if (VectorVT.getVectorNumElements() == 1 || Isv2x16VT(VectorVT) ||
59030fca6ea1SDimitry Andric       VectorVT == MVT::v4i8 || VectorVT == MVT::v8i8)
59040fca6ea1SDimitry Andric     return SDValue();
59050fca6ea1SDimitry Andric 
59060fca6ea1SDimitry Andric   // Don't mess with undef values as sra may be simplified to 0, not undef.
59070fca6ea1SDimitry Andric   if (Vector->isUndef() || ISD::allOperandsUndef(Vector.getNode()))
59085f757f3fSDimitry Andric     return SDValue();
59095f757f3fSDimitry Andric 
59105f757f3fSDimitry Andric   uint64_t VectorBits = VectorVT.getSizeInBits();
59115f757f3fSDimitry Andric   // We only handle the types we can extract in-register.
59125f757f3fSDimitry Andric   if (!(VectorBits == 16 || VectorBits == 32 || VectorBits == 64))
59135f757f3fSDimitry Andric     return SDValue();
59145f757f3fSDimitry Andric 
59155f757f3fSDimitry Andric   ConstantSDNode *Index = dyn_cast<ConstantSDNode>(N->getOperand(1));
59165f757f3fSDimitry Andric   // Index == 0 is handled by generic DAG combiner.
59175f757f3fSDimitry Andric   if (!Index || Index->getZExtValue() == 0)
59185f757f3fSDimitry Andric     return SDValue();
59195f757f3fSDimitry Andric 
59205f757f3fSDimitry Andric   MVT IVT = MVT::getIntegerVT(VectorBits);
59215f757f3fSDimitry Andric   EVT EltVT = VectorVT.getVectorElementType();
59225f757f3fSDimitry Andric   EVT EltIVT = EltVT.changeTypeToInteger();
59235f757f3fSDimitry Andric   uint64_t EltBits = EltVT.getScalarSizeInBits();
59245f757f3fSDimitry Andric 
59255f757f3fSDimitry Andric   SDValue Result = DCI.DAG.getNode(
59265f757f3fSDimitry Andric       ISD::TRUNCATE, DL, EltIVT,
59275f757f3fSDimitry Andric       DCI.DAG.getNode(
59285f757f3fSDimitry Andric           ISD::SRA, DL, IVT, DCI.DAG.getNode(ISD::BITCAST, DL, IVT, Vector),
59295f757f3fSDimitry Andric           DCI.DAG.getConstant(Index->getZExtValue() * EltBits, DL, IVT)));
59305f757f3fSDimitry Andric 
59315f757f3fSDimitry Andric   // If element has non-integer type, bitcast it back to the expected type.
59325f757f3fSDimitry Andric   if (EltVT != EltIVT)
59335f757f3fSDimitry Andric     Result = DCI.DAG.getNode(ISD::BITCAST, DL, EltVT, Result);
59345f757f3fSDimitry Andric   // Past legalizer, we may need to extent i8 -> i16 to match the register type.
59355f757f3fSDimitry Andric   if (EltVT != N->getValueType(0))
59365f757f3fSDimitry Andric     Result = DCI.DAG.getNode(ISD::ANY_EXTEND, DL, N->getValueType(0), Result);
59375f757f3fSDimitry Andric 
59385f757f3fSDimitry Andric   return Result;
59395f757f3fSDimitry Andric }
59405f757f3fSDimitry Andric 
59415f757f3fSDimitry Andric static SDValue PerformVSELECTCombine(SDNode *N,
59425f757f3fSDimitry Andric                                      TargetLowering::DAGCombinerInfo &DCI) {
59435f757f3fSDimitry Andric   SDValue VA = N->getOperand(1);
59445f757f3fSDimitry Andric   EVT VectorVT = VA.getValueType();
59455f757f3fSDimitry Andric   if (VectorVT != MVT::v4i8)
59465f757f3fSDimitry Andric     return SDValue();
59475f757f3fSDimitry Andric 
59485f757f3fSDimitry Andric   // We need to split vselect into individual per-element operations Because we
59495f757f3fSDimitry Andric   // use BFE/BFI instruction for byte extraction/insertion, we do end up with
59505f757f3fSDimitry Andric   // 32-bit values, so we may as well do comparison as i32 to avoid conversions
59515f757f3fSDimitry Andric   // to/from i16 normally used for i8 values.
59525f757f3fSDimitry Andric   SmallVector<SDValue, 4> E;
59535f757f3fSDimitry Andric   SDLoc DL(N);
59545f757f3fSDimitry Andric   SDValue VCond = N->getOperand(0);
59555f757f3fSDimitry Andric   SDValue VB = N->getOperand(2);
59565f757f3fSDimitry Andric   for (int I = 0; I < 4; ++I) {
59575f757f3fSDimitry Andric     SDValue C = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i1, VCond,
59585f757f3fSDimitry Andric                                 DCI.DAG.getConstant(I, DL, MVT::i32));
59595f757f3fSDimitry Andric     SDValue EA = DCI.DAG.getAnyExtOrTrunc(
59605f757f3fSDimitry Andric         DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, VA,
59615f757f3fSDimitry Andric                         DCI.DAG.getConstant(I, DL, MVT::i32)),
59625f757f3fSDimitry Andric         DL, MVT::i32);
59635f757f3fSDimitry Andric     SDValue EB = DCI.DAG.getAnyExtOrTrunc(
59645f757f3fSDimitry Andric         DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, VB,
59655f757f3fSDimitry Andric                         DCI.DAG.getConstant(I, DL, MVT::i32)),
59665f757f3fSDimitry Andric         DL, MVT::i32);
59675f757f3fSDimitry Andric     E.push_back(DCI.DAG.getAnyExtOrTrunc(
59685f757f3fSDimitry Andric         DCI.DAG.getNode(ISD::SELECT, DL, MVT::i32, C, EA, EB), DL, MVT::i8));
59695f757f3fSDimitry Andric   }
59705f757f3fSDimitry Andric   return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i8, E);
59715f757f3fSDimitry Andric }
59725f757f3fSDimitry Andric 
59735f757f3fSDimitry Andric static SDValue PerformLOADCombine(SDNode *N,
59745f757f3fSDimitry Andric                                   TargetLowering::DAGCombinerInfo &DCI) {
59755f757f3fSDimitry Andric   SelectionDAG &DAG = DCI.DAG;
59765f757f3fSDimitry Andric   LoadSDNode *LD = cast<LoadSDNode>(N);
59775f757f3fSDimitry Andric 
59785f757f3fSDimitry Andric   // Lower a v16i8 load into a LoadV4 operation with i32 results instead of
59795f757f3fSDimitry Andric   // letting ReplaceLoadVector split it into smaller loads during legalization.
59805f757f3fSDimitry Andric   // This is done at dag-combine1 time, so that vector operations with i8
59815f757f3fSDimitry Andric   // elements can be optimised away instead of being needlessly split during
59825f757f3fSDimitry Andric   // legalization, which involves storing to the stack and loading it back.
59835f757f3fSDimitry Andric   EVT VT = N->getValueType(0);
59845f757f3fSDimitry Andric   if (VT != MVT::v16i8)
59855f757f3fSDimitry Andric     return SDValue();
59865f757f3fSDimitry Andric 
59875f757f3fSDimitry Andric   SDLoc DL(N);
59885f757f3fSDimitry Andric 
59895f757f3fSDimitry Andric   // Create a v4i32 vector load operation, effectively <4 x v4i8>.
59905f757f3fSDimitry Andric   unsigned Opc = NVPTXISD::LoadV4;
59915f757f3fSDimitry Andric   EVT NewVT = MVT::v4i32;
59925f757f3fSDimitry Andric   EVT EltVT = NewVT.getVectorElementType();
59935f757f3fSDimitry Andric   unsigned NumElts = NewVT.getVectorNumElements();
59945f757f3fSDimitry Andric   EVT RetVTs[] = {EltVT, EltVT, EltVT, EltVT, MVT::Other};
59955f757f3fSDimitry Andric   SDVTList RetVTList = DAG.getVTList(RetVTs);
59965f757f3fSDimitry Andric   SmallVector<SDValue, 8> Ops(N->ops());
59975f757f3fSDimitry Andric   Ops.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL));
59985f757f3fSDimitry Andric   SDValue NewLoad = DAG.getMemIntrinsicNode(Opc, DL, RetVTList, Ops, NewVT,
59995f757f3fSDimitry Andric                                             LD->getMemOperand());
60005f757f3fSDimitry Andric   SDValue NewChain = NewLoad.getValue(NumElts);
60015f757f3fSDimitry Andric 
60025f757f3fSDimitry Andric   // Create a vector of the same type returned by the original load.
60035f757f3fSDimitry Andric   SmallVector<SDValue, 4> Elts;
60045f757f3fSDimitry Andric   for (unsigned i = 0; i < NumElts; i++)
60055f757f3fSDimitry Andric     Elts.push_back(NewLoad.getValue(i));
60065f757f3fSDimitry Andric   return DCI.DAG.getMergeValues(
60075f757f3fSDimitry Andric       {DCI.DAG.getBitcast(VT, DCI.DAG.getBuildVector(NewVT, DL, Elts)),
60085f757f3fSDimitry Andric        NewChain},
60095f757f3fSDimitry Andric       DL);
60105f757f3fSDimitry Andric }
60115f757f3fSDimitry Andric 
60120b57cec5SDimitry Andric SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
60130b57cec5SDimitry Andric                                                DAGCombinerInfo &DCI) const {
60145f757f3fSDimitry Andric   CodeGenOptLevel OptLevel = getTargetMachine().getOptLevel();
60150b57cec5SDimitry Andric   switch (N->getOpcode()) {
60160b57cec5SDimitry Andric     default: break;
60170b57cec5SDimitry Andric     case ISD::ADD:
60180fca6ea1SDimitry Andric       return PerformADDCombine(N, DCI, OptLevel);
60190b57cec5SDimitry Andric     case ISD::FADD:
60200fca6ea1SDimitry Andric       return PerformFADDCombine(N, DCI, OptLevel);
60210b57cec5SDimitry Andric     case ISD::MUL:
60220b57cec5SDimitry Andric       return PerformMULCombine(N, DCI, OptLevel);
60230b57cec5SDimitry Andric     case ISD::SHL:
60240b57cec5SDimitry Andric       return PerformSHLCombine(N, DCI, OptLevel);
60250b57cec5SDimitry Andric     case ISD::AND:
60260b57cec5SDimitry Andric       return PerformANDCombine(N, DCI);
60270b57cec5SDimitry Andric     case ISD::UREM:
60280b57cec5SDimitry Andric     case ISD::SREM:
60290b57cec5SDimitry Andric       return PerformREMCombine(N, DCI, OptLevel);
60300b57cec5SDimitry Andric     case ISD::SETCC:
60315f757f3fSDimitry Andric       return PerformSETCCCombine(N, DCI, STI.getSmVersion());
60325f757f3fSDimitry Andric     case ISD::LOAD:
60335f757f3fSDimitry Andric       return PerformLOADCombine(N, DCI);
603481ad6265SDimitry Andric     case NVPTXISD::StoreRetval:
603581ad6265SDimitry Andric     case NVPTXISD::StoreRetvalV2:
603681ad6265SDimitry Andric     case NVPTXISD::StoreRetvalV4:
603781ad6265SDimitry Andric       return PerformStoreRetvalCombine(N);
60380fca6ea1SDimitry Andric     case NVPTXISD::StoreParam:
60390fca6ea1SDimitry Andric     case NVPTXISD::StoreParamV2:
60400fca6ea1SDimitry Andric     case NVPTXISD::StoreParamV4:
60410fca6ea1SDimitry Andric       return PerformStoreParamCombine(N);
60425f757f3fSDimitry Andric     case ISD::EXTRACT_VECTOR_ELT:
60435f757f3fSDimitry Andric       return PerformEXTRACTCombine(N, DCI);
60445f757f3fSDimitry Andric     case ISD::VSELECT:
60455f757f3fSDimitry Andric       return PerformVSELECTCombine(N, DCI);
60460b57cec5SDimitry Andric   }
60470b57cec5SDimitry Andric   return SDValue();
60480b57cec5SDimitry Andric }
60490b57cec5SDimitry Andric 
60500b57cec5SDimitry Andric /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
60510b57cec5SDimitry Andric static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
60520b57cec5SDimitry Andric                               SmallVectorImpl<SDValue> &Results) {
60530b57cec5SDimitry Andric   EVT ResVT = N->getValueType(0);
60540b57cec5SDimitry Andric   SDLoc DL(N);
60550b57cec5SDimitry Andric 
60560b57cec5SDimitry Andric   assert(ResVT.isVector() && "Vector load must have vector type");
60570b57cec5SDimitry Andric 
60580b57cec5SDimitry Andric   // We only handle "native" vector sizes for now, e.g. <4 x double> is not
60590b57cec5SDimitry Andric   // legal.  We can (and should) split that into 2 loads of <2 x double> here
60600b57cec5SDimitry Andric   // but I'm leaving that as a TODO for now.
60610b57cec5SDimitry Andric   assert(ResVT.isSimple() && "Can only handle simple types");
60620b57cec5SDimitry Andric   switch (ResVT.getSimpleVT().SimpleTy) {
60630b57cec5SDimitry Andric   default:
60640b57cec5SDimitry Andric     return;
60650b57cec5SDimitry Andric   case MVT::v2i8:
60660b57cec5SDimitry Andric   case MVT::v2i16:
60670b57cec5SDimitry Andric   case MVT::v2i32:
60680b57cec5SDimitry Andric   case MVT::v2i64:
60690b57cec5SDimitry Andric   case MVT::v2f16:
60700b57cec5SDimitry Andric   case MVT::v2f32:
60710b57cec5SDimitry Andric   case MVT::v2f64:
60720b57cec5SDimitry Andric   case MVT::v4i8:
60730b57cec5SDimitry Andric   case MVT::v4i16:
60740b57cec5SDimitry Andric   case MVT::v4i32:
60750b57cec5SDimitry Andric   case MVT::v4f16:
60760b57cec5SDimitry Andric   case MVT::v4f32:
60770b57cec5SDimitry Andric   case MVT::v8f16:  // <4 x f16x2>
60785f757f3fSDimitry Andric   case MVT::v8bf16: // <4 x bf16x2>
60795f757f3fSDimitry Andric   case MVT::v8i16:  // <4 x i16x2>
60800b57cec5SDimitry Andric     // This is a "native" vector type
60810b57cec5SDimitry Andric     break;
60820b57cec5SDimitry Andric   }
60830b57cec5SDimitry Andric 
60840b57cec5SDimitry Andric   LoadSDNode *LD = cast<LoadSDNode>(N);
60850b57cec5SDimitry Andric 
60865ffd83dbSDimitry Andric   Align Alignment = LD->getAlign();
60870b57cec5SDimitry Andric   auto &TD = DAG.getDataLayout();
608806c3fb27SDimitry Andric   Align PrefAlign =
608906c3fb27SDimitry Andric       TD.getPrefTypeAlign(LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
60905ffd83dbSDimitry Andric   if (Alignment < PrefAlign) {
60910b57cec5SDimitry Andric     // This load is not sufficiently aligned, so bail out and let this vector
60920b57cec5SDimitry Andric     // load be scalarized.  Note that we may still be able to emit smaller
60930b57cec5SDimitry Andric     // vector loads.  For example, if we are loading a <4 x float> with an
60940b57cec5SDimitry Andric     // alignment of 8, this check will fail but the legalizer will try again
60950b57cec5SDimitry Andric     // with 2 x <2 x float>, which will succeed with an alignment of 8.
60960b57cec5SDimitry Andric     return;
60970b57cec5SDimitry Andric   }
60980b57cec5SDimitry Andric 
60990b57cec5SDimitry Andric   EVT EltVT = ResVT.getVectorElementType();
61000b57cec5SDimitry Andric   unsigned NumElts = ResVT.getVectorNumElements();
61010b57cec5SDimitry Andric 
61020b57cec5SDimitry Andric   // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
61030b57cec5SDimitry Andric   // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
61040b57cec5SDimitry Andric   // loaded type to i16 and propagate the "real" type as the memory type.
61050b57cec5SDimitry Andric   bool NeedTrunc = false;
61060b57cec5SDimitry Andric   if (EltVT.getSizeInBits() < 16) {
61070b57cec5SDimitry Andric     EltVT = MVT::i16;
61080b57cec5SDimitry Andric     NeedTrunc = true;
61090b57cec5SDimitry Andric   }
61100b57cec5SDimitry Andric 
61110b57cec5SDimitry Andric   unsigned Opcode = 0;
61120b57cec5SDimitry Andric   SDVTList LdResVTs;
61135f757f3fSDimitry Andric   bool Load16x2 = false;
61140b57cec5SDimitry Andric 
61150b57cec5SDimitry Andric   switch (NumElts) {
61160b57cec5SDimitry Andric   default:
61170b57cec5SDimitry Andric     return;
61180b57cec5SDimitry Andric   case 2:
61190b57cec5SDimitry Andric     Opcode = NVPTXISD::LoadV2;
61200b57cec5SDimitry Andric     LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
61210b57cec5SDimitry Andric     break;
61220b57cec5SDimitry Andric   case 4: {
61230b57cec5SDimitry Andric     Opcode = NVPTXISD::LoadV4;
61240b57cec5SDimitry Andric     EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
61250b57cec5SDimitry Andric     LdResVTs = DAG.getVTList(ListVTs);
61260b57cec5SDimitry Andric     break;
61270b57cec5SDimitry Andric   }
61280b57cec5SDimitry Andric   case 8: {
61290b57cec5SDimitry Andric     // v8f16 is a special case. PTX doesn't have ld.v8.f16
61300b57cec5SDimitry Andric     // instruction. Instead, we split the vector into v2f16 chunks and
61310b57cec5SDimitry Andric     // load them with ld.v4.b32.
61325f757f3fSDimitry Andric     assert(Is16bitsType(EltVT.getSimpleVT()) && "Unsupported v8 vector type.");
61335f757f3fSDimitry Andric     Load16x2 = true;
61340b57cec5SDimitry Andric     Opcode = NVPTXISD::LoadV4;
61355f757f3fSDimitry Andric     EVT VVT;
61365f757f3fSDimitry Andric     switch (EltVT.getSimpleVT().SimpleTy) {
61375f757f3fSDimitry Andric     case MVT::f16:
61385f757f3fSDimitry Andric       VVT = MVT::v2f16;
61395f757f3fSDimitry Andric       break;
61405f757f3fSDimitry Andric     case MVT::bf16:
61415f757f3fSDimitry Andric       VVT = MVT::v2bf16;
61425f757f3fSDimitry Andric       break;
61435f757f3fSDimitry Andric     case MVT::i16:
61445f757f3fSDimitry Andric       VVT = MVT::v2i16;
61455f757f3fSDimitry Andric       break;
61465f757f3fSDimitry Andric     default:
61475f757f3fSDimitry Andric       llvm_unreachable("Unsupported v8 vector type.");
61485f757f3fSDimitry Andric     }
6149bdd1243dSDimitry Andric     EVT ListVTs[] = {VVT, VVT, VVT, VVT, MVT::Other};
61500b57cec5SDimitry Andric     LdResVTs = DAG.getVTList(ListVTs);
61510b57cec5SDimitry Andric     break;
61520b57cec5SDimitry Andric   }
61530b57cec5SDimitry Andric   }
61540b57cec5SDimitry Andric 
61550b57cec5SDimitry Andric   // Copy regular operands
61560b57cec5SDimitry Andric   SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end());
61570b57cec5SDimitry Andric 
61580b57cec5SDimitry Andric   // The select routine does not have access to the LoadSDNode instance, so
61590b57cec5SDimitry Andric   // pass along the extension information
61600b57cec5SDimitry Andric   OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL));
61610b57cec5SDimitry Andric 
61620b57cec5SDimitry Andric   SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
61630b57cec5SDimitry Andric                                           LD->getMemoryVT(),
61640b57cec5SDimitry Andric                                           LD->getMemOperand());
61650b57cec5SDimitry Andric 
61660b57cec5SDimitry Andric   SmallVector<SDValue, 8> ScalarRes;
61675f757f3fSDimitry Andric   if (Load16x2) {
61680b57cec5SDimitry Andric     // Split v2f16 subvectors back into individual elements.
61690b57cec5SDimitry Andric     NumElts /= 2;
61700b57cec5SDimitry Andric     for (unsigned i = 0; i < NumElts; ++i) {
61710b57cec5SDimitry Andric       SDValue SubVector = NewLD.getValue(i);
61720b57cec5SDimitry Andric       SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector,
61730b57cec5SDimitry Andric                                DAG.getIntPtrConstant(0, DL));
61740b57cec5SDimitry Andric       SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector,
61750b57cec5SDimitry Andric                                DAG.getIntPtrConstant(1, DL));
61760b57cec5SDimitry Andric       ScalarRes.push_back(E0);
61770b57cec5SDimitry Andric       ScalarRes.push_back(E1);
61780b57cec5SDimitry Andric     }
61790b57cec5SDimitry Andric   } else {
61800b57cec5SDimitry Andric     for (unsigned i = 0; i < NumElts; ++i) {
61810b57cec5SDimitry Andric       SDValue Res = NewLD.getValue(i);
61820b57cec5SDimitry Andric       if (NeedTrunc)
61830b57cec5SDimitry Andric         Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
61840b57cec5SDimitry Andric       ScalarRes.push_back(Res);
61850b57cec5SDimitry Andric     }
61860b57cec5SDimitry Andric   }
61870b57cec5SDimitry Andric 
61880b57cec5SDimitry Andric   SDValue LoadChain = NewLD.getValue(NumElts);
61890b57cec5SDimitry Andric 
61900b57cec5SDimitry Andric   SDValue BuildVec = DAG.getBuildVector(ResVT, DL, ScalarRes);
61910b57cec5SDimitry Andric 
61920b57cec5SDimitry Andric   Results.push_back(BuildVec);
61930b57cec5SDimitry Andric   Results.push_back(LoadChain);
61940b57cec5SDimitry Andric }
61950b57cec5SDimitry Andric 
61960b57cec5SDimitry Andric static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
61970b57cec5SDimitry Andric                                      SmallVectorImpl<SDValue> &Results) {
61980b57cec5SDimitry Andric   SDValue Chain = N->getOperand(0);
61990b57cec5SDimitry Andric   SDValue Intrin = N->getOperand(1);
62000b57cec5SDimitry Andric   SDLoc DL(N);
62010b57cec5SDimitry Andric 
62020b57cec5SDimitry Andric   // Get the intrinsic ID
62031db9f3b2SDimitry Andric   unsigned IntrinNo = Intrin.getNode()->getAsZExtVal();
62040b57cec5SDimitry Andric   switch (IntrinNo) {
62050b57cec5SDimitry Andric   default:
62060b57cec5SDimitry Andric     return;
62070b57cec5SDimitry Andric   case Intrinsic::nvvm_ldg_global_i:
62080b57cec5SDimitry Andric   case Intrinsic::nvvm_ldg_global_f:
62090b57cec5SDimitry Andric   case Intrinsic::nvvm_ldg_global_p:
62100b57cec5SDimitry Andric   case Intrinsic::nvvm_ldu_global_i:
62110b57cec5SDimitry Andric   case Intrinsic::nvvm_ldu_global_f:
62120b57cec5SDimitry Andric   case Intrinsic::nvvm_ldu_global_p: {
62130b57cec5SDimitry Andric     EVT ResVT = N->getValueType(0);
62140b57cec5SDimitry Andric 
62150b57cec5SDimitry Andric     if (ResVT.isVector()) {
62160b57cec5SDimitry Andric       // Vector LDG/LDU
62170b57cec5SDimitry Andric 
62180b57cec5SDimitry Andric       unsigned NumElts = ResVT.getVectorNumElements();
62190b57cec5SDimitry Andric       EVT EltVT = ResVT.getVectorElementType();
62200b57cec5SDimitry Andric 
62210b57cec5SDimitry Andric       // Since LDU/LDG are target nodes, we cannot rely on DAG type
62220b57cec5SDimitry Andric       // legalization.
62230b57cec5SDimitry Andric       // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
62240b57cec5SDimitry Andric       // loaded type to i16 and propagate the "real" type as the memory type.
62250b57cec5SDimitry Andric       bool NeedTrunc = false;
62260b57cec5SDimitry Andric       if (EltVT.getSizeInBits() < 16) {
62270b57cec5SDimitry Andric         EltVT = MVT::i16;
62280b57cec5SDimitry Andric         NeedTrunc = true;
62290b57cec5SDimitry Andric       }
62300b57cec5SDimitry Andric 
62310b57cec5SDimitry Andric       unsigned Opcode = 0;
62320b57cec5SDimitry Andric       SDVTList LdResVTs;
62330b57cec5SDimitry Andric 
62340b57cec5SDimitry Andric       switch (NumElts) {
62350b57cec5SDimitry Andric       default:
62360b57cec5SDimitry Andric         return;
62370b57cec5SDimitry Andric       case 2:
62380b57cec5SDimitry Andric         switch (IntrinNo) {
62390b57cec5SDimitry Andric         default:
62400b57cec5SDimitry Andric           return;
62410b57cec5SDimitry Andric         case Intrinsic::nvvm_ldg_global_i:
62420b57cec5SDimitry Andric         case Intrinsic::nvvm_ldg_global_f:
62430b57cec5SDimitry Andric         case Intrinsic::nvvm_ldg_global_p:
62440b57cec5SDimitry Andric           Opcode = NVPTXISD::LDGV2;
62450b57cec5SDimitry Andric           break;
62460b57cec5SDimitry Andric         case Intrinsic::nvvm_ldu_global_i:
62470b57cec5SDimitry Andric         case Intrinsic::nvvm_ldu_global_f:
62480b57cec5SDimitry Andric         case Intrinsic::nvvm_ldu_global_p:
62490b57cec5SDimitry Andric           Opcode = NVPTXISD::LDUV2;
62500b57cec5SDimitry Andric           break;
62510b57cec5SDimitry Andric         }
62520b57cec5SDimitry Andric         LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
62530b57cec5SDimitry Andric         break;
62540b57cec5SDimitry Andric       case 4: {
62550b57cec5SDimitry Andric         switch (IntrinNo) {
62560b57cec5SDimitry Andric         default:
62570b57cec5SDimitry Andric           return;
62580b57cec5SDimitry Andric         case Intrinsic::nvvm_ldg_global_i:
62590b57cec5SDimitry Andric         case Intrinsic::nvvm_ldg_global_f:
62600b57cec5SDimitry Andric         case Intrinsic::nvvm_ldg_global_p:
62610b57cec5SDimitry Andric           Opcode = NVPTXISD::LDGV4;
62620b57cec5SDimitry Andric           break;
62630b57cec5SDimitry Andric         case Intrinsic::nvvm_ldu_global_i:
62640b57cec5SDimitry Andric         case Intrinsic::nvvm_ldu_global_f:
62650b57cec5SDimitry Andric         case Intrinsic::nvvm_ldu_global_p:
62660b57cec5SDimitry Andric           Opcode = NVPTXISD::LDUV4;
62670b57cec5SDimitry Andric           break;
62680b57cec5SDimitry Andric         }
62690b57cec5SDimitry Andric         EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
62700b57cec5SDimitry Andric         LdResVTs = DAG.getVTList(ListVTs);
62710b57cec5SDimitry Andric         break;
62720b57cec5SDimitry Andric       }
62730b57cec5SDimitry Andric       }
62740b57cec5SDimitry Andric 
62750b57cec5SDimitry Andric       SmallVector<SDValue, 8> OtherOps;
62760b57cec5SDimitry Andric 
62770b57cec5SDimitry Andric       // Copy regular operands
62780b57cec5SDimitry Andric 
62790b57cec5SDimitry Andric       OtherOps.push_back(Chain); // Chain
62800b57cec5SDimitry Andric                                  // Skip operand 1 (intrinsic ID)
62810b57cec5SDimitry Andric       // Others
62820b57cec5SDimitry Andric       OtherOps.append(N->op_begin() + 2, N->op_end());
62830b57cec5SDimitry Andric 
62840b57cec5SDimitry Andric       MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
62850b57cec5SDimitry Andric 
62860b57cec5SDimitry Andric       SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
62870b57cec5SDimitry Andric                                               MemSD->getMemoryVT(),
62880b57cec5SDimitry Andric                                               MemSD->getMemOperand());
62890b57cec5SDimitry Andric 
62900b57cec5SDimitry Andric       SmallVector<SDValue, 4> ScalarRes;
62910b57cec5SDimitry Andric 
62920b57cec5SDimitry Andric       for (unsigned i = 0; i < NumElts; ++i) {
62930b57cec5SDimitry Andric         SDValue Res = NewLD.getValue(i);
62940b57cec5SDimitry Andric         if (NeedTrunc)
62950b57cec5SDimitry Andric           Res =
62960b57cec5SDimitry Andric               DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
62970b57cec5SDimitry Andric         ScalarRes.push_back(Res);
62980b57cec5SDimitry Andric       }
62990b57cec5SDimitry Andric 
63000b57cec5SDimitry Andric       SDValue LoadChain = NewLD.getValue(NumElts);
63010b57cec5SDimitry Andric 
63020b57cec5SDimitry Andric       SDValue BuildVec =
63030b57cec5SDimitry Andric           DAG.getBuildVector(ResVT, DL, ScalarRes);
63040b57cec5SDimitry Andric 
63050b57cec5SDimitry Andric       Results.push_back(BuildVec);
63060b57cec5SDimitry Andric       Results.push_back(LoadChain);
63070b57cec5SDimitry Andric     } else {
63080b57cec5SDimitry Andric       // i8 LDG/LDU
63090b57cec5SDimitry Andric       assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
63100b57cec5SDimitry Andric              "Custom handling of non-i8 ldu/ldg?");
63110b57cec5SDimitry Andric 
63120b57cec5SDimitry Andric       // Just copy all operands as-is
63130b57cec5SDimitry Andric       SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
63140b57cec5SDimitry Andric 
63150b57cec5SDimitry Andric       // Force output to i16
63160b57cec5SDimitry Andric       SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
63170b57cec5SDimitry Andric 
63180b57cec5SDimitry Andric       MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
63190b57cec5SDimitry Andric 
63200b57cec5SDimitry Andric       // We make sure the memory type is i8, which will be used during isel
63210b57cec5SDimitry Andric       // to select the proper instruction.
63220b57cec5SDimitry Andric       SDValue NewLD =
63230b57cec5SDimitry Andric           DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
63240b57cec5SDimitry Andric                                   MVT::i8, MemSD->getMemOperand());
63250b57cec5SDimitry Andric 
63260b57cec5SDimitry Andric       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
63270b57cec5SDimitry Andric                                     NewLD.getValue(0)));
63280b57cec5SDimitry Andric       Results.push_back(NewLD.getValue(1));
63290b57cec5SDimitry Andric     }
63300b57cec5SDimitry Andric   }
63310b57cec5SDimitry Andric   }
63320b57cec5SDimitry Andric }
63330b57cec5SDimitry Andric 
63340fca6ea1SDimitry Andric static void ReplaceCopyFromReg_128(SDNode *N, SelectionDAG &DAG,
63350fca6ea1SDimitry Andric                                    SmallVectorImpl<SDValue> &Results) {
63360fca6ea1SDimitry Andric   // Change the CopyFromReg to output 2 64-bit results instead of a 128-bit
63370fca6ea1SDimitry Andric   // result so that it can pass the legalization
63380fca6ea1SDimitry Andric   SDLoc DL(N);
63390fca6ea1SDimitry Andric   SDValue Chain = N->getOperand(0);
63400fca6ea1SDimitry Andric   SDValue Reg = N->getOperand(1);
63410fca6ea1SDimitry Andric   SDValue Glue = N->getOperand(2);
63420fca6ea1SDimitry Andric 
63430fca6ea1SDimitry Andric   assert(Reg.getValueType() == MVT::i128 &&
63440fca6ea1SDimitry Andric          "Custom lowering for CopyFromReg with 128-bit reg only");
63450fca6ea1SDimitry Andric   SmallVector<EVT, 4> ResultsType = {MVT::i64, MVT::i64, N->getValueType(1),
63460fca6ea1SDimitry Andric                                      N->getValueType(2)};
63470fca6ea1SDimitry Andric   SmallVector<SDValue, 3> NewOps = {Chain, Reg, Glue};
63480fca6ea1SDimitry Andric 
63490fca6ea1SDimitry Andric   SDValue NewValue = DAG.getNode(ISD::CopyFromReg, DL, ResultsType, NewOps);
63500fca6ea1SDimitry Andric   SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
63510fca6ea1SDimitry Andric                              {NewValue.getValue(0), NewValue.getValue(1)});
63520fca6ea1SDimitry Andric 
63530fca6ea1SDimitry Andric   Results.push_back(Pair);
63540fca6ea1SDimitry Andric   Results.push_back(NewValue.getValue(2));
63550fca6ea1SDimitry Andric   Results.push_back(NewValue.getValue(3));
63560fca6ea1SDimitry Andric }
63570fca6ea1SDimitry Andric 
63580b57cec5SDimitry Andric void NVPTXTargetLowering::ReplaceNodeResults(
63590b57cec5SDimitry Andric     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
63600b57cec5SDimitry Andric   switch (N->getOpcode()) {
63610b57cec5SDimitry Andric   default:
63620b57cec5SDimitry Andric     report_fatal_error("Unhandled custom legalization");
63630b57cec5SDimitry Andric   case ISD::LOAD:
63640b57cec5SDimitry Andric     ReplaceLoadVector(N, DAG, Results);
63650b57cec5SDimitry Andric     return;
63660b57cec5SDimitry Andric   case ISD::INTRINSIC_W_CHAIN:
63670b57cec5SDimitry Andric     ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
63680b57cec5SDimitry Andric     return;
63690fca6ea1SDimitry Andric   case ISD::CopyFromReg:
63700fca6ea1SDimitry Andric     ReplaceCopyFromReg_128(N, DAG, Results);
63710fca6ea1SDimitry Andric     return;
63720b57cec5SDimitry Andric   }
63730b57cec5SDimitry Andric }
63740b57cec5SDimitry Andric 
637581ad6265SDimitry Andric NVPTXTargetLowering::AtomicExpansionKind
637681ad6265SDimitry Andric NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
637781ad6265SDimitry Andric   Type *Ty = AI->getValOperand()->getType();
637881ad6265SDimitry Andric 
637981ad6265SDimitry Andric   if (AI->isFloatingPointOperation()) {
638081ad6265SDimitry Andric     if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) {
63810fca6ea1SDimitry Andric       if (Ty->isHalfTy() && STI.getSmVersion() >= 70 &&
63820fca6ea1SDimitry Andric           STI.getPTXVersion() >= 63)
63830fca6ea1SDimitry Andric         return AtomicExpansionKind::None;
63840fca6ea1SDimitry Andric       if (Ty->isBFloatTy() && STI.getSmVersion() >= 90 &&
63850fca6ea1SDimitry Andric           STI.getPTXVersion() >= 78)
63860fca6ea1SDimitry Andric         return AtomicExpansionKind::None;
638781ad6265SDimitry Andric       if (Ty->isFloatTy())
638881ad6265SDimitry Andric         return AtomicExpansionKind::None;
638981ad6265SDimitry Andric       if (Ty->isDoubleTy() && STI.hasAtomAddF64())
639081ad6265SDimitry Andric         return AtomicExpansionKind::None;
639181ad6265SDimitry Andric     }
639281ad6265SDimitry Andric     return AtomicExpansionKind::CmpXChg;
639381ad6265SDimitry Andric   }
639481ad6265SDimitry Andric 
639581ad6265SDimitry Andric   assert(Ty->isIntegerTy() && "Ty should be integer at this point");
639681ad6265SDimitry Andric   auto ITy = cast<llvm::IntegerType>(Ty);
639781ad6265SDimitry Andric 
639881ad6265SDimitry Andric   switch (AI->getOperation()) {
639981ad6265SDimitry Andric   default:
640081ad6265SDimitry Andric     return AtomicExpansionKind::CmpXChg;
640181ad6265SDimitry Andric   case AtomicRMWInst::BinOp::And:
640281ad6265SDimitry Andric   case AtomicRMWInst::BinOp::Or:
640381ad6265SDimitry Andric   case AtomicRMWInst::BinOp::Xor:
640481ad6265SDimitry Andric   case AtomicRMWInst::BinOp::Xchg:
640581ad6265SDimitry Andric     switch (ITy->getBitWidth()) {
640681ad6265SDimitry Andric     case 8:
640781ad6265SDimitry Andric     case 16:
640881ad6265SDimitry Andric       return AtomicExpansionKind::CmpXChg;
640981ad6265SDimitry Andric     case 32:
641081ad6265SDimitry Andric       return AtomicExpansionKind::None;
641181ad6265SDimitry Andric     case 64:
641281ad6265SDimitry Andric       if (STI.hasAtomBitwise64())
641381ad6265SDimitry Andric         return AtomicExpansionKind::None;
641481ad6265SDimitry Andric       return AtomicExpansionKind::CmpXChg;
641581ad6265SDimitry Andric     default:
641681ad6265SDimitry Andric       llvm_unreachable("unsupported width encountered");
641781ad6265SDimitry Andric     }
641881ad6265SDimitry Andric   case AtomicRMWInst::BinOp::Add:
641981ad6265SDimitry Andric   case AtomicRMWInst::BinOp::Sub:
642081ad6265SDimitry Andric   case AtomicRMWInst::BinOp::Max:
642181ad6265SDimitry Andric   case AtomicRMWInst::BinOp::Min:
642281ad6265SDimitry Andric   case AtomicRMWInst::BinOp::UMax:
642381ad6265SDimitry Andric   case AtomicRMWInst::BinOp::UMin:
642481ad6265SDimitry Andric     switch (ITy->getBitWidth()) {
642581ad6265SDimitry Andric     case 8:
642681ad6265SDimitry Andric     case 16:
642781ad6265SDimitry Andric       return AtomicExpansionKind::CmpXChg;
642881ad6265SDimitry Andric     case 32:
642981ad6265SDimitry Andric       return AtomicExpansionKind::None;
643081ad6265SDimitry Andric     case 64:
643181ad6265SDimitry Andric       if (STI.hasAtomMinMax64())
643281ad6265SDimitry Andric         return AtomicExpansionKind::None;
643381ad6265SDimitry Andric       return AtomicExpansionKind::CmpXChg;
643481ad6265SDimitry Andric     default:
643581ad6265SDimitry Andric       llvm_unreachable("unsupported width encountered");
643681ad6265SDimitry Andric     }
643781ad6265SDimitry Andric   }
643881ad6265SDimitry Andric 
643981ad6265SDimitry Andric   return AtomicExpansionKind::CmpXChg;
644081ad6265SDimitry Andric }
644181ad6265SDimitry Andric 
64420b57cec5SDimitry Andric // Pin NVPTXTargetObjectFile's vtables to this file.
644381ad6265SDimitry Andric NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default;
64440b57cec5SDimitry Andric 
64450b57cec5SDimitry Andric MCSection *NVPTXTargetObjectFile::SelectSectionForGlobal(
64460b57cec5SDimitry Andric     const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
64470b57cec5SDimitry Andric   return getDataSection();
64480b57cec5SDimitry Andric }
6449