106c3fb27SDimitry Andric //===- NVPTX.cpp ----------------------------------------------------------===// 206c3fb27SDimitry Andric // 306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 606c3fb27SDimitry Andric // 706c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 806c3fb27SDimitry Andric 906c3fb27SDimitry Andric #include "ABIInfoImpl.h" 1006c3fb27SDimitry Andric #include "TargetInfo.h" 1106c3fb27SDimitry Andric #include "llvm/IR/IntrinsicsNVPTX.h" 1206c3fb27SDimitry Andric 1306c3fb27SDimitry Andric using namespace clang; 1406c3fb27SDimitry Andric using namespace clang::CodeGen; 1506c3fb27SDimitry Andric 1606c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 1706c3fb27SDimitry Andric // NVPTX ABI Implementation 1806c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 1906c3fb27SDimitry Andric 2006c3fb27SDimitry Andric namespace { 2106c3fb27SDimitry Andric 2206c3fb27SDimitry Andric class NVPTXTargetCodeGenInfo; 2306c3fb27SDimitry Andric 2406c3fb27SDimitry Andric class NVPTXABIInfo : public ABIInfo { 2506c3fb27SDimitry Andric NVPTXTargetCodeGenInfo &CGInfo; 2606c3fb27SDimitry Andric 2706c3fb27SDimitry Andric public: 2806c3fb27SDimitry Andric NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info) 2906c3fb27SDimitry Andric : ABIInfo(CGT), CGInfo(Info) {} 3006c3fb27SDimitry Andric 3106c3fb27SDimitry Andric ABIArgInfo classifyReturnType(QualType RetTy) const; 3206c3fb27SDimitry Andric ABIArgInfo classifyArgumentType(QualType Ty) const; 3306c3fb27SDimitry Andric 3406c3fb27SDimitry Andric void computeInfo(CGFunctionInfo &FI) const override; 35*0fca6ea1SDimitry Andric RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty, 36*0fca6ea1SDimitry Andric AggValueSlot Slot) const override; 3706c3fb27SDimitry Andric bool isUnsupportedType(QualType T) const; 3806c3fb27SDimitry Andric ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const; 3906c3fb27SDimitry Andric }; 4006c3fb27SDimitry Andric 4106c3fb27SDimitry Andric class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { 4206c3fb27SDimitry Andric public: 4306c3fb27SDimitry Andric NVPTXTargetCodeGenInfo(CodeGenTypes &CGT) 4406c3fb27SDimitry Andric : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {} 4506c3fb27SDimitry Andric 4606c3fb27SDimitry Andric void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, 4706c3fb27SDimitry Andric CodeGen::CodeGenModule &M) const override; 4806c3fb27SDimitry Andric bool shouldEmitStaticExternCAliases() const override; 4906c3fb27SDimitry Andric 50*0fca6ea1SDimitry Andric llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM, 51*0fca6ea1SDimitry Andric llvm::PointerType *T, 52*0fca6ea1SDimitry Andric QualType QT) const override; 53*0fca6ea1SDimitry Andric 5406c3fb27SDimitry Andric llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override { 5506c3fb27SDimitry Andric // On the device side, surface reference is represented as an object handle 5606c3fb27SDimitry Andric // in 64-bit integer. 5706c3fb27SDimitry Andric return llvm::Type::getInt64Ty(getABIInfo().getVMContext()); 5806c3fb27SDimitry Andric } 5906c3fb27SDimitry Andric 6006c3fb27SDimitry Andric llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override { 6106c3fb27SDimitry Andric // On the device side, texture reference is represented as an object handle 6206c3fb27SDimitry Andric // in 64-bit integer. 6306c3fb27SDimitry Andric return llvm::Type::getInt64Ty(getABIInfo().getVMContext()); 6406c3fb27SDimitry Andric } 6506c3fb27SDimitry Andric 6606c3fb27SDimitry Andric bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst, 6706c3fb27SDimitry Andric LValue Src) const override { 6806c3fb27SDimitry Andric emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src); 6906c3fb27SDimitry Andric return true; 7006c3fb27SDimitry Andric } 7106c3fb27SDimitry Andric 7206c3fb27SDimitry Andric bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst, 7306c3fb27SDimitry Andric LValue Src) const override { 7406c3fb27SDimitry Andric emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src); 7506c3fb27SDimitry Andric return true; 7606c3fb27SDimitry Andric } 7706c3fb27SDimitry Andric 7806c3fb27SDimitry Andric // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the 7906c3fb27SDimitry Andric // resulting MDNode to the nvvm.annotations MDNode. 8006c3fb27SDimitry Andric static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, 8106c3fb27SDimitry Andric int Operand); 8206c3fb27SDimitry Andric 835f757f3fSDimitry Andric private: 8406c3fb27SDimitry Andric static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst, 8506c3fb27SDimitry Andric LValue Src) { 8606c3fb27SDimitry Andric llvm::Value *Handle = nullptr; 8706c3fb27SDimitry Andric llvm::Constant *C = 88*0fca6ea1SDimitry Andric llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF)); 8906c3fb27SDimitry Andric // Lookup `addrspacecast` through the constant pointer if any. 9006c3fb27SDimitry Andric if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C)) 9106c3fb27SDimitry Andric C = llvm::cast<llvm::Constant>(ASC->getPointerOperand()); 9206c3fb27SDimitry Andric if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) { 9306c3fb27SDimitry Andric // Load the handle from the specific global variable using 9406c3fb27SDimitry Andric // `nvvm.texsurf.handle.internal` intrinsic. 9506c3fb27SDimitry Andric Handle = CGF.EmitRuntimeCall( 9606c3fb27SDimitry Andric CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal, 9706c3fb27SDimitry Andric {GV->getType()}), 9806c3fb27SDimitry Andric {GV}, "texsurf_handle"); 9906c3fb27SDimitry Andric } else 10006c3fb27SDimitry Andric Handle = CGF.EmitLoadOfScalar(Src, SourceLocation()); 10106c3fb27SDimitry Andric CGF.EmitStoreOfScalar(Handle, Dst); 10206c3fb27SDimitry Andric } 10306c3fb27SDimitry Andric }; 10406c3fb27SDimitry Andric 10506c3fb27SDimitry Andric /// Checks if the type is unsupported directly by the current target. 10606c3fb27SDimitry Andric bool NVPTXABIInfo::isUnsupportedType(QualType T) const { 10706c3fb27SDimitry Andric ASTContext &Context = getContext(); 10806c3fb27SDimitry Andric if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type()) 10906c3fb27SDimitry Andric return true; 11006c3fb27SDimitry Andric if (!Context.getTargetInfo().hasFloat128Type() && 11106c3fb27SDimitry Andric (T->isFloat128Type() || 11206c3fb27SDimitry Andric (T->isRealFloatingType() && Context.getTypeSize(T) == 128))) 11306c3fb27SDimitry Andric return true; 11406c3fb27SDimitry Andric if (const auto *EIT = T->getAs<BitIntType>()) 11506c3fb27SDimitry Andric return EIT->getNumBits() > 11606c3fb27SDimitry Andric (Context.getTargetInfo().hasInt128Type() ? 128U : 64U); 11706c3fb27SDimitry Andric if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() && 11806c3fb27SDimitry Andric Context.getTypeSize(T) > 64U) 11906c3fb27SDimitry Andric return true; 12006c3fb27SDimitry Andric if (const auto *AT = T->getAsArrayTypeUnsafe()) 12106c3fb27SDimitry Andric return isUnsupportedType(AT->getElementType()); 12206c3fb27SDimitry Andric const auto *RT = T->getAs<RecordType>(); 12306c3fb27SDimitry Andric if (!RT) 12406c3fb27SDimitry Andric return false; 12506c3fb27SDimitry Andric const RecordDecl *RD = RT->getDecl(); 12606c3fb27SDimitry Andric 12706c3fb27SDimitry Andric // If this is a C++ record, check the bases first. 12806c3fb27SDimitry Andric if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) 12906c3fb27SDimitry Andric for (const CXXBaseSpecifier &I : CXXRD->bases()) 13006c3fb27SDimitry Andric if (isUnsupportedType(I.getType())) 13106c3fb27SDimitry Andric return true; 13206c3fb27SDimitry Andric 13306c3fb27SDimitry Andric for (const FieldDecl *I : RD->fields()) 13406c3fb27SDimitry Andric if (isUnsupportedType(I->getType())) 13506c3fb27SDimitry Andric return true; 13606c3fb27SDimitry Andric return false; 13706c3fb27SDimitry Andric } 13806c3fb27SDimitry Andric 13906c3fb27SDimitry Andric /// Coerce the given type into an array with maximum allowed size of elements. 14006c3fb27SDimitry Andric ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty, 14106c3fb27SDimitry Andric unsigned MaxSize) const { 14206c3fb27SDimitry Andric // Alignment and Size are measured in bits. 14306c3fb27SDimitry Andric const uint64_t Size = getContext().getTypeSize(Ty); 14406c3fb27SDimitry Andric const uint64_t Alignment = getContext().getTypeAlign(Ty); 14506c3fb27SDimitry Andric const unsigned Div = std::min<unsigned>(MaxSize, Alignment); 14606c3fb27SDimitry Andric llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div); 14706c3fb27SDimitry Andric const uint64_t NumElements = (Size + Div - 1) / Div; 14806c3fb27SDimitry Andric return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements)); 14906c3fb27SDimitry Andric } 15006c3fb27SDimitry Andric 15106c3fb27SDimitry Andric ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { 15206c3fb27SDimitry Andric if (RetTy->isVoidType()) 15306c3fb27SDimitry Andric return ABIArgInfo::getIgnore(); 15406c3fb27SDimitry Andric 15506c3fb27SDimitry Andric if (getContext().getLangOpts().OpenMP && 15606c3fb27SDimitry Andric getContext().getLangOpts().OpenMPIsTargetDevice && 15706c3fb27SDimitry Andric isUnsupportedType(RetTy)) 15806c3fb27SDimitry Andric return coerceToIntArrayWithLimit(RetTy, 64); 15906c3fb27SDimitry Andric 16006c3fb27SDimitry Andric // note: this is different from default ABI 16106c3fb27SDimitry Andric if (!RetTy->isScalarType()) 16206c3fb27SDimitry Andric return ABIArgInfo::getDirect(); 16306c3fb27SDimitry Andric 16406c3fb27SDimitry Andric // Treat an enum type as its underlying type. 16506c3fb27SDimitry Andric if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) 16606c3fb27SDimitry Andric RetTy = EnumTy->getDecl()->getIntegerType(); 16706c3fb27SDimitry Andric 16806c3fb27SDimitry Andric return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) 16906c3fb27SDimitry Andric : ABIArgInfo::getDirect()); 17006c3fb27SDimitry Andric } 17106c3fb27SDimitry Andric 17206c3fb27SDimitry Andric ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const { 17306c3fb27SDimitry Andric // Treat an enum type as its underlying type. 17406c3fb27SDimitry Andric if (const EnumType *EnumTy = Ty->getAs<EnumType>()) 17506c3fb27SDimitry Andric Ty = EnumTy->getDecl()->getIntegerType(); 17606c3fb27SDimitry Andric 17706c3fb27SDimitry Andric // Return aggregates type as indirect by value 17806c3fb27SDimitry Andric if (isAggregateTypeForABI(Ty)) { 17906c3fb27SDimitry Andric // Under CUDA device compilation, tex/surf builtin types are replaced with 18006c3fb27SDimitry Andric // object types and passed directly. 18106c3fb27SDimitry Andric if (getContext().getLangOpts().CUDAIsDevice) { 18206c3fb27SDimitry Andric if (Ty->isCUDADeviceBuiltinSurfaceType()) 18306c3fb27SDimitry Andric return ABIArgInfo::getDirect( 18406c3fb27SDimitry Andric CGInfo.getCUDADeviceBuiltinSurfaceDeviceType()); 18506c3fb27SDimitry Andric if (Ty->isCUDADeviceBuiltinTextureType()) 18606c3fb27SDimitry Andric return ABIArgInfo::getDirect( 18706c3fb27SDimitry Andric CGInfo.getCUDADeviceBuiltinTextureDeviceType()); 18806c3fb27SDimitry Andric } 18906c3fb27SDimitry Andric return getNaturalAlignIndirect(Ty, /* byval */ true); 19006c3fb27SDimitry Andric } 19106c3fb27SDimitry Andric 19206c3fb27SDimitry Andric if (const auto *EIT = Ty->getAs<BitIntType>()) { 19306c3fb27SDimitry Andric if ((EIT->getNumBits() > 128) || 19406c3fb27SDimitry Andric (!getContext().getTargetInfo().hasInt128Type() && 19506c3fb27SDimitry Andric EIT->getNumBits() > 64)) 19606c3fb27SDimitry Andric return getNaturalAlignIndirect(Ty, /* byval */ true); 19706c3fb27SDimitry Andric } 19806c3fb27SDimitry Andric 19906c3fb27SDimitry Andric return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) 20006c3fb27SDimitry Andric : ABIArgInfo::getDirect()); 20106c3fb27SDimitry Andric } 20206c3fb27SDimitry Andric 20306c3fb27SDimitry Andric void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const { 20406c3fb27SDimitry Andric if (!getCXXABI().classifyReturnType(FI)) 20506c3fb27SDimitry Andric FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); 206*0fca6ea1SDimitry Andric 207*0fca6ea1SDimitry Andric for (auto &&[ArgumentsCount, I] : llvm::enumerate(FI.arguments())) 208*0fca6ea1SDimitry Andric I.info = ArgumentsCount < FI.getNumRequiredArgs() 209*0fca6ea1SDimitry Andric ? classifyArgumentType(I.type) 210*0fca6ea1SDimitry Andric : ABIArgInfo::getDirect(); 21106c3fb27SDimitry Andric 21206c3fb27SDimitry Andric // Always honor user-specified calling convention. 21306c3fb27SDimitry Andric if (FI.getCallingConvention() != llvm::CallingConv::C) 21406c3fb27SDimitry Andric return; 21506c3fb27SDimitry Andric 21606c3fb27SDimitry Andric FI.setEffectiveCallingConvention(getRuntimeCC()); 21706c3fb27SDimitry Andric } 21806c3fb27SDimitry Andric 219*0fca6ea1SDimitry Andric RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, 220*0fca6ea1SDimitry Andric QualType Ty, AggValueSlot Slot) const { 221*0fca6ea1SDimitry Andric return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false, 222*0fca6ea1SDimitry Andric getContext().getTypeInfoInChars(Ty), 223*0fca6ea1SDimitry Andric CharUnits::fromQuantity(1), 224*0fca6ea1SDimitry Andric /*AllowHigherAlign=*/true, Slot); 22506c3fb27SDimitry Andric } 22606c3fb27SDimitry Andric 22706c3fb27SDimitry Andric void NVPTXTargetCodeGenInfo::setTargetAttributes( 22806c3fb27SDimitry Andric const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { 22906c3fb27SDimitry Andric if (GV->isDeclaration()) 23006c3fb27SDimitry Andric return; 23106c3fb27SDimitry Andric const VarDecl *VD = dyn_cast_or_null<VarDecl>(D); 23206c3fb27SDimitry Andric if (VD) { 23306c3fb27SDimitry Andric if (M.getLangOpts().CUDA) { 23406c3fb27SDimitry Andric if (VD->getType()->isCUDADeviceBuiltinSurfaceType()) 23506c3fb27SDimitry Andric addNVVMMetadata(GV, "surface", 1); 23606c3fb27SDimitry Andric else if (VD->getType()->isCUDADeviceBuiltinTextureType()) 23706c3fb27SDimitry Andric addNVVMMetadata(GV, "texture", 1); 23806c3fb27SDimitry Andric return; 23906c3fb27SDimitry Andric } 24006c3fb27SDimitry Andric } 24106c3fb27SDimitry Andric 24206c3fb27SDimitry Andric const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); 24306c3fb27SDimitry Andric if (!FD) return; 24406c3fb27SDimitry Andric 24506c3fb27SDimitry Andric llvm::Function *F = cast<llvm::Function>(GV); 24606c3fb27SDimitry Andric 24706c3fb27SDimitry Andric // Perform special handling in OpenCL mode 24806c3fb27SDimitry Andric if (M.getLangOpts().OpenCL) { 24906c3fb27SDimitry Andric // Use OpenCL function attributes to check for kernel functions 25006c3fb27SDimitry Andric // By default, all functions are device functions 25106c3fb27SDimitry Andric if (FD->hasAttr<OpenCLKernelAttr>()) { 25206c3fb27SDimitry Andric // OpenCL __kernel functions get kernel metadata 25306c3fb27SDimitry Andric // Create !{<func-ref>, metadata !"kernel", i32 1} node 25406c3fb27SDimitry Andric addNVVMMetadata(F, "kernel", 1); 25506c3fb27SDimitry Andric // And kernel functions are not subject to inlining 25606c3fb27SDimitry Andric F->addFnAttr(llvm::Attribute::NoInline); 25706c3fb27SDimitry Andric } 25806c3fb27SDimitry Andric } 25906c3fb27SDimitry Andric 26006c3fb27SDimitry Andric // Perform special handling in CUDA mode. 26106c3fb27SDimitry Andric if (M.getLangOpts().CUDA) { 26206c3fb27SDimitry Andric // CUDA __global__ functions get a kernel metadata entry. Since 26306c3fb27SDimitry Andric // __global__ functions cannot be called from the device, we do not 26406c3fb27SDimitry Andric // need to set the noinline attribute. 26506c3fb27SDimitry Andric if (FD->hasAttr<CUDAGlobalAttr>()) { 26606c3fb27SDimitry Andric // Create !{<func-ref>, metadata !"kernel", i32 1} node 26706c3fb27SDimitry Andric addNVVMMetadata(F, "kernel", 1); 26806c3fb27SDimitry Andric } 2695f757f3fSDimitry Andric if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) 2705f757f3fSDimitry Andric M.handleCUDALaunchBoundsAttr(F, Attr); 27106c3fb27SDimitry Andric } 27206c3fb27SDimitry Andric 27306c3fb27SDimitry Andric // Attach kernel metadata directly if compiling for NVPTX. 27406c3fb27SDimitry Andric if (FD->hasAttr<NVPTXKernelAttr>()) { 27506c3fb27SDimitry Andric addNVVMMetadata(F, "kernel", 1); 27606c3fb27SDimitry Andric } 27706c3fb27SDimitry Andric } 27806c3fb27SDimitry Andric 27906c3fb27SDimitry Andric void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV, 28006c3fb27SDimitry Andric StringRef Name, int Operand) { 28106c3fb27SDimitry Andric llvm::Module *M = GV->getParent(); 28206c3fb27SDimitry Andric llvm::LLVMContext &Ctx = M->getContext(); 28306c3fb27SDimitry Andric 28406c3fb27SDimitry Andric // Get "nvvm.annotations" metadata node 28506c3fb27SDimitry Andric llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); 28606c3fb27SDimitry Andric 28706c3fb27SDimitry Andric llvm::Metadata *MDVals[] = { 28806c3fb27SDimitry Andric llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name), 28906c3fb27SDimitry Andric llvm::ConstantAsMetadata::get( 29006c3fb27SDimitry Andric llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))}; 29106c3fb27SDimitry Andric // Append metadata to nvvm.annotations 29206c3fb27SDimitry Andric MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); 29306c3fb27SDimitry Andric } 29406c3fb27SDimitry Andric 29506c3fb27SDimitry Andric bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { 29606c3fb27SDimitry Andric return false; 29706c3fb27SDimitry Andric } 298*0fca6ea1SDimitry Andric 299*0fca6ea1SDimitry Andric llvm::Constant * 300*0fca6ea1SDimitry Andric NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM, 301*0fca6ea1SDimitry Andric llvm::PointerType *PT, 302*0fca6ea1SDimitry Andric QualType QT) const { 303*0fca6ea1SDimitry Andric auto &Ctx = CGM.getContext(); 304*0fca6ea1SDimitry Andric if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local)) 305*0fca6ea1SDimitry Andric return llvm::ConstantPointerNull::get(PT); 306*0fca6ea1SDimitry Andric 307*0fca6ea1SDimitry Andric auto NPT = llvm::PointerType::get( 308*0fca6ea1SDimitry Andric PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic)); 309*0fca6ea1SDimitry Andric return llvm::ConstantExpr::getAddrSpaceCast( 310*0fca6ea1SDimitry Andric llvm::ConstantPointerNull::get(NPT), PT); 311*0fca6ea1SDimitry Andric } 31206c3fb27SDimitry Andric } 31306c3fb27SDimitry Andric 3145f757f3fSDimitry Andric void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F, 3155f757f3fSDimitry Andric const CUDALaunchBoundsAttr *Attr, 3165f757f3fSDimitry Andric int32_t *MaxThreadsVal, 3175f757f3fSDimitry Andric int32_t *MinBlocksVal, 3185f757f3fSDimitry Andric int32_t *MaxClusterRankVal) { 3195f757f3fSDimitry Andric // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node 3205f757f3fSDimitry Andric llvm::APSInt MaxThreads(32); 3215f757f3fSDimitry Andric MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext()); 3225f757f3fSDimitry Andric if (MaxThreads > 0) { 3235f757f3fSDimitry Andric if (MaxThreadsVal) 3245f757f3fSDimitry Andric *MaxThreadsVal = MaxThreads.getExtValue(); 3255f757f3fSDimitry Andric if (F) { 3265f757f3fSDimitry Andric // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node 3275f757f3fSDimitry Andric NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx", 3285f757f3fSDimitry Andric MaxThreads.getExtValue()); 3295f757f3fSDimitry Andric } 3305f757f3fSDimitry Andric } 3315f757f3fSDimitry Andric 3325f757f3fSDimitry Andric // min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it 3335f757f3fSDimitry Andric // was not specified in __launch_bounds__ or if the user specified a 0 value, 3345f757f3fSDimitry Andric // we don't have to add a PTX directive. 3355f757f3fSDimitry Andric if (Attr->getMinBlocks()) { 3365f757f3fSDimitry Andric llvm::APSInt MinBlocks(32); 3375f757f3fSDimitry Andric MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext()); 3385f757f3fSDimitry Andric if (MinBlocks > 0) { 3395f757f3fSDimitry Andric if (MinBlocksVal) 3405f757f3fSDimitry Andric *MinBlocksVal = MinBlocks.getExtValue(); 3415f757f3fSDimitry Andric if (F) { 3425f757f3fSDimitry Andric // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node 3435f757f3fSDimitry Andric NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm", 3445f757f3fSDimitry Andric MinBlocks.getExtValue()); 3455f757f3fSDimitry Andric } 3465f757f3fSDimitry Andric } 3475f757f3fSDimitry Andric } 3485f757f3fSDimitry Andric if (Attr->getMaxBlocks()) { 3495f757f3fSDimitry Andric llvm::APSInt MaxBlocks(32); 3505f757f3fSDimitry Andric MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext()); 3515f757f3fSDimitry Andric if (MaxBlocks > 0) { 3525f757f3fSDimitry Andric if (MaxClusterRankVal) 3535f757f3fSDimitry Andric *MaxClusterRankVal = MaxBlocks.getExtValue(); 3545f757f3fSDimitry Andric if (F) { 3555f757f3fSDimitry Andric // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node 3565f757f3fSDimitry Andric NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank", 3575f757f3fSDimitry Andric MaxBlocks.getExtValue()); 3585f757f3fSDimitry Andric } 3595f757f3fSDimitry Andric } 3605f757f3fSDimitry Andric } 3615f757f3fSDimitry Andric } 3625f757f3fSDimitry Andric 36306c3fb27SDimitry Andric std::unique_ptr<TargetCodeGenInfo> 36406c3fb27SDimitry Andric CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) { 36506c3fb27SDimitry Andric return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes()); 36606c3fb27SDimitry Andric } 367