//===- NVPTX.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/IntrinsicsNVPTX.h"

using namespace clang;
using namespace clang::CodeGen;

//===----------------------------------------------------------------------===//
// NVPTX ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class NVPTXTargetCodeGenInfo;

class NVPTXABIInfo : public ABIInfo {
  NVPTXTargetCodeGenInfo &CGInfo;

public:
  NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
      : ABIInfo(CGT), CGInfo(Info) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType Ty) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;
  bool isUnsupportedType(QualType T) const;
  ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
};

class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  bool shouldEmitStaticExternCAliases() const override;

  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T,
                                 QualType QT) const override;

  llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
    // On the device side, a surface reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
    // On the device side, a texture reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  // Creates an MDNode with GV, Name, and Operand as operands, and appends it
  // to the nvvm.annotations named metadata.
  static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
                              int Operand);

  static void
  addGridConstantNVVMMetadata(llvm::GlobalValue *GV,
                              const SmallVectorImpl<int> &GridConstantArgs);

private:
  static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                           LValue Src) {
    llvm::Value *Handle = nullptr;
    llvm::Constant *C =
        llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF));
    // Look through a constant `addrspacecast` of the pointer, if any.
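    // (Texture/surface reference globals typically live in a non-generic
    // address space, so the constant pointer reaching this point is often
    // wrapped in a constant addrspacecast; peeling it off lets us recover
    // the underlying global variable.)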
    if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
      C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
    if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
      // Load the handle from the specific global variable using the
      // `nvvm.texsurf.handle.internal` intrinsic.
      Handle = CGF.EmitRuntimeCall(
          CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
                               {GV->getType()}),
          {GV}, "texsurf_handle");
    } else
      Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
    CGF.EmitStoreOfScalar(Handle, Dst);
  }
};

/// Checks if the type is unsupported directly by the current target.
bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
  ASTContext &Context = getContext();
  if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
    return true;
  if (!Context.getTargetInfo().hasFloat128Type() &&
      (T->isFloat128Type() ||
       (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
    return true;
  if (const auto *EIT = T->getAs<BitIntType>())
    return EIT->getNumBits() >
           (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
  if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
      Context.getTypeSize(T) > 64U)
    return true;
  if (const auto *AT = T->getAsArrayTypeUnsafe())
    return isUnsupportedType(AT->getElementType());
  const auto *RT = T->getAs<RecordType>();
  if (!RT)
    return false;
  const RecordDecl *RD = RT->getDecl();

  // If this is a C++ record, check the bases first.
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
    for (const CXXBaseSpecifier &I : CXXRD->bases())
      if (isUnsupportedType(I.getType()))
        return true;

  for (const FieldDecl *I : RD->fields())
    if (isUnsupportedType(I->getType()))
      return true;
  return false;
}

/// Coerce the given type into an array of integers whose element width is
/// capped at MaxSize bits.
ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
                                                   unsigned MaxSize) const {
  // Alignment and Size are measured in bits.
  const uint64_t Size = getContext().getTypeSize(Ty);
  const uint64_t Alignment = getContext().getTypeAlign(Ty);
  const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
  llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
  const uint64_t NumElements = (Size + Div - 1) / Div;
  return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
}

ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  if (getContext().getLangOpts().OpenMP &&
      getContext().getLangOpts().OpenMPIsTargetDevice &&
      isUnsupportedType(RetTy))
    return coerceToIntArrayWithLimit(RetTy, 64);

  // Note: this differs from the default ABI.
  if (!RetTy->isScalarType())
    return ABIArgInfo::getDirect();

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
                                               : ABIArgInfo::getDirect());
}

ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
  // Treat an enum type as its underlying type.
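  // (Illustrative note: an enum whose underlying type is 'short' is
  // classified below exactly like 'short' itself, i.e. as a promotable
  // integer that gets extended to a full 32-bit value.)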
  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  // Return aggregate types as indirect, by value.
  if (isAggregateTypeForABI(Ty)) {
    // Under CUDA device compilation, tex/surf builtin types are replaced with
    // object types and passed directly.
    if (getContext().getLangOpts().CUDAIsDevice) {
      if (Ty->isCUDADeviceBuiltinSurfaceType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
      if (Ty->isCUDADeviceBuiltinTextureType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinTextureDeviceType());
    }
    return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
  }

  if (const auto *EIT = Ty->getAs<BitIntType>()) {
    if ((EIT->getNumBits() > 128) ||
        (!getContext().getTargetInfo().hasInt128Type() &&
         EIT->getNumBits() > 64))
      return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
  }

  return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
                                            : ABIArgInfo::getDirect());
}

void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

  for (auto &&[ArgumentsCount, I] : llvm::enumerate(FI.arguments()))
    I.info = ArgumentsCount < FI.getNumRequiredArgs()
                 ? classifyArgumentType(I.type)
                 : ABIArgInfo::getDirect();

  // Always honor user-specified calling convention.
  if (FI.getCallingConvention() != llvm::CallingConv::C)
    return;

  FI.setEffectiveCallingConvention(getRuntimeCC());
}

RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                               QualType Ty, AggValueSlot Slot) const {
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false,
                          getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(1),
                          /*AllowHigherAlign=*/true, Slot);
}

void NVPTXTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  if (GV->isDeclaration())
    return;
  const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD) {
    if (M.getLangOpts().CUDA) {
      if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
        addNVVMMetadata(GV, "surface", 1);
      else if (VD->getType()->isCUDADeviceBuiltinTextureType())
        addNVVMMetadata(GV, "texture", 1);
      return;
    }
  }

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (!FD)
    return;

  llvm::Function *F = cast<llvm::Function>(GV);

  // Perform special handling in OpenCL mode.
  if (M.getLangOpts().OpenCL) {
    // Use OpenCL function attributes to check for kernel functions.
    // By default, all functions are device functions.
    if (FD->hasAttr<OpenCLKernelAttr>()) {
      // OpenCL __kernel functions get the PTX kernel calling convention.
      F->setCallingConv(llvm::CallingConv::PTX_Kernel);
      // And kernel functions are not subject to inlining.
      F->addFnAttr(llvm::Attribute::NoInline);
    }
  }

  // Perform special handling in CUDA mode.
  if (M.getLangOpts().CUDA) {
    // CUDA __global__ functions get the PTX kernel calling convention. Since
    // __global__ functions cannot be called from the device, we do not need
    // to set the noinline attribute.
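    // (Illustrative example, with a hypothetical kernel: given
    //   __global__ void k(__grid_constant__ const S s, int x);
    // the loop below records the 1-based index of 's', so the module gains an
    // !nvvm.annotations operand of the form
    //   !{ptr @k, !"grid_constant", !{i32 1}}
    // modulo C++ name mangling of 'k'.)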
    if (FD->hasAttr<CUDAGlobalAttr>()) {
      SmallVector<int, 10> GCI;
      for (auto IV : llvm::enumerate(FD->parameters()))
        if (IV.value()->hasAttr<CUDAGridConstantAttr>())
          // For some reason arg indices are 1-based in NVVM.
          GCI.push_back(IV.index() + 1);
      // Kernels are identified by the PTX_Kernel calling convention.
      F->setCallingConv(llvm::CallingConv::PTX_Kernel);
      addGridConstantNVVMMetadata(F, GCI);
    }
    if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
      M.handleCUDALaunchBoundsAttr(F, Attr);
  }

  // Mark the function as a kernel directly if compiling for NVPTX.
  if (FD->hasAttr<NVPTXKernelAttr>()) {
    F->setCallingConv(llvm::CallingConv::PTX_Kernel);
  }
}

void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
                                             StringRef Name, int Operand) {
  llvm::Module *M = GV->getParent();
  llvm::LLVMContext &Ctx = M->getContext();

  // Get the "nvvm.annotations" metadata node.
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

  SmallVector<llvm::Metadata *, 5> MDVals = {
      llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
      llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};

  // Append metadata to nvvm.annotations.
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata(
    llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) {
  llvm::Module *M = GV->getParent();
  llvm::LLVMContext &Ctx = M->getContext();

  // Get the "nvvm.annotations" metadata node.
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

  SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)};
  if (!GridConstantArgs.empty()) {
    SmallVector<llvm::Metadata *, 10> GCM;
    for (int I : GridConstantArgs)
      GCM.push_back(llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I)));
    MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
                   llvm::MDNode::get(Ctx, GCM)});
  }

  // Append metadata to nvvm.annotations.
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  return false;
}

llvm::Constant *
NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
                                       llvm::PointerType *PT,
                                       QualType QT) const {
  auto &Ctx = CGM.getContext();
  if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
    return llvm::ConstantPointerNull::get(PT);

  auto NPT = llvm::PointerType::get(
      PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
  return llvm::ConstantExpr::getAddrSpaceCast(
      llvm::ConstantPointerNull::get(NPT), PT);
}
} // namespace

void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
                                               const CUDALaunchBoundsAttr *Attr,
                                               int32_t *MaxThreadsVal,
                                               int32_t *MinBlocksVal,
                                               int32_t *MaxClusterRankVal) {
  llvm::APSInt MaxThreads(32);
  MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
  if (MaxThreads > 0) {
    if (MaxThreadsVal)
      *MaxThreadsVal = MaxThreads.getExtValue();
    if (F) {
      // Create the !{<func-ref>, metadata !"maxntidx", i32 <val>} node.
      NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
                                              MaxThreads.getExtValue());
    }
  }

  // MinBlocks and MaxBlocks are optional arguments for CUDALaunchBoundsAttr.
  // If one was not specified in __launch_bounds__, or the user specified a 0
  // value, we don't have to add a PTX directive for it.
  if (Attr->getMinBlocks()) {
    llvm::APSInt MinBlocks(32);
    MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
    if (MinBlocks > 0) {
      if (MinBlocksVal)
        *MinBlocksVal = MinBlocks.getExtValue();
      if (F) {
        // Create the !{<func-ref>, metadata !"minctasm", i32 <val>} node.
        NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
                                                MinBlocks.getExtValue());
      }
    }
  }
  if (Attr->getMaxBlocks()) {
    llvm::APSInt MaxBlocks(32);
    MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
    if (MaxBlocks > 0) {
      if (MaxClusterRankVal)
        *MaxClusterRankVal = MaxBlocks.getExtValue();
      if (F) {
        // Create the !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node.
        NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
                                                MaxBlocks.getExtValue());
      }
    }
  }
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
  return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
}
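
// Usage sketch (assumption: this mirrors how CodeGenModule dispatches on the
// target triple when constructing its TargetCodeGenInfo):
//   case llvm::Triple::nvptx:
//   case llvm::Triple::nvptx64:
//     return createNVPTXTargetCodeGenInfo(CGM);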