xref: /llvm-project/clang/lib/CodeGen/Targets/NVPTX.cpp (revision 4583f6d3443c8dc6605c868724e3743161954210)
1 //===- NVPTX.cpp ----------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ABIInfoImpl.h"
10 #include "TargetInfo.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/IR/CallingConv.h"
13 #include "llvm/IR/IntrinsicsNVPTX.h"
14 
15 using namespace clang;
16 using namespace clang::CodeGen;
17 
18 //===----------------------------------------------------------------------===//
19 // NVPTX ABI Implementation
20 //===----------------------------------------------------------------------===//
21 
22 namespace {
23 
24 class NVPTXTargetCodeGenInfo;
25 
// ABI lowering for NVPTX: decides how return values and arguments are
// classified (direct, extended, indirect, or coerced to an int array).
class NVPTXABIInfo : public ABIInfo {
  // Back-reference to the owning TargetCodeGenInfo; used to obtain the
  // device-side handle types for CUDA surface/texture builtin types.
  NVPTXTargetCodeGenInfo &CGInfo;

public:
  NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
      : ABIInfo(CGT), CGInfo(Info) {}

  // Classify how a function result is returned to the caller.
  ABIArgInfo classifyReturnType(QualType RetTy) const;
  // Classify how a single (required) argument is passed to a callee.
  ABIArgInfo classifyArgumentType(QualType Ty) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;
  // Returns true if the target cannot represent T natively (e.g. 128-bit
  // floats or >64-bit integers on targets lacking support).
  bool isUnsupportedType(QualType T) const;
  // Coerce Ty into an integer array whose element width is at most MaxSize
  // bits; used to pass otherwise-unsupported types.
  ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
};
42 
// TargetCodeGenInfo for NVPTX: emits NVVM annotations for kernels and
// surfaces/textures, and maps CUDA surface/texture builtin types to their
// device-side handle representation.
class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  bool shouldEmitStaticExternCAliases() const override;

  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T,
                                 QualType QT) const override;

  llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
    // On the device side, surface reference is represented as an object handle
    // in 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
    // On the device side, texture reference is represented as an object handle
    // in 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
  // resulting MDNode to the nvvm.annotations MDNode.
  static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
                              int Operand);

  // Records the 1-based indices of __grid_constant__ kernel parameters for GV
  // in the nvvm.annotations MDNode.
  static void
  addGridConstantNVVMMetadata(llvm::GlobalValue *GV,
                              const SmallVectorImpl<int> &GridConstantArgs);

private:
  // Copies a surface/texture handle from Src to Dst. When Src refers to a
  // global variable, the handle is obtained through the
  // nvvm.texsurf.handle.internal intrinsic; otherwise it is loaded as an
  // ordinary scalar.
  static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                           LValue Src) {
    llvm::Value *Handle = nullptr;
    llvm::Constant *C =
        llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF));
    // Lookup `addrspacecast` through the constant pointer if any.
    if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
      C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
    if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
      // Load the handle from the specific global variable using
      // `nvvm.texsurf.handle.internal` intrinsic.
      Handle = CGF.EmitRuntimeCall(
          CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
                               {GV->getType()}),
          {GV}, "texsurf_handle");
    } else
      Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
    CGF.EmitStoreOfScalar(Handle, Dst);
  }
};
110 
111 /// Checks if the type is unsupported directly by the current target.
112 bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
113   ASTContext &Context = getContext();
114   if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
115     return true;
116   if (!Context.getTargetInfo().hasFloat128Type() &&
117       (T->isFloat128Type() ||
118        (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
119     return true;
120   if (const auto *EIT = T->getAs<BitIntType>())
121     return EIT->getNumBits() >
122            (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
123   if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
124       Context.getTypeSize(T) > 64U)
125     return true;
126   if (const auto *AT = T->getAsArrayTypeUnsafe())
127     return isUnsupportedType(AT->getElementType());
128   const auto *RT = T->getAs<RecordType>();
129   if (!RT)
130     return false;
131   const RecordDecl *RD = RT->getDecl();
132 
133   // If this is a C++ record, check the bases first.
134   if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
135     for (const CXXBaseSpecifier &I : CXXRD->bases())
136       if (isUnsupportedType(I.getType()))
137         return true;
138 
139   for (const FieldDecl *I : RD->fields())
140     if (isUnsupportedType(I->getType()))
141       return true;
142   return false;
143 }
144 
145 /// Coerce the given type into an array with maximum allowed size of elements.
146 ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
147                                                    unsigned MaxSize) const {
148   // Alignment and Size are measured in bits.
149   const uint64_t Size = getContext().getTypeSize(Ty);
150   const uint64_t Alignment = getContext().getTypeAlign(Ty);
151   const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
152   llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
153   const uint64_t NumElements = (Size + Div - 1) / Div;
154   return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
155 }
156 
157 ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
158   if (RetTy->isVoidType())
159     return ABIArgInfo::getIgnore();
160 
161   if (getContext().getLangOpts().OpenMP &&
162       getContext().getLangOpts().OpenMPIsTargetDevice &&
163       isUnsupportedType(RetTy))
164     return coerceToIntArrayWithLimit(RetTy, 64);
165 
166   // note: this is different from default ABI
167   if (!RetTy->isScalarType())
168     return ABIArgInfo::getDirect();
169 
170   // Treat an enum type as its underlying type.
171   if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
172     RetTy = EnumTy->getDecl()->getIntegerType();
173 
174   return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
175                                                : ABIArgInfo::getDirect());
176 }
177 
178 ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
179   // Treat an enum type as its underlying type.
180   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
181     Ty = EnumTy->getDecl()->getIntegerType();
182 
183   // Return aggregates type as indirect by value
184   if (isAggregateTypeForABI(Ty)) {
185     // Under CUDA device compilation, tex/surf builtin types are replaced with
186     // object types and passed directly.
187     if (getContext().getLangOpts().CUDAIsDevice) {
188       if (Ty->isCUDADeviceBuiltinSurfaceType())
189         return ABIArgInfo::getDirect(
190             CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
191       if (Ty->isCUDADeviceBuiltinTextureType())
192         return ABIArgInfo::getDirect(
193             CGInfo.getCUDADeviceBuiltinTextureDeviceType());
194     }
195     return getNaturalAlignIndirect(Ty, /* byval */ true);
196   }
197 
198   if (const auto *EIT = Ty->getAs<BitIntType>()) {
199     if ((EIT->getNumBits() > 128) ||
200         (!getContext().getTargetInfo().hasInt128Type() &&
201          EIT->getNumBits() > 64))
202       return getNaturalAlignIndirect(Ty, /* byval */ true);
203   }
204 
205   return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
206                                             : ABIArgInfo::getDirect());
207 }
208 
209 void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
210   if (!getCXXABI().classifyReturnType(FI))
211     FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
212 
213   for (auto &&[ArgumentsCount, I] : llvm::enumerate(FI.arguments()))
214     I.info = ArgumentsCount < FI.getNumRequiredArgs()
215                  ? classifyArgumentType(I.type)
216                  : ABIArgInfo::getDirect();
217 
218   // Always honor user-specified calling convention.
219   if (FI.getCallingConvention() != llvm::CallingConv::C)
220     return;
221 
222   FI.setEffectiveCallingConvention(getRuntimeCC());
223 }
224 
225 RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
226                                QualType Ty, AggValueSlot Slot) const {
227   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false,
228                           getContext().getTypeInfoInChars(Ty),
229                           CharUnits::fromQuantity(1),
230                           /*AllowHigherAlign=*/true, Slot);
231 }
232 
// Attaches NVPTX-specific attributes and NVVM metadata to a just-emitted
// global: surface/texture annotations for CUDA variables, the PTX_Kernel
// calling convention for OpenCL/CUDA/NVPTX kernels, grid-constant metadata,
// and launch-bounds annotations.
void NVPTXTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  // Only definitions are annotated; declarations are left untouched.
  if (GV->isDeclaration())
    return;
  const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD) {
    if (M.getLangOpts().CUDA) {
      // CUDA surface/texture variables get a "surface"/"texture" annotation
      // in !nvvm.annotations.
      if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
        addNVVMMetadata(GV, "surface", 1);
      else if (VD->getType()->isCUDADeviceBuiltinTextureType())
        addNVVMMetadata(GV, "texture", 1);
      return;
    }
  }

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (!FD)
    return;

  llvm::Function *F = cast<llvm::Function>(GV);

  // Perform special handling in OpenCL mode
  if (M.getLangOpts().OpenCL) {
    // Use OpenCL function attributes to check for kernel functions
    // By default, all functions are device functions
    if (FD->hasAttr<OpenCLKernelAttr>()) {
      // OpenCL __kernel functions get kernel metadata
      // Create !{<func-ref>, metadata !"kernel", i32 1} node
      F->setCallingConv(llvm::CallingConv::PTX_Kernel);
      // And kernel functions are not subject to inlining
      F->addFnAttr(llvm::Attribute::NoInline);
    }
  }

  // Perform special handling in CUDA mode.
  if (M.getLangOpts().CUDA) {
    // CUDA __global__ functions get a kernel metadata entry.  Since
    // __global__ functions cannot be called from the device, we do not
    // need to set the noinline attribute.
    if (FD->hasAttr<CUDAGlobalAttr>()) {
      // Collect the indices of __grid_constant__ parameters.
      SmallVector<int, 10> GCI;
      for (auto IV : llvm::enumerate(FD->parameters()))
        if (IV.value()->hasAttr<CUDAGridConstantAttr>())
          // For some reason arg indices are 1-based in NVVM
          GCI.push_back(IV.index() + 1);
      // Create !{<func-ref>, metadata !"kernel", i32 1} node
      F->setCallingConv(llvm::CallingConv::PTX_Kernel);
      addGridConstantNVVMMetadata(F, GCI);
    }
    // __launch_bounds__ translates to maxntidx/minctasm/maxclusterrank.
    if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
      M.handleCUDALaunchBoundsAttr(F, Attr);
  }

  // Attach kernel metadata directly if compiling for NVPTX.
  if (FD->hasAttr<NVPTXKernelAttr>()) {
    F->setCallingConv(llvm::CallingConv::PTX_Kernel);
  }
}
291 
292 void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
293                                              StringRef Name, int Operand) {
294   llvm::Module *M = GV->getParent();
295   llvm::LLVMContext &Ctx = M->getContext();
296 
297   // Get "nvvm.annotations" metadata node
298   llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
299 
300   SmallVector<llvm::Metadata *, 5> MDVals = {
301       llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
302       llvm::ConstantAsMetadata::get(
303           llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
304 
305   // Append metadata to nvvm.annotations
306   MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
307 }
308 
309 void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata(
310     llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) {
311 
312   llvm::Module *M = GV->getParent();
313   llvm::LLVMContext &Ctx = M->getContext();
314 
315   // Get "nvvm.annotations" metadata node
316   llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
317 
318   SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)};
319   if (!GridConstantArgs.empty()) {
320     SmallVector<llvm::Metadata *, 10> GCM;
321     for (int I : GridConstantArgs)
322       GCM.push_back(llvm::ConstantAsMetadata::get(
323           llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I)));
324     MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
325                    llvm::MDNode::get(Ctx, GCM)});
326   }
327 
328   // Append metadata to nvvm.annotations
329   MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
330 }
331 
332 bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
333   return false;
334 }
335 
336 llvm::Constant *
337 NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
338                                        llvm::PointerType *PT,
339                                        QualType QT) const {
340   auto &Ctx = CGM.getContext();
341   if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
342     return llvm::ConstantPointerNull::get(PT);
343 
344   auto NPT = llvm::PointerType::get(
345       PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
346   return llvm::ConstantExpr::getAddrSpaceCast(
347       llvm::ConstantPointerNull::get(NPT), PT);
348 }
349 } // namespace
350 
351 void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
352                                                const CUDALaunchBoundsAttr *Attr,
353                                                int32_t *MaxThreadsVal,
354                                                int32_t *MinBlocksVal,
355                                                int32_t *MaxClusterRankVal) {
356   // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
357   llvm::APSInt MaxThreads(32);
358   MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
359   if (MaxThreads > 0) {
360     if (MaxThreadsVal)
361       *MaxThreadsVal = MaxThreads.getExtValue();
362     if (F) {
363       // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
364       NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
365                                               MaxThreads.getExtValue());
366     }
367   }
368 
369   // min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it
370   // was not specified in __launch_bounds__ or if the user specified a 0 value,
371   // we don't have to add a PTX directive.
372   if (Attr->getMinBlocks()) {
373     llvm::APSInt MinBlocks(32);
374     MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
375     if (MinBlocks > 0) {
376       if (MinBlocksVal)
377         *MinBlocksVal = MinBlocks.getExtValue();
378       if (F) {
379         // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
380         NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
381                                                 MinBlocks.getExtValue());
382       }
383     }
384   }
385   if (Attr->getMaxBlocks()) {
386     llvm::APSInt MaxBlocks(32);
387     MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
388     if (MaxBlocks > 0) {
389       if (MaxClusterRankVal)
390         *MaxClusterRankVal = MaxBlocks.getExtValue();
391       if (F) {
392         // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node
393         NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
394                                                 MaxBlocks.getExtValue());
395       }
396     }
397   }
398 }
399 
400 std::unique_ptr<TargetCodeGenInfo>
401 CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
402   return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
403 }
404