xref: /llvm-project/clang/lib/CodeGen/Targets/AMDGPU.cpp (revision 03744d2aaffee04bc1e4d0668c41556c3c20d406)
1992cb984SSergei Barannikov //===- AMDGPU.cpp ---------------------------------------------------------===//
2992cb984SSergei Barannikov //
3992cb984SSergei Barannikov // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4992cb984SSergei Barannikov // See https://llvm.org/LICENSE.txt for license information.
5992cb984SSergei Barannikov // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6992cb984SSergei Barannikov //
7992cb984SSergei Barannikov //===----------------------------------------------------------------------===//
8992cb984SSergei Barannikov 
9992cb984SSergei Barannikov #include "ABIInfoImpl.h"
10992cb984SSergei Barannikov #include "TargetInfo.h"
11f616c3eeSSaiyedul Islam #include "clang/Basic/TargetOptions.h"
1251b4ada4SMatt Arsenault #include "llvm/Support/AMDGPUAddrSpace.h"
13992cb984SSergei Barannikov 
14992cb984SSergei Barannikov using namespace clang;
15992cb984SSergei Barannikov using namespace clang::CodeGen;
16992cb984SSergei Barannikov 
17992cb984SSergei Barannikov //===----------------------------------------------------------------------===//
18992cb984SSergei Barannikov // AMDGPU ABI Implementation
19992cb984SSergei Barannikov //===----------------------------------------------------------------------===//
20992cb984SSergei Barannikov 
21992cb984SSergei Barannikov namespace {
22992cb984SSergei Barannikov 
23992cb984SSergei Barannikov class AMDGPUABIInfo final : public DefaultABIInfo {
24992cb984SSergei Barannikov private:
25992cb984SSergei Barannikov   static const unsigned MaxNumRegsForArgsRet = 16;
26992cb984SSergei Barannikov 
27992cb984SSergei Barannikov   unsigned numRegsForType(QualType Ty) const;
28992cb984SSergei Barannikov 
29992cb984SSergei Barannikov   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
30992cb984SSergei Barannikov   bool isHomogeneousAggregateSmallEnough(const Type *Base,
31992cb984SSergei Barannikov                                          uint64_t Members) const override;
32992cb984SSergei Barannikov 
33992cb984SSergei Barannikov   // Coerce HIP scalar pointer arguments from generic pointers to global ones.
34992cb984SSergei Barannikov   llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
35992cb984SSergei Barannikov                                        unsigned ToAS) const {
36992cb984SSergei Barannikov     // Single value types.
37992cb984SSergei Barannikov     auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty);
38992cb984SSergei Barannikov     if (PtrTy && PtrTy->getAddressSpace() == FromAS)
39992cb984SSergei Barannikov       return llvm::PointerType::get(Ty->getContext(), ToAS);
40992cb984SSergei Barannikov     return Ty;
41992cb984SSergei Barannikov   }
42992cb984SSergei Barannikov 
43992cb984SSergei Barannikov public:
44992cb984SSergei Barannikov   explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) :
45992cb984SSergei Barannikov     DefaultABIInfo(CGT) {}
46992cb984SSergei Barannikov 
47992cb984SSergei Barannikov   ABIArgInfo classifyReturnType(QualType RetTy) const;
48992cb984SSergei Barannikov   ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
49794457f6SJon Chesterfield   ABIArgInfo classifyArgumentType(QualType Ty, bool Variadic,
50794457f6SJon Chesterfield                                   unsigned &NumRegsLeft) const;
51992cb984SSergei Barannikov 
52992cb984SSergei Barannikov   void computeInfo(CGFunctionInfo &FI) const override;
536d973b45SMariya Podchishchaeva   RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
546d973b45SMariya Podchishchaeva                    AggValueSlot Slot) const override;
55*03744d2aSShilei Tian 
56*03744d2aSShilei Tian   llvm::FixedVectorType *
57*03744d2aSShilei Tian   getOptimalVectorMemoryType(llvm::FixedVectorType *T,
58*03744d2aSShilei Tian                              const LangOptions &Opt) const override {
59*03744d2aSShilei Tian     // We have legal instructions for 96-bit so 3x32 can be supported.
60*03744d2aSShilei Tian     // FIXME: This check should be a subtarget feature as technically SI doesn't
61*03744d2aSShilei Tian     // support it.
62*03744d2aSShilei Tian     if (T->getNumElements() == 3 && getDataLayout().getTypeSizeInBits(T) == 96)
63*03744d2aSShilei Tian       return T;
64*03744d2aSShilei Tian     return DefaultABIInfo::getOptimalVectorMemoryType(T, Opt);
65*03744d2aSShilei Tian   }
66992cb984SSergei Barannikov };
67992cb984SSergei Barannikov 
68992cb984SSergei Barannikov bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
69992cb984SSergei Barannikov   return true;
70992cb984SSergei Barannikov }
71992cb984SSergei Barannikov 
72992cb984SSergei Barannikov bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
73992cb984SSergei Barannikov   const Type *Base, uint64_t Members) const {
74992cb984SSergei Barannikov   uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32;
75992cb984SSergei Barannikov 
76992cb984SSergei Barannikov   // Homogeneous Aggregates may occupy at most 16 registers.
77992cb984SSergei Barannikov   return Members * NumRegs <= MaxNumRegsForArgsRet;
78992cb984SSergei Barannikov }
79992cb984SSergei Barannikov 
80992cb984SSergei Barannikov /// Estimate number of registers the type will use when passed in registers.
81992cb984SSergei Barannikov unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const {
82992cb984SSergei Barannikov   unsigned NumRegs = 0;
83992cb984SSergei Barannikov 
84992cb984SSergei Barannikov   if (const VectorType *VT = Ty->getAs<VectorType>()) {
85992cb984SSergei Barannikov     // Compute from the number of elements. The reported size is based on the
86992cb984SSergei Barannikov     // in-memory size, which includes the padding 4th element for 3-vectors.
87992cb984SSergei Barannikov     QualType EltTy = VT->getElementType();
88992cb984SSergei Barannikov     unsigned EltSize = getContext().getTypeSize(EltTy);
89992cb984SSergei Barannikov 
90992cb984SSergei Barannikov     // 16-bit element vectors should be passed as packed.
91992cb984SSergei Barannikov     if (EltSize == 16)
92992cb984SSergei Barannikov       return (VT->getNumElements() + 1) / 2;
93992cb984SSergei Barannikov 
94992cb984SSergei Barannikov     unsigned EltNumRegs = (EltSize + 31) / 32;
95992cb984SSergei Barannikov     return EltNumRegs * VT->getNumElements();
96992cb984SSergei Barannikov   }
97992cb984SSergei Barannikov 
98992cb984SSergei Barannikov   if (const RecordType *RT = Ty->getAs<RecordType>()) {
99992cb984SSergei Barannikov     const RecordDecl *RD = RT->getDecl();
100992cb984SSergei Barannikov     assert(!RD->hasFlexibleArrayMember());
101992cb984SSergei Barannikov 
102992cb984SSergei Barannikov     for (const FieldDecl *Field : RD->fields()) {
103992cb984SSergei Barannikov       QualType FieldTy = Field->getType();
104992cb984SSergei Barannikov       NumRegs += numRegsForType(FieldTy);
105992cb984SSergei Barannikov     }
106992cb984SSergei Barannikov 
107992cb984SSergei Barannikov     return NumRegs;
108992cb984SSergei Barannikov   }
109992cb984SSergei Barannikov 
110992cb984SSergei Barannikov   return (getContext().getTypeSize(Ty) + 31) / 32;
111992cb984SSergei Barannikov }
112992cb984SSergei Barannikov 
113992cb984SSergei Barannikov void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
114992cb984SSergei Barannikov   llvm::CallingConv::ID CC = FI.getCallingConvention();
115992cb984SSergei Barannikov 
116992cb984SSergei Barannikov   if (!getCXXABI().classifyReturnType(FI))
117992cb984SSergei Barannikov     FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
118992cb984SSergei Barannikov 
119794457f6SJon Chesterfield   unsigned ArgumentIndex = 0;
120794457f6SJon Chesterfield   const unsigned numFixedArguments = FI.getNumRequiredArgs();
121794457f6SJon Chesterfield 
122992cb984SSergei Barannikov   unsigned NumRegsLeft = MaxNumRegsForArgsRet;
123992cb984SSergei Barannikov   for (auto &Arg : FI.arguments()) {
124992cb984SSergei Barannikov     if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
125992cb984SSergei Barannikov       Arg.info = classifyKernelArgumentType(Arg.type);
126992cb984SSergei Barannikov     } else {
127794457f6SJon Chesterfield       bool FixedArgument = ArgumentIndex++ < numFixedArguments;
128794457f6SJon Chesterfield       Arg.info = classifyArgumentType(Arg.type, !FixedArgument, NumRegsLeft);
129992cb984SSergei Barannikov     }
130992cb984SSergei Barannikov   }
131992cb984SSergei Barannikov }
132992cb984SSergei Barannikov 
1336d973b45SMariya Podchishchaeva RValue AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
1346d973b45SMariya Podchishchaeva                                 QualType Ty, AggValueSlot Slot) const {
1358516f54eSJon Chesterfield   const bool IsIndirect = false;
1368516f54eSJon Chesterfield   const bool AllowHigherAlign = false;
1378516f54eSJon Chesterfield   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
1388516f54eSJon Chesterfield                           getContext().getTypeInfoInChars(Ty),
1396d973b45SMariya Podchishchaeva                           CharUnits::fromQuantity(4), AllowHigherAlign, Slot);
140992cb984SSergei Barannikov }
141992cb984SSergei Barannikov 
142992cb984SSergei Barannikov ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
143992cb984SSergei Barannikov   if (isAggregateTypeForABI(RetTy)) {
144992cb984SSergei Barannikov     // Records with non-trivial destructors/copy-constructors should not be
145992cb984SSergei Barannikov     // returned by value.
146992cb984SSergei Barannikov     if (!getRecordArgABI(RetTy, getCXXABI())) {
147992cb984SSergei Barannikov       // Ignore empty structs/unions.
148992cb984SSergei Barannikov       if (isEmptyRecord(getContext(), RetTy, true))
149992cb984SSergei Barannikov         return ABIArgInfo::getIgnore();
150992cb984SSergei Barannikov 
151992cb984SSergei Barannikov       // Lower single-element structs to just return a regular value.
152992cb984SSergei Barannikov       if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
153992cb984SSergei Barannikov         return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
154992cb984SSergei Barannikov 
155992cb984SSergei Barannikov       if (const RecordType *RT = RetTy->getAs<RecordType>()) {
156992cb984SSergei Barannikov         const RecordDecl *RD = RT->getDecl();
157992cb984SSergei Barannikov         if (RD->hasFlexibleArrayMember())
158992cb984SSergei Barannikov           return DefaultABIInfo::classifyReturnType(RetTy);
159992cb984SSergei Barannikov       }
160992cb984SSergei Barannikov 
161992cb984SSergei Barannikov       // Pack aggregates <= 4 bytes into single VGPR or pair.
162992cb984SSergei Barannikov       uint64_t Size = getContext().getTypeSize(RetTy);
163992cb984SSergei Barannikov       if (Size <= 16)
164992cb984SSergei Barannikov         return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
165992cb984SSergei Barannikov 
166992cb984SSergei Barannikov       if (Size <= 32)
167992cb984SSergei Barannikov         return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
168992cb984SSergei Barannikov 
169992cb984SSergei Barannikov       if (Size <= 64) {
170992cb984SSergei Barannikov         llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
171992cb984SSergei Barannikov         return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
172992cb984SSergei Barannikov       }
173992cb984SSergei Barannikov 
174992cb984SSergei Barannikov       if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
175992cb984SSergei Barannikov         return ABIArgInfo::getDirect();
176992cb984SSergei Barannikov     }
177992cb984SSergei Barannikov   }
178992cb984SSergei Barannikov 
179992cb984SSergei Barannikov   // Otherwise just do the default thing.
180992cb984SSergei Barannikov   return DefaultABIInfo::classifyReturnType(RetTy);
181992cb984SSergei Barannikov }
182992cb984SSergei Barannikov 
183992cb984SSergei Barannikov /// For kernels all parameters are really passed in a special buffer. It doesn't
184992cb984SSergei Barannikov /// make sense to pass anything byval, so everything must be direct.
185992cb984SSergei Barannikov ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
186992cb984SSergei Barannikov   Ty = useFirstFieldIfTransparentUnion(Ty);
187992cb984SSergei Barannikov 
188992cb984SSergei Barannikov   // TODO: Can we omit empty structs?
189992cb984SSergei Barannikov 
190992cb984SSergei Barannikov   if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
191992cb984SSergei Barannikov     Ty = QualType(SeltTy, 0);
192992cb984SSergei Barannikov 
193992cb984SSergei Barannikov   llvm::Type *OrigLTy = CGT.ConvertType(Ty);
194992cb984SSergei Barannikov   llvm::Type *LTy = OrigLTy;
195992cb984SSergei Barannikov   if (getContext().getLangOpts().HIP) {
196992cb984SSergei Barannikov     LTy = coerceKernelArgumentType(
197992cb984SSergei Barannikov         OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
198992cb984SSergei Barannikov         /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
199992cb984SSergei Barannikov   }
200992cb984SSergei Barannikov 
201992cb984SSergei Barannikov   // FIXME: Should also use this for OpenCL, but it requires addressing the
202992cb984SSergei Barannikov   // problem of kernels being called.
203992cb984SSergei Barannikov   //
204992cb984SSergei Barannikov   // FIXME: This doesn't apply the optimization of coercing pointers in structs
205992cb984SSergei Barannikov   // to global address space when using byref. This would require implementing a
206992cb984SSergei Barannikov   // new kind of coercion of the in-memory type when for indirect arguments.
207992cb984SSergei Barannikov   if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
208992cb984SSergei Barannikov       isAggregateTypeForABI(Ty)) {
209992cb984SSergei Barannikov     return ABIArgInfo::getIndirectAliased(
210992cb984SSergei Barannikov         getContext().getTypeAlignInChars(Ty),
211992cb984SSergei Barannikov         getContext().getTargetAddressSpace(LangAS::opencl_constant),
212992cb984SSergei Barannikov         false /*Realign*/, nullptr /*Padding*/);
213992cb984SSergei Barannikov   }
214992cb984SSergei Barannikov 
215992cb984SSergei Barannikov   // If we set CanBeFlattened to true, CodeGen will expand the struct to its
216992cb984SSergei Barannikov   // individual elements, which confuses the Clover OpenCL backend; therefore we
217992cb984SSergei Barannikov   // have to set it to false here. Other args of getDirect() are just defaults.
218992cb984SSergei Barannikov   return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
219992cb984SSergei Barannikov }
220992cb984SSergei Barannikov 
221794457f6SJon Chesterfield ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
222992cb984SSergei Barannikov                                                unsigned &NumRegsLeft) const {
223992cb984SSergei Barannikov   assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");
224992cb984SSergei Barannikov 
225992cb984SSergei Barannikov   Ty = useFirstFieldIfTransparentUnion(Ty);
226992cb984SSergei Barannikov 
227794457f6SJon Chesterfield   if (Variadic) {
228794457f6SJon Chesterfield     return ABIArgInfo::getDirect(/*T=*/nullptr,
229794457f6SJon Chesterfield                                  /*Offset=*/0,
230794457f6SJon Chesterfield                                  /*Padding=*/nullptr,
231794457f6SJon Chesterfield                                  /*CanBeFlattened=*/false,
232794457f6SJon Chesterfield                                  /*Align=*/0);
233794457f6SJon Chesterfield   }
234794457f6SJon Chesterfield 
235992cb984SSergei Barannikov   if (isAggregateTypeForABI(Ty)) {
236992cb984SSergei Barannikov     // Records with non-trivial destructors/copy-constructors should not be
237992cb984SSergei Barannikov     // passed by value.
238992cb984SSergei Barannikov     if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
239992cb984SSergei Barannikov       return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
240992cb984SSergei Barannikov 
241992cb984SSergei Barannikov     // Ignore empty structs/unions.
242992cb984SSergei Barannikov     if (isEmptyRecord(getContext(), Ty, true))
243992cb984SSergei Barannikov       return ABIArgInfo::getIgnore();
244992cb984SSergei Barannikov 
245992cb984SSergei Barannikov     // Lower single-element structs to just pass a regular value. TODO: We
246992cb984SSergei Barannikov     // could do reasonable-size multiple-element structs too, using getExpand(),
247992cb984SSergei Barannikov     // though watch out for things like bitfields.
248992cb984SSergei Barannikov     if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
249992cb984SSergei Barannikov       return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
250992cb984SSergei Barannikov 
251992cb984SSergei Barannikov     if (const RecordType *RT = Ty->getAs<RecordType>()) {
252992cb984SSergei Barannikov       const RecordDecl *RD = RT->getDecl();
253992cb984SSergei Barannikov       if (RD->hasFlexibleArrayMember())
254992cb984SSergei Barannikov         return DefaultABIInfo::classifyArgumentType(Ty);
255992cb984SSergei Barannikov     }
256992cb984SSergei Barannikov 
257992cb984SSergei Barannikov     // Pack aggregates <= 8 bytes into single VGPR or pair.
258992cb984SSergei Barannikov     uint64_t Size = getContext().getTypeSize(Ty);
259992cb984SSergei Barannikov     if (Size <= 64) {
260992cb984SSergei Barannikov       unsigned NumRegs = (Size + 31) / 32;
261992cb984SSergei Barannikov       NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
262992cb984SSergei Barannikov 
263992cb984SSergei Barannikov       if (Size <= 16)
264992cb984SSergei Barannikov         return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
265992cb984SSergei Barannikov 
266992cb984SSergei Barannikov       if (Size <= 32)
267992cb984SSergei Barannikov         return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
268992cb984SSergei Barannikov 
269992cb984SSergei Barannikov       // XXX: Should this be i64 instead, and should the limit increase?
270992cb984SSergei Barannikov       llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
271992cb984SSergei Barannikov       return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
272992cb984SSergei Barannikov     }
273992cb984SSergei Barannikov 
274992cb984SSergei Barannikov     if (NumRegsLeft > 0) {
275992cb984SSergei Barannikov       unsigned NumRegs = numRegsForType(Ty);
276992cb984SSergei Barannikov       if (NumRegsLeft >= NumRegs) {
277992cb984SSergei Barannikov         NumRegsLeft -= NumRegs;
278992cb984SSergei Barannikov         return ABIArgInfo::getDirect();
279992cb984SSergei Barannikov       }
280992cb984SSergei Barannikov     }
281d77c6205SChangpeng Fang 
282d77c6205SChangpeng Fang     // Use pass-by-reference in stead of pass-by-value for struct arguments in
283d77c6205SChangpeng Fang     // function ABI.
284d77c6205SChangpeng Fang     return ABIArgInfo::getIndirectAliased(
285d77c6205SChangpeng Fang         getContext().getTypeAlignInChars(Ty),
286d77c6205SChangpeng Fang         getContext().getTargetAddressSpace(LangAS::opencl_private));
287992cb984SSergei Barannikov   }
288992cb984SSergei Barannikov 
289992cb984SSergei Barannikov   // Otherwise just do the default thing.
290992cb984SSergei Barannikov   ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty);
291992cb984SSergei Barannikov   if (!ArgInfo.isIndirect()) {
292992cb984SSergei Barannikov     unsigned NumRegs = numRegsForType(Ty);
293992cb984SSergei Barannikov     NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
294992cb984SSergei Barannikov   }
295992cb984SSergei Barannikov 
296992cb984SSergei Barannikov   return ArgInfo;
297992cb984SSergei Barannikov }
298992cb984SSergei Barannikov 
299992cb984SSergei Barannikov class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
300992cb984SSergei Barannikov public:
301992cb984SSergei Barannikov   AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
302992cb984SSergei Barannikov       : TargetCodeGenInfo(std::make_unique<AMDGPUABIInfo>(CGT)) {}
303992cb984SSergei Barannikov 
304992cb984SSergei Barannikov   void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
305992cb984SSergei Barannikov                                  CodeGenModule &CGM) const;
306992cb984SSergei Barannikov 
307f616c3eeSSaiyedul Islam   void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const override;
308f616c3eeSSaiyedul Islam 
309992cb984SSergei Barannikov   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
310992cb984SSergei Barannikov                            CodeGen::CodeGenModule &M) const override;
311992cb984SSergei Barannikov   unsigned getOpenCLKernelCallingConv() const override;
312992cb984SSergei Barannikov 
313992cb984SSergei Barannikov   llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
314992cb984SSergei Barannikov       llvm::PointerType *T, QualType QT) const override;
315992cb984SSergei Barannikov 
316992cb984SSergei Barannikov   LangAS getASTAllocaAddressSpace() const override {
317992cb984SSergei Barannikov     return getLangASFromTargetAS(
318992cb984SSergei Barannikov         getABIInfo().getDataLayout().getAllocaAddrSpace());
319992cb984SSergei Barannikov   }
320992cb984SSergei Barannikov   LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
321992cb984SSergei Barannikov                                   const VarDecl *D) const override;
322992cb984SSergei Barannikov   llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
323992cb984SSergei Barannikov                                          SyncScope Scope,
324992cb984SSergei Barannikov                                          llvm::AtomicOrdering Ordering,
325992cb984SSergei Barannikov                                          llvm::LLVMContext &Ctx) const override;
326e108853aSMatt Arsenault   void setTargetAtomicMetadata(CodeGenFunction &CGF,
32751b4ada4SMatt Arsenault                                llvm::Instruction &AtomicInst,
32851b4ada4SMatt Arsenault                                const AtomicExpr *Expr = nullptr) const override;
329992cb984SSergei Barannikov   llvm::Value *createEnqueuedBlockKernel(CodeGenFunction &CGF,
330992cb984SSergei Barannikov                                          llvm::Function *BlockInvokeFunc,
331992cb984SSergei Barannikov                                          llvm::Type *BlockTy) const override;
332992cb984SSergei Barannikov   bool shouldEmitStaticExternCAliases() const override;
333992cb984SSergei Barannikov   bool shouldEmitDWARFBitFieldSeparators() const override;
334992cb984SSergei Barannikov   void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
335992cb984SSergei Barannikov };
336992cb984SSergei Barannikov }
337992cb984SSergei Barannikov 
338992cb984SSergei Barannikov static bool requiresAMDGPUProtectedVisibility(const Decl *D,
339992cb984SSergei Barannikov                                               llvm::GlobalValue *GV) {
340992cb984SSergei Barannikov   if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
341992cb984SSergei Barannikov     return false;
342992cb984SSergei Barannikov 
3431d959f93SJoseph Huber   return !D->hasAttr<OMPDeclareTargetDeclAttr>() &&
3441d959f93SJoseph Huber          (D->hasAttr<OpenCLKernelAttr>() ||
345992cb984SSergei Barannikov           (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
346992cb984SSergei Barannikov           (isa<VarDecl>(D) &&
347992cb984SSergei Barannikov            (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
348992cb984SSergei Barannikov             cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() ||
3491d959f93SJoseph Huber             cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType())));
350992cb984SSergei Barannikov }
351992cb984SSergei Barannikov 
352992cb984SSergei Barannikov void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
353992cb984SSergei Barannikov     const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const {
354992cb984SSergei Barannikov   const auto *ReqdWGS =
355992cb984SSergei Barannikov       M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
356992cb984SSergei Barannikov   const bool IsOpenCLKernel =
357992cb984SSergei Barannikov       M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
358992cb984SSergei Barannikov   const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();
359992cb984SSergei Barannikov 
360992cb984SSergei Barannikov   const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
361992cb984SSergei Barannikov   if (ReqdWGS || FlatWGS) {
36208a22076SJohannes Doerfert     M.handleAMDGPUFlatWorkGroupSizeAttr(F, FlatWGS, ReqdWGS);
363992cb984SSergei Barannikov   } else if (IsOpenCLKernel || IsHIPKernel) {
364992cb984SSergei Barannikov     // By default, restrict the maximum size to a value specified by
365992cb984SSergei Barannikov     // --gpu-max-threads-per-block=n or its default value for HIP.
366992cb984SSergei Barannikov     const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
367992cb984SSergei Barannikov     const unsigned DefaultMaxWorkGroupSize =
368992cb984SSergei Barannikov         IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
369992cb984SSergei Barannikov                        : M.getLangOpts().GPUMaxThreadsPerBlock;
370992cb984SSergei Barannikov     std::string AttrVal =
371992cb984SSergei Barannikov         std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
372992cb984SSergei Barannikov     F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
373992cb984SSergei Barannikov   }
374992cb984SSergei Barannikov 
37508a22076SJohannes Doerfert   if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>())
37608a22076SJohannes Doerfert     M.handleAMDGPUWavesPerEUAttr(F, Attr);
377992cb984SSergei Barannikov 
378992cb984SSergei Barannikov   if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
379992cb984SSergei Barannikov     unsigned NumSGPR = Attr->getNumSGPR();
380992cb984SSergei Barannikov 
381992cb984SSergei Barannikov     if (NumSGPR != 0)
382992cb984SSergei Barannikov       F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR));
383992cb984SSergei Barannikov   }
384992cb984SSergei Barannikov 
385992cb984SSergei Barannikov   if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
386992cb984SSergei Barannikov     uint32_t NumVGPR = Attr->getNumVGPR();
387992cb984SSergei Barannikov 
388992cb984SSergei Barannikov     if (NumVGPR != 0)
389992cb984SSergei Barannikov       F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
390992cb984SSergei Barannikov   }
391c4e517f5SJun Wang 
392c4e517f5SJun Wang   if (const auto *Attr = FD->getAttr<AMDGPUMaxNumWorkGroupsAttr>()) {
393c4e517f5SJun Wang     uint32_t X = Attr->getMaxNumWorkGroupsX()
394c4e517f5SJun Wang                      ->EvaluateKnownConstInt(M.getContext())
395c4e517f5SJun Wang                      .getExtValue();
396c4e517f5SJun Wang     // Y and Z dimensions default to 1 if not specified
397c4e517f5SJun Wang     uint32_t Y = Attr->getMaxNumWorkGroupsY()
398c4e517f5SJun Wang                      ? Attr->getMaxNumWorkGroupsY()
399c4e517f5SJun Wang                            ->EvaluateKnownConstInt(M.getContext())
400c4e517f5SJun Wang                            .getExtValue()
401c4e517f5SJun Wang                      : 1;
402c4e517f5SJun Wang     uint32_t Z = Attr->getMaxNumWorkGroupsZ()
403c4e517f5SJun Wang                      ? Attr->getMaxNumWorkGroupsZ()
404c4e517f5SJun Wang                            ->EvaluateKnownConstInt(M.getContext())
405c4e517f5SJun Wang                            .getExtValue()
406c4e517f5SJun Wang                      : 1;
407c4e517f5SJun Wang 
408c4e517f5SJun Wang     llvm::SmallString<32> AttrVal;
409c4e517f5SJun Wang     llvm::raw_svector_ostream OS(AttrVal);
410c4e517f5SJun Wang     OS << X << ',' << Y << ',' << Z;
411c4e517f5SJun Wang 
412c4e517f5SJun Wang     F->addFnAttr("amdgpu-max-num-workgroups", AttrVal.str());
413c4e517f5SJun Wang   }
414992cb984SSergei Barannikov }
415992cb984SSergei Barannikov 
416f616c3eeSSaiyedul Islam /// Emits control constants used to change per-architecture behaviour in the
417f616c3eeSSaiyedul Islam /// AMDGPU ROCm device libraries.
418f616c3eeSSaiyedul Islam void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
419f616c3eeSSaiyedul Islam     CodeGen::CodeGenModule &CGM) const {
42021861991SSaiyedul Islam   StringRef Name = "__oclc_ABI_version";
42149ff6a96SJoseph Huber   llvm::GlobalVariable *OriginalGV = CGM.getModule().getNamedGlobal(Name);
42249ff6a96SJoseph Huber   if (OriginalGV && !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage()))
423f616c3eeSSaiyedul Islam     return;
424f616c3eeSSaiyedul Islam 
42521861991SSaiyedul Islam   if (CGM.getTarget().getTargetOpts().CodeObjectVersion ==
42695943d2fSDominik Adamski       llvm::CodeObjectVersionKind::COV_None)
42721861991SSaiyedul Islam     return;
42821861991SSaiyedul Islam 
429f616c3eeSSaiyedul Islam   auto *Type = llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), 32);
430f616c3eeSSaiyedul Islam   llvm::Constant *COV = llvm::ConstantInt::get(
431f616c3eeSSaiyedul Islam       Type, CGM.getTarget().getTargetOpts().CodeObjectVersion);
432f616c3eeSSaiyedul Islam 
433f616c3eeSSaiyedul Islam   // It needs to be constant weak_odr without externally_initialized so that
434f616c3eeSSaiyedul Islam   // the load instuction can be eliminated by the IPSCCP.
435f616c3eeSSaiyedul Islam   auto *GV = new llvm::GlobalVariable(
436f616c3eeSSaiyedul Islam       CGM.getModule(), Type, true, llvm::GlobalValue::WeakODRLinkage, COV, Name,
437f616c3eeSSaiyedul Islam       nullptr, llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
438f616c3eeSSaiyedul Islam       CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
439f616c3eeSSaiyedul Islam   GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
440f616c3eeSSaiyedul Islam   GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility);
44149ff6a96SJoseph Huber 
44249ff6a96SJoseph Huber   // Replace any external references to this variable with the new global.
44349ff6a96SJoseph Huber   if (OriginalGV) {
44449ff6a96SJoseph Huber     OriginalGV->replaceAllUsesWith(GV);
44549ff6a96SJoseph Huber     GV->takeName(OriginalGV);
44649ff6a96SJoseph Huber     OriginalGV->eraseFromParent();
44749ff6a96SJoseph Huber   }
448f616c3eeSSaiyedul Islam }
449f616c3eeSSaiyedul Islam 
450992cb984SSergei Barannikov void AMDGPUTargetCodeGenInfo::setTargetAttributes(
451992cb984SSergei Barannikov     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
452992cb984SSergei Barannikov   if (requiresAMDGPUProtectedVisibility(D, GV)) {
453992cb984SSergei Barannikov     GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
454992cb984SSergei Barannikov     GV->setDSOLocal(true);
455992cb984SSergei Barannikov   }
456992cb984SSergei Barannikov 
457992cb984SSergei Barannikov   if (GV->isDeclaration())
458992cb984SSergei Barannikov     return;
459992cb984SSergei Barannikov 
460992cb984SSergei Barannikov   llvm::Function *F = dyn_cast<llvm::Function>(GV);
461992cb984SSergei Barannikov   if (!F)
462992cb984SSergei Barannikov     return;
463992cb984SSergei Barannikov 
464992cb984SSergei Barannikov   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
465992cb984SSergei Barannikov   if (FD)
466992cb984SSergei Barannikov     setFunctionDeclAttributes(FD, F, M);
467992cb984SSergei Barannikov 
468992cb984SSergei Barannikov   if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
469992cb984SSergei Barannikov     F->addFnAttr("amdgpu-ieee", "false");
470992cb984SSergei Barannikov }
471992cb984SSergei Barannikov 
472992cb984SSergei Barannikov unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
473992cb984SSergei Barannikov   return llvm::CallingConv::AMDGPU_KERNEL;
474992cb984SSergei Barannikov }
475992cb984SSergei Barannikov 
476992cb984SSergei Barannikov // Currently LLVM assumes null pointers always have value 0,
477992cb984SSergei Barannikov // which results in incorrectly transformed IR. Therefore, instead of
478992cb984SSergei Barannikov // emitting null pointers in private and local address spaces, a null
479992cb984SSergei Barannikov // pointer in generic address space is emitted which is casted to a
480992cb984SSergei Barannikov // pointer in local or private address space.
481992cb984SSergei Barannikov llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
482992cb984SSergei Barannikov     const CodeGen::CodeGenModule &CGM, llvm::PointerType *PT,
483992cb984SSergei Barannikov     QualType QT) const {
484992cb984SSergei Barannikov   if (CGM.getContext().getTargetNullPointerValue(QT) == 0)
485992cb984SSergei Barannikov     return llvm::ConstantPointerNull::get(PT);
486992cb984SSergei Barannikov 
487992cb984SSergei Barannikov   auto &Ctx = CGM.getContext();
488992cb984SSergei Barannikov   auto NPT = llvm::PointerType::get(
489992cb984SSergei Barannikov       PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
490992cb984SSergei Barannikov   return llvm::ConstantExpr::getAddrSpaceCast(
491992cb984SSergei Barannikov       llvm::ConstantPointerNull::get(NPT), PT);
492992cb984SSergei Barannikov }
493992cb984SSergei Barannikov 
494992cb984SSergei Barannikov LangAS
495992cb984SSergei Barannikov AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
496992cb984SSergei Barannikov                                                   const VarDecl *D) const {
497992cb984SSergei Barannikov   assert(!CGM.getLangOpts().OpenCL &&
498992cb984SSergei Barannikov          !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
499992cb984SSergei Barannikov          "Address space agnostic languages only");
500992cb984SSergei Barannikov   LangAS DefaultGlobalAS = getLangASFromTargetAS(
501992cb984SSergei Barannikov       CGM.getContext().getTargetAddressSpace(LangAS::opencl_global));
502992cb984SSergei Barannikov   if (!D)
503992cb984SSergei Barannikov     return DefaultGlobalAS;
504992cb984SSergei Barannikov 
505992cb984SSergei Barannikov   LangAS AddrSpace = D->getType().getAddressSpace();
506992cb984SSergei Barannikov   if (AddrSpace != LangAS::Default)
507992cb984SSergei Barannikov     return AddrSpace;
508992cb984SSergei Barannikov 
509992cb984SSergei Barannikov   // Only promote to address space 4 if VarDecl has constant initialization.
51019f2b680SDavid Blaikie   if (D->getType().isConstantStorage(CGM.getContext(), false, false) &&
511992cb984SSergei Barannikov       D->hasConstantInitialization()) {
512992cb984SSergei Barannikov     if (auto ConstAS = CGM.getTarget().getConstantAddressSpace())
513992cb984SSergei Barannikov       return *ConstAS;
514992cb984SSergei Barannikov   }
515992cb984SSergei Barannikov   return DefaultGlobalAS;
516992cb984SSergei Barannikov }
517992cb984SSergei Barannikov 
518992cb984SSergei Barannikov llvm::SyncScope::ID
519992cb984SSergei Barannikov AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
520992cb984SSergei Barannikov                                             SyncScope Scope,
521992cb984SSergei Barannikov                                             llvm::AtomicOrdering Ordering,
522992cb984SSergei Barannikov                                             llvm::LLVMContext &Ctx) const {
523992cb984SSergei Barannikov   std::string Name;
524992cb984SSergei Barannikov   switch (Scope) {
525992cb984SSergei Barannikov   case SyncScope::HIPSingleThread:
5264e80bc7dSJoseph Huber   case SyncScope::SingleScope:
527992cb984SSergei Barannikov     Name = "singlethread";
528992cb984SSergei Barannikov     break;
529992cb984SSergei Barannikov   case SyncScope::HIPWavefront:
530992cb984SSergei Barannikov   case SyncScope::OpenCLSubGroup:
5314e80bc7dSJoseph Huber   case SyncScope::WavefrontScope:
532992cb984SSergei Barannikov     Name = "wavefront";
533992cb984SSergei Barannikov     break;
534992cb984SSergei Barannikov   case SyncScope::HIPWorkgroup:
535992cb984SSergei Barannikov   case SyncScope::OpenCLWorkGroup:
5364e80bc7dSJoseph Huber   case SyncScope::WorkgroupScope:
537992cb984SSergei Barannikov     Name = "workgroup";
538992cb984SSergei Barannikov     break;
539992cb984SSergei Barannikov   case SyncScope::HIPAgent:
540992cb984SSergei Barannikov   case SyncScope::OpenCLDevice:
5414e80bc7dSJoseph Huber   case SyncScope::DeviceScope:
542992cb984SSergei Barannikov     Name = "agent";
543992cb984SSergei Barannikov     break;
5444e80bc7dSJoseph Huber   case SyncScope::SystemScope:
545992cb984SSergei Barannikov   case SyncScope::HIPSystem:
546992cb984SSergei Barannikov   case SyncScope::OpenCLAllSVMDevices:
547992cb984SSergei Barannikov     Name = "";
548992cb984SSergei Barannikov     break;
549992cb984SSergei Barannikov   }
550992cb984SSergei Barannikov 
55181fae0d5SJoseph Huber   // OpenCL assumes by default that atomic scopes are per-address space for
55281fae0d5SJoseph Huber   // non-sequentially consistent operations.
55381fae0d5SJoseph Huber   if (Scope >= SyncScope::OpenCLWorkGroup &&
55481fae0d5SJoseph Huber       Scope <= SyncScope::OpenCLSubGroup &&
55581fae0d5SJoseph Huber       Ordering != llvm::AtomicOrdering::SequentiallyConsistent) {
556992cb984SSergei Barannikov     if (!Name.empty())
557992cb984SSergei Barannikov       Name = Twine(Twine(Name) + Twine("-")).str();
558992cb984SSergei Barannikov 
559992cb984SSergei Barannikov     Name = Twine(Twine(Name) + Twine("one-as")).str();
560992cb984SSergei Barannikov   }
561992cb984SSergei Barannikov 
562992cb984SSergei Barannikov   return Ctx.getOrInsertSyncScopeID(Name);
563992cb984SSergei Barannikov }
564992cb984SSergei Barannikov 
565e108853aSMatt Arsenault void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata(
56651b4ada4SMatt Arsenault     CodeGenFunction &CGF, llvm::Instruction &AtomicInst,
56751b4ada4SMatt Arsenault     const AtomicExpr *AE) const {
56851b4ada4SMatt Arsenault   auto *RMW = dyn_cast<llvm::AtomicRMWInst>(&AtomicInst);
56951b4ada4SMatt Arsenault   auto *CmpX = dyn_cast<llvm::AtomicCmpXchgInst>(&AtomicInst);
57051b4ada4SMatt Arsenault 
57151b4ada4SMatt Arsenault   // OpenCL and old style HIP atomics consider atomics targeting thread private
57251b4ada4SMatt Arsenault   // memory to be undefined.
57351b4ada4SMatt Arsenault   //
57451b4ada4SMatt Arsenault   // TODO: This is probably undefined for atomic load/store, but there's not
57551b4ada4SMatt Arsenault   // much direct codegen benefit to knowing this.
57651b4ada4SMatt Arsenault   if (((RMW && RMW->getPointerAddressSpace() == llvm::AMDGPUAS::FLAT_ADDRESS) ||
57751b4ada4SMatt Arsenault        (CmpX &&
57851b4ada4SMatt Arsenault         CmpX->getPointerAddressSpace() == llvm::AMDGPUAS::FLAT_ADDRESS)) &&
57951b4ada4SMatt Arsenault       AE && AE->threadPrivateMemoryAtomicsAreUndefined()) {
58051b4ada4SMatt Arsenault     llvm::MDBuilder MDHelper(CGF.getLLVMContext());
58151b4ada4SMatt Arsenault     llvm::MDNode *ASRange = MDHelper.createRange(
58251b4ada4SMatt Arsenault         llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS),
58351b4ada4SMatt Arsenault         llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS + 1));
58451b4ada4SMatt Arsenault     AtomicInst.setMetadata(llvm::LLVMContext::MD_noalias_addrspace, ASRange);
58551b4ada4SMatt Arsenault   }
58651b4ada4SMatt Arsenault 
58751b4ada4SMatt Arsenault   if (!RMW || !CGF.getTarget().allowAMDGPUUnsafeFPAtomics())
588e108853aSMatt Arsenault     return;
589e108853aSMatt Arsenault 
590e108853aSMatt Arsenault   // TODO: Introduce new, more controlled options that also work for integers,
591e108853aSMatt Arsenault   // and deprecate allowAMDGPUUnsafeFPAtomics.
59251b4ada4SMatt Arsenault   llvm::AtomicRMWInst::BinOp RMWOp = RMW->getOperation();
593e108853aSMatt Arsenault   if (llvm::AtomicRMWInst::isFPOperation(RMWOp)) {
594e108853aSMatt Arsenault     llvm::MDNode *Empty = llvm::MDNode::get(CGF.getLLVMContext(), {});
59551b4ada4SMatt Arsenault     RMW->setMetadata("amdgpu.no.fine.grained.memory", Empty);
596e108853aSMatt Arsenault 
59751b4ada4SMatt Arsenault     if (RMWOp == llvm::AtomicRMWInst::FAdd && RMW->getType()->isFloatTy())
59851b4ada4SMatt Arsenault       RMW->setMetadata("amdgpu.ignore.denormal.mode", Empty);
599e108853aSMatt Arsenault   }
600e108853aSMatt Arsenault }
601e108853aSMatt Arsenault 
602992cb984SSergei Barannikov bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
603992cb984SSergei Barannikov   return false;
604992cb984SSergei Barannikov }
605992cb984SSergei Barannikov 
606992cb984SSergei Barannikov bool AMDGPUTargetCodeGenInfo::shouldEmitDWARFBitFieldSeparators() const {
607992cb984SSergei Barannikov   return true;
608992cb984SSergei Barannikov }
609992cb984SSergei Barannikov 
610992cb984SSergei Barannikov void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
611992cb984SSergei Barannikov     const FunctionType *&FT) const {
612992cb984SSergei Barannikov   FT = getABIInfo().getContext().adjustFunctionType(
613992cb984SSergei Barannikov       FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
614992cb984SSergei Barannikov }
615992cb984SSergei Barannikov 
616992cb984SSergei Barannikov /// Create an OpenCL kernel for an enqueued block.
617992cb984SSergei Barannikov ///
618992cb984SSergei Barannikov /// The type of the first argument (the block literal) is the struct type
619992cb984SSergei Barannikov /// of the block literal instead of a pointer type. The first argument
620992cb984SSergei Barannikov /// (block literal) is passed directly by value to the kernel. The kernel
621992cb984SSergei Barannikov /// allocates the same type of struct on stack and stores the block literal
622992cb984SSergei Barannikov /// to it and passes its pointer to the block invoke function. The kernel
623992cb984SSergei Barannikov /// has "enqueued-block" function attribute and kernel argument metadata.
624992cb984SSergei Barannikov llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
625992cb984SSergei Barannikov     CodeGenFunction &CGF, llvm::Function *Invoke, llvm::Type *BlockTy) const {
626992cb984SSergei Barannikov   auto &Builder = CGF.Builder;
627992cb984SSergei Barannikov   auto &C = CGF.getLLVMContext();
628992cb984SSergei Barannikov 
629992cb984SSergei Barannikov   auto *InvokeFT = Invoke->getFunctionType();
630992cb984SSergei Barannikov   llvm::SmallVector<llvm::Type *, 2> ArgTys;
631992cb984SSergei Barannikov   llvm::SmallVector<llvm::Metadata *, 8> AddressQuals;
632992cb984SSergei Barannikov   llvm::SmallVector<llvm::Metadata *, 8> AccessQuals;
633992cb984SSergei Barannikov   llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames;
634992cb984SSergei Barannikov   llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames;
635992cb984SSergei Barannikov   llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals;
636992cb984SSergei Barannikov   llvm::SmallVector<llvm::Metadata *, 8> ArgNames;
637992cb984SSergei Barannikov 
638992cb984SSergei Barannikov   ArgTys.push_back(BlockTy);
639992cb984SSergei Barannikov   ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
640992cb984SSergei Barannikov   AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0)));
641992cb984SSergei Barannikov   ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
642992cb984SSergei Barannikov   ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
643992cb984SSergei Barannikov   AccessQuals.push_back(llvm::MDString::get(C, "none"));
644992cb984SSergei Barannikov   ArgNames.push_back(llvm::MDString::get(C, "block_literal"));
645992cb984SSergei Barannikov   for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) {
646992cb984SSergei Barannikov     ArgTys.push_back(InvokeFT->getParamType(I));
647992cb984SSergei Barannikov     ArgTypeNames.push_back(llvm::MDString::get(C, "void*"));
648992cb984SSergei Barannikov     AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3)));
649992cb984SSergei Barannikov     AccessQuals.push_back(llvm::MDString::get(C, "none"));
650992cb984SSergei Barannikov     ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*"));
651992cb984SSergei Barannikov     ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
652992cb984SSergei Barannikov     ArgNames.push_back(
653992cb984SSergei Barannikov         llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str()));
654992cb984SSergei Barannikov   }
655992cb984SSergei Barannikov   std::string Name = Invoke->getName().str() + "_kernel";
656992cb984SSergei Barannikov   auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false);
657992cb984SSergei Barannikov   auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
658992cb984SSergei Barannikov                                    &CGF.CGM.getModule());
659992cb984SSergei Barannikov   F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
660992cb984SSergei Barannikov 
661992cb984SSergei Barannikov   llvm::AttrBuilder KernelAttrs(C);
662992cb984SSergei Barannikov   // FIXME: The invoke isn't applying the right attributes either
663992cb984SSergei Barannikov   // FIXME: This is missing setTargetAttributes
664992cb984SSergei Barannikov   CGF.CGM.addDefaultFunctionDefinitionAttributes(KernelAttrs);
665992cb984SSergei Barannikov   KernelAttrs.addAttribute("enqueued-block");
666992cb984SSergei Barannikov   F->addFnAttrs(KernelAttrs);
667992cb984SSergei Barannikov 
668992cb984SSergei Barannikov   auto IP = CGF.Builder.saveIP();
669992cb984SSergei Barannikov   auto *BB = llvm::BasicBlock::Create(C, "entry", F);
670992cb984SSergei Barannikov   Builder.SetInsertPoint(BB);
671992cb984SSergei Barannikov   const auto BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlign(BlockTy);
672992cb984SSergei Barannikov   auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr);
673992cb984SSergei Barannikov   BlockPtr->setAlignment(BlockAlign);
674992cb984SSergei Barannikov   Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
675992cb984SSergei Barannikov   auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
676992cb984SSergei Barannikov   llvm::SmallVector<llvm::Value *, 2> Args;
677992cb984SSergei Barannikov   Args.push_back(Cast);
678992cb984SSergei Barannikov   for (llvm::Argument &A : llvm::drop_begin(F->args()))
679992cb984SSergei Barannikov     Args.push_back(&A);
680992cb984SSergei Barannikov   llvm::CallInst *call = Builder.CreateCall(Invoke, Args);
681992cb984SSergei Barannikov   call->setCallingConv(Invoke->getCallingConv());
682992cb984SSergei Barannikov   Builder.CreateRetVoid();
683992cb984SSergei Barannikov   Builder.restoreIP(IP);
684992cb984SSergei Barannikov 
685992cb984SSergei Barannikov   F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals));
686992cb984SSergei Barannikov   F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals));
687992cb984SSergei Barannikov   F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames));
688992cb984SSergei Barannikov   F->setMetadata("kernel_arg_base_type",
689992cb984SSergei Barannikov                  llvm::MDNode::get(C, ArgBaseTypeNames));
690992cb984SSergei Barannikov   F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals));
691992cb984SSergei Barannikov   if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata)
692992cb984SSergei Barannikov     F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames));
693992cb984SSergei Barannikov 
694992cb984SSergei Barannikov   return F;
695992cb984SSergei Barannikov }
696992cb984SSergei Barannikov 
69708a22076SJohannes Doerfert void CodeGenModule::handleAMDGPUFlatWorkGroupSizeAttr(
69808a22076SJohannes Doerfert     llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *FlatWGS,
6990ba57c8bSJohannes Doerfert     const ReqdWorkGroupSizeAttr *ReqdWGS, int32_t *MinThreadsVal,
7000ba57c8bSJohannes Doerfert     int32_t *MaxThreadsVal) {
70108a22076SJohannes Doerfert   unsigned Min = 0;
70208a22076SJohannes Doerfert   unsigned Max = 0;
70308a22076SJohannes Doerfert   if (FlatWGS) {
70408a22076SJohannes Doerfert     Min = FlatWGS->getMin()->EvaluateKnownConstInt(getContext()).getExtValue();
70508a22076SJohannes Doerfert     Max = FlatWGS->getMax()->EvaluateKnownConstInt(getContext()).getExtValue();
70608a22076SJohannes Doerfert   }
70708a22076SJohannes Doerfert   if (ReqdWGS && Min == 0 && Max == 0)
70808a22076SJohannes Doerfert     Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
70908a22076SJohannes Doerfert 
71008a22076SJohannes Doerfert   if (Min != 0) {
71108a22076SJohannes Doerfert     assert(Min <= Max && "Min must be less than or equal Max");
71208a22076SJohannes Doerfert 
7130ba57c8bSJohannes Doerfert     if (MinThreadsVal)
7140ba57c8bSJohannes Doerfert       *MinThreadsVal = Min;
7150ba57c8bSJohannes Doerfert     if (MaxThreadsVal)
7160ba57c8bSJohannes Doerfert       *MaxThreadsVal = Max;
71708a22076SJohannes Doerfert     std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
7180ba57c8bSJohannes Doerfert     if (F)
71908a22076SJohannes Doerfert       F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
72008a22076SJohannes Doerfert   } else
72108a22076SJohannes Doerfert     assert(Max == 0 && "Max must be zero");
72208a22076SJohannes Doerfert }
72308a22076SJohannes Doerfert 
72408a22076SJohannes Doerfert void CodeGenModule::handleAMDGPUWavesPerEUAttr(
72508a22076SJohannes Doerfert     llvm::Function *F, const AMDGPUWavesPerEUAttr *Attr) {
72608a22076SJohannes Doerfert   unsigned Min =
72708a22076SJohannes Doerfert       Attr->getMin()->EvaluateKnownConstInt(getContext()).getExtValue();
72808a22076SJohannes Doerfert   unsigned Max =
72908a22076SJohannes Doerfert       Attr->getMax()
73008a22076SJohannes Doerfert           ? Attr->getMax()->EvaluateKnownConstInt(getContext()).getExtValue()
73108a22076SJohannes Doerfert           : 0;
73208a22076SJohannes Doerfert 
73308a22076SJohannes Doerfert   if (Min != 0) {
73408a22076SJohannes Doerfert     assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max");
73508a22076SJohannes Doerfert 
73608a22076SJohannes Doerfert     std::string AttrVal = llvm::utostr(Min);
73708a22076SJohannes Doerfert     if (Max != 0)
73808a22076SJohannes Doerfert       AttrVal = AttrVal + "," + llvm::utostr(Max);
73908a22076SJohannes Doerfert     F->addFnAttr("amdgpu-waves-per-eu", AttrVal);
74008a22076SJohannes Doerfert   } else
74108a22076SJohannes Doerfert     assert(Max == 0 && "Max must be zero");
74208a22076SJohannes Doerfert }
74308a22076SJohannes Doerfert 
744992cb984SSergei Barannikov std::unique_ptr<TargetCodeGenInfo>
745992cb984SSergei Barannikov CodeGen::createAMDGPUTargetCodeGenInfo(CodeGenModule &CGM) {
746992cb984SSergei Barannikov   return std::make_unique<AMDGPUTargetCodeGenInfo>(CGM.getTypes());
747992cb984SSergei Barannikov }
748