//===- AMDGPU.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"

using namespace clang;
using namespace clang::CodeGen;

//===----------------------------------------------------------------------===//
// AMDGPU ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class AMDGPUABIInfo final : public DefaultABIInfo {
private:
  static const unsigned MaxNumRegsForArgsRet = 16;

  unsigned numRegsForType(QualType Ty) const;

  bool isHomogeneousAggregateBaseType(QualType Ty) const override;
  bool isHomogeneousAggregateSmallEnough(const Type *Base,
                                         uint64_t Members) const override;

  // Coerce HIP scalar pointer arguments from generic pointers to global ones.
  llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
                                       unsigned ToAS) const {
    // Single value types.
    auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty);
    if (PtrTy && PtrTy->getAddressSpace() == FromAS)
      return llvm::PointerType::get(Ty->getContext(), ToAS);
    return Ty;
  }
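  // Illustrative note (editor's addition, assuming the usual amdgcn address
  // space numbering: generic = 0, global = 1): a HIP kernel parameter
  // 'float *P' gets its type rewritten to the global address space, roughly
  //   define amdgpu_kernel void @k(ptr addrspace(1) %P)
  // and the kernel prologue addrspacecasts %P back to a generic pointer
  // wherever the body needs one.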

public:
  explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) :
    DefaultABIInfo(CGT) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
  ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;
};

bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
  return true;
}

bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
  const Type *Base, uint64_t Members) const {
  uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32;

  // Homogeneous Aggregates may occupy at most 16 registers.
  return Members * NumRegs <= MaxNumRegsForArgsRet;
}
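// Worked example (editor's addition): for a 64-bit base type such as double,
// each member needs (64 + 31) / 32 = 2 registers, so a homogeneous aggregate
// of up to 8 doubles (8 * 2 = 16) qualifies, while 9 doubles would not.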

/// Estimate number of registers the type will use when passed in registers.
unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const {
  unsigned NumRegs = 0;

  if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // Compute from the number of elements. The reported size is based on the
    // in-memory size, which includes the padding 4th element for 3-vectors.
    QualType EltTy = VT->getElementType();
    unsigned EltSize = getContext().getTypeSize(EltTy);

    // 16-bit element vectors should be passed as packed.
    if (EltSize == 16)
      return (VT->getNumElements() + 1) / 2;

    unsigned EltNumRegs = (EltSize + 31) / 32;
    return EltNumRegs * VT->getNumElements();
  }

  if (const RecordType *RT = Ty->getAs<RecordType>()) {
    const RecordDecl *RD = RT->getDecl();
    assert(!RD->hasFlexibleArrayMember());

    for (const FieldDecl *Field : RD->fields()) {
      QualType FieldTy = Field->getType();
      NumRegs += numRegsForType(FieldTy);
    }

    return NumRegs;
  }

  return (getContext().getTypeSize(Ty) + 31) / 32;
}
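// Editor's examples of the estimate above: a float3 counts as 3 registers
// (element count, not its padded 128-bit in-memory size, which would give 4);
// a half4 packs two 16-bit elements per register and counts as 2; a plain
// double counts as (64 + 31) / 32 = 2.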

void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
  llvm::CallingConv::ID CC = FI.getCallingConvention();

  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

  unsigned NumRegsLeft = MaxNumRegsForArgsRet;
  for (auto &Arg : FI.arguments()) {
    if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
      Arg.info = classifyKernelArgumentType(Arg.type);
    } else {
      Arg.info = classifyArgumentType(Arg.type, NumRegsLeft);
    }
  }
}

Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                 QualType Ty) const {
  llvm_unreachable("AMDGPU does not support varargs");
}

ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
  if (isAggregateTypeForABI(RetTy)) {
    // Records with non-trivial destructors/copy-constructors should not be
    // returned by value.
    if (!getRecordArgABI(RetTy, getCXXABI())) {
      // Ignore empty structs/unions.
      if (isEmptyRecord(getContext(), RetTy, true))
        return ABIArgInfo::getIgnore();

      // Lower single-element structs to just return a regular value.
      if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
        return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));

      if (const RecordType *RT = RetTy->getAs<RecordType>()) {
        const RecordDecl *RD = RT->getDecl();
        if (RD->hasFlexibleArrayMember())
          return DefaultABIInfo::classifyReturnType(RetTy);
      }
      // Pack aggregates <= 8 bytes into a single VGPR or a pair.
      uint64_t Size = getContext().getTypeSize(RetTy);
      if (Size <= 16)
        return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));

      if (Size <= 32)
        return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));

      if (Size <= 64) {
        llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
        return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
      }

      if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
        return ABIArgInfo::getDirect();
    }
  }

  // Otherwise just do the default thing.
  return DefaultABIInfo::classifyReturnType(RetTy);
}
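// Editor's illustration of the return rules above: 'struct { char A, B; }'
// (16 bits) is returned as i16, 'struct { int A, B; }' (64 bits) as
// [2 x i32], and a five-int struct (5 of the 16 available registers) is
// returned directly; anything needing more than 16 registers falls back to
// DefaultABIInfo and is returned indirectly via sret.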

/// For kernels, all parameters are passed in a special buffer. It doesn't
/// make sense to pass anything byval, so everything must be direct.
ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
  Ty = useFirstFieldIfTransparentUnion(Ty);

  // TODO: Can we omit empty structs?

  if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
    Ty = QualType(SeltTy, 0);

  llvm::Type *OrigLTy = CGT.ConvertType(Ty);
  llvm::Type *LTy = OrigLTy;
  if (getContext().getLangOpts().HIP) {
    LTy = coerceKernelArgumentType(
        OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
        /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
  }

  // FIXME: Should also use this for OpenCL, but it requires addressing the
  // problem of kernels being called.
  //
  // FIXME: This doesn't apply the optimization of coercing pointers in structs
  // to global address space when using byref. This would require implementing a
  // new kind of coercion of the in-memory type for indirect arguments.
  if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
      isAggregateTypeForABI(Ty)) {
    return ABIArgInfo::getIndirectAliased(
        getContext().getTypeAlignInChars(Ty),
        getContext().getTargetAddressSpace(LangAS::opencl_constant),
        false /*Realign*/, nullptr /*Padding*/);
  }

  // If we set CanBeFlattened to true, CodeGen will expand the struct to its
  // individual elements, which confuses the Clover OpenCL backend; therefore we
  // have to set it to false here. Other args of getDirect() are just defaults.
  return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}
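// Editor's sketch of the resulting IR (assuming amdgcn, where the constant
// address space is 4): a non-OpenCL aggregate kernel argument such as
// 'struct S { int X[4]; }' is passed by reference in the kernarg segment,
//   define amdgpu_kernel void @k(ptr addrspace(4) byref(%struct.S) align 4 %S)
// while scalars and coerced HIP pointers remain direct.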

ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
                                               unsigned &NumRegsLeft) const {
  assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");

  Ty = useFirstFieldIfTransparentUnion(Ty);

  if (isAggregateTypeForABI(Ty)) {
    // Records with non-trivial destructors/copy-constructors should not be
    // passed by value.
    if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
      return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);

    // Ignore empty structs/unions.
    if (isEmptyRecord(getContext(), Ty, true))
      return ABIArgInfo::getIgnore();

    // Lower single-element structs to just pass a regular value. TODO: We
    // could do reasonable-size multiple-element structs too, using getExpand(),
    // though watch out for things like bitfields.
    if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
      return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));

    if (const RecordType *RT = Ty->getAs<RecordType>()) {
      const RecordDecl *RD = RT->getDecl();
      if (RD->hasFlexibleArrayMember())
        return DefaultABIInfo::classifyArgumentType(Ty);
    }

    // Pack aggregates <= 8 bytes into a single VGPR or a pair.
    uint64_t Size = getContext().getTypeSize(Ty);
    if (Size <= 64) {
      unsigned NumRegs = (Size + 31) / 32;
      NumRegsLeft -= std::min(NumRegsLeft, NumRegs);

      if (Size <= 16)
        return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));

      if (Size <= 32)
        return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));

      // XXX: Should this be i64 instead, and should the limit increase?
      llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
      return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
    }

    if (NumRegsLeft > 0) {
      unsigned NumRegs = numRegsForType(Ty);
      if (NumRegsLeft >= NumRegs) {
        NumRegsLeft -= NumRegs;
        return ABIArgInfo::getDirect();
      }
    }
  }

  // Otherwise just do the default thing.
  ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty);
  if (!ArgInfo.isIndirect()) {
    unsigned NumRegs = numRegsForType(Ty);
    NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
  }

  return ArgInfo;
}
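// Editor's worked example of the NumRegsLeft budget above: a non-kernel
// callee starts with 16 registers, so four int4 arguments (4 registers each)
// exhaust the budget, and a subsequent aggregate larger than 8 bytes falls
// through to DefaultABIInfo::classifyArgumentType, i.e. it is passed
// indirectly.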

class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<AMDGPUABIInfo>(CGT)) {}

  void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
                                 CodeGenModule &CGM) const;

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  unsigned getOpenCLKernelCallingConv() const override;

  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
      llvm::PointerType *T, QualType QT) const override;

  LangAS getASTAllocaAddressSpace() const override {
    return getLangASFromTargetAS(
        getABIInfo().getDataLayout().getAllocaAddrSpace());
  }
  LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
                                  const VarDecl *D) const override;
  llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
                                         SyncScope Scope,
                                         llvm::AtomicOrdering Ordering,
                                         llvm::LLVMContext &Ctx) const override;
  llvm::Value *createEnqueuedBlockKernel(CodeGenFunction &CGF,
                                         llvm::Function *BlockInvokeFunc,
                                         llvm::Type *BlockTy) const override;
  bool shouldEmitStaticExternCAliases() const override;
  bool shouldEmitDWARFBitFieldSeparators() const override;
  void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
};
} // namespace

static bool requiresAMDGPUProtectedVisibility(const Decl *D,
                                              llvm::GlobalValue *GV) {
  if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
    return false;

  return D->hasAttr<OpenCLKernelAttr>() ||
         (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
         (isa<VarDecl>(D) &&
          (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
           cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() ||
           cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType()));
}

void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
    const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const {
  const auto *ReqdWGS =
      M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
  const bool IsOpenCLKernel =
      M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
  const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();

  const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
  if (ReqdWGS || FlatWGS) {
    unsigned Min = 0;
    unsigned Max = 0;
    if (FlatWGS) {
      Min = FlatWGS->getMin()
                ->EvaluateKnownConstInt(M.getContext())
                .getExtValue();
      Max = FlatWGS->getMax()
                ->EvaluateKnownConstInt(M.getContext())
                .getExtValue();
    }
    if (ReqdWGS && Min == 0 && Max == 0)
      Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();

    if (Min != 0) {
      assert(Min <= Max && "Min must be less than or equal to Max");

      std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
      F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
    } else
      assert(Max == 0 && "Max must be zero");
  } else if (IsOpenCLKernel || IsHIPKernel) {
    // By default, restrict the maximum size to a value specified by
    // --gpu-max-threads-per-block=n or its default value for HIP.
    const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
    const unsigned DefaultMaxWorkGroupSize =
        IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
                       : M.getLangOpts().GPUMaxThreadsPerBlock;
    std::string AttrVal =
        std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
    F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
  }
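  // Editor's examples of the attribute produced above:
  // __attribute__((amdgpu_flat_work_group_size(64, 256))) yields
  // "amdgpu-flat-work-group-size"="64,256"; an OpenCL kernel with
  // reqd_work_group_size(8, 8, 4) yields "256,256" (8 * 8 * 4); an
  // unannotated OpenCL kernel defaults to "1,256".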

  if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) {
    unsigned Min =
        Attr->getMin()->EvaluateKnownConstInt(M.getContext()).getExtValue();
    unsigned Max = Attr->getMax() ? Attr->getMax()
                                        ->EvaluateKnownConstInt(M.getContext())
                                        .getExtValue()
                                  : 0;

    if (Min != 0) {
      assert((Max == 0 || Min <= Max) &&
             "Min must be less than or equal to Max");

      std::string AttrVal = llvm::utostr(Min);
      if (Max != 0)
        AttrVal = AttrVal + "," + llvm::utostr(Max);
      F->addFnAttr("amdgpu-waves-per-eu", AttrVal);
    } else
      assert(Max == 0 && "Max must be zero");
  }

  if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
    unsigned NumSGPR = Attr->getNumSGPR();

    if (NumSGPR != 0)
      F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR));
  }

  if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
    uint32_t NumVGPR = Attr->getNumVGPR();

    if (NumVGPR != 0)
      F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
  }
}

void AMDGPUTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  if (requiresAMDGPUProtectedVisibility(D, GV)) {
    GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
    GV->setDSOLocal(true);
  }

  if (GV->isDeclaration())
    return;

  llvm::Function *F = dyn_cast<llvm::Function>(GV);
  if (!F)
    return;

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (FD)
    setFunctionDeclAttributes(FD, F, M);

  const bool IsHIPKernel =
      M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>();

  // TODO: This should be moved to language-specific attributes instead.
  if (IsHIPKernel)
    F->addFnAttr("uniform-work-group-size", "true");

  if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
    F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");

  if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
    F->addFnAttr("amdgpu-ieee", "false");
}

unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
  return llvm::CallingConv::AMDGPU_KERNEL;
}

// Currently LLVM assumes null pointers always have value 0, which results in
// incorrectly transformed IR. Therefore, instead of emitting null pointers in
// the private and local address spaces, a null pointer in the generic address
// space is emitted and then cast to a pointer in the local or private address
// space.
llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
    const CodeGen::CodeGenModule &CGM, llvm::PointerType *PT,
    QualType QT) const {
  if (CGM.getContext().getTargetNullPointerValue(QT) == 0)
    return llvm::ConstantPointerNull::get(PT);

  auto &Ctx = CGM.getContext();
  auto NPT = llvm::PointerType::get(
      PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
  return llvm::ConstantExpr::getAddrSpaceCast(
      llvm::ConstantPointerNull::get(NPT), PT);
}
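// Editor's note: on amdgcn the null pointer of the local (3) and private (5)
// address spaces has the value -1, so a null private 'int *' is emitted as
//   addrspacecast (ptr null to ptr addrspace(5))
// rather than as 'ptr addrspace(5) null'.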

LangAS
AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
                                                  const VarDecl *D) const {
  assert(!CGM.getLangOpts().OpenCL &&
         !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
         "Address space agnostic languages only");
  LangAS DefaultGlobalAS = getLangASFromTargetAS(
      CGM.getContext().getTargetAddressSpace(LangAS::opencl_global));
  if (!D)
    return DefaultGlobalAS;

  LangAS AddrSpace = D->getType().getAddressSpace();
  assert(AddrSpace == LangAS::Default || isTargetAddressSpace(AddrSpace));
  if (AddrSpace != LangAS::Default)
    return AddrSpace;

  // Only promote to address space 4 if VarDecl has constant initialization.
  if (CGM.isTypeConstant(D->getType(), false, false) &&
      D->hasConstantInitialization()) {
    if (auto ConstAS = CGM.getTarget().getConstantAddressSpace())
      return *ConstAS;
  }
  return DefaultGlobalAS;
}
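// Editor's illustration: in an address-space-agnostic language a mutable
// global lands in the default global address space (1 on amdgcn), while
// something like 'const int Tbl[3] = {1, 2, 3};' (constant type with constant
// initialization) is promoted to the constant address space (4).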

llvm::SyncScope::ID
AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
                                            SyncScope Scope,
                                            llvm::AtomicOrdering Ordering,
                                            llvm::LLVMContext &Ctx) const {
  std::string Name;
  switch (Scope) {
  case SyncScope::HIPSingleThread:
    Name = "singlethread";
    break;
  case SyncScope::HIPWavefront:
  case SyncScope::OpenCLSubGroup:
    Name = "wavefront";
    break;
  case SyncScope::HIPWorkgroup:
  case SyncScope::OpenCLWorkGroup:
    Name = "workgroup";
    break;
  case SyncScope::HIPAgent:
  case SyncScope::OpenCLDevice:
    Name = "agent";
    break;
  case SyncScope::HIPSystem:
  case SyncScope::OpenCLAllSVMDevices:
    Name = "";
    break;
  }

  if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) {
    if (!Name.empty())
      Name += "-";
    Name += "one-as";
  }

  return Ctx.getOrInsertSyncScopeID(Name);
}
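// Editor's examples of the mapping above: HIPAgent with seq_cst ordering
// yields the "agent" sync scope; HIPAgent with a weaker ordering (e.g.
// acquire) yields "agent-one-as"; OpenCLAllSVMDevices with a weaker ordering
// yields plain "one-as" (the empty system scope plus the suffix).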

bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  return false;
}

bool AMDGPUTargetCodeGenInfo::shouldEmitDWARFBitFieldSeparators() const {
  return true;
}

void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
    const FunctionType *&FT) const {
  FT = getABIInfo().getContext().adjustFunctionType(
      FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
}

/// Create an OpenCL kernel for an enqueued block.
///
/// The type of the first argument (the block literal) is the struct type
/// of the block literal instead of a pointer type. The first argument
/// (block literal) is passed directly by value to the kernel. The kernel
/// allocates the same type of struct on the stack, stores the block literal
/// into it, and passes its pointer to the block invoke function. The kernel
/// has the "enqueued-block" function attribute and kernel argument metadata.
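///
/// Editor's sketch of the generated shape (an illustration, not taken from
/// the original sources): for an invoke function @foo_block_invoke(ptr %b),
/// the emitted kernel looks roughly like
/// \code
///   define internal amdgpu_kernel void @foo_block_invoke_kernel(%block %lit) {
///     %tmp = alloca %block
///     store %block %lit, ptr %tmp
///     call void @foo_block_invoke(ptr %tmp)
///     ret void
///   }
/// \endcode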
llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
    CodeGenFunction &CGF, llvm::Function *Invoke, llvm::Type *BlockTy) const {
  auto &Builder = CGF.Builder;
  auto &C = CGF.getLLVMContext();

  auto *InvokeFT = Invoke->getFunctionType();
  llvm::SmallVector<llvm::Type *, 2> ArgTys;
  llvm::SmallVector<llvm::Metadata *, 8> AddressQuals;
  llvm::SmallVector<llvm::Metadata *, 8> AccessQuals;
  llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames;
  llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames;
  llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals;
  llvm::SmallVector<llvm::Metadata *, 8> ArgNames;

  // The first kernel argument is the block literal itself, passed by value.
  ArgTys.push_back(BlockTy);
  ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
  AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0)));
  ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
  ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
  AccessQuals.push_back(llvm::MDString::get(C, "none"));
  ArgNames.push_back(llvm::MDString::get(C, "block_literal"));
  // Any remaining invoke parameters are pointers to local memory (address
  // space 3) backing the block's local arguments.
  for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) {
    ArgTys.push_back(InvokeFT->getParamType(I));
    ArgTypeNames.push_back(llvm::MDString::get(C, "void*"));
    AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3)));
    AccessQuals.push_back(llvm::MDString::get(C, "none"));
    ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*"));
    ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
    ArgNames.push_back(
        llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str()));
  }
  std::string Name = Invoke->getName().str() + "_kernel";
  auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false);
  auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
                                   &CGF.CGM.getModule());
  F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);

  llvm::AttrBuilder KernelAttrs(C);
  // FIXME: The invoke isn't applying the right attributes either
  // FIXME: This is missing setTargetAttributes
  CGF.CGM.addDefaultFunctionDefinitionAttributes(KernelAttrs);
  KernelAttrs.addAttribute("enqueued-block");
  F->addFnAttrs(KernelAttrs);

  // Build the kernel body: spill the by-value block literal to an alloca and
  // forward its address, along with the remaining arguments, to the invoke
  // function.
  auto IP = CGF.Builder.saveIP();
  auto *BB = llvm::BasicBlock::Create(C, "entry", F);
  Builder.SetInsertPoint(BB);
  const auto BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlign(BlockTy);
  auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr);
  BlockPtr->setAlignment(BlockAlign);
  Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
  auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
  llvm::SmallVector<llvm::Value *, 2> Args;
  Args.push_back(Cast);
  for (llvm::Argument &A : llvm::drop_begin(F->args()))
    Args.push_back(&A);
  llvm::CallInst *Call = Builder.CreateCall(Invoke, Args);
  Call->setCallingConv(Invoke->getCallingConv());
  Builder.CreateRetVoid();
  Builder.restoreIP(IP);

  F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals));
  F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals));
  F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames));
  F->setMetadata("kernel_arg_base_type",
                 llvm::MDNode::get(C, ArgBaseTypeNames));
  F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals));
  if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata)
    F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames));

  return F;
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createAMDGPUTargetCodeGenInfo(CodeGenModule &CGM) {
  return std::make_unique<AMDGPUTargetCodeGenInfo>(CGM.getTypes());
}