xref: /freebsd-src/contrib/llvm-project/clang/lib/CodeGen/Targets/AMDGPU.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
106c3fb27SDimitry Andric //===- AMDGPU.cpp ---------------------------------------------------------===//
206c3fb27SDimitry Andric //
306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
606c3fb27SDimitry Andric //
706c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
806c3fb27SDimitry Andric 
906c3fb27SDimitry Andric #include "ABIInfoImpl.h"
1006c3fb27SDimitry Andric #include "TargetInfo.h"
115f757f3fSDimitry Andric #include "clang/Basic/TargetOptions.h"
1206c3fb27SDimitry Andric 
1306c3fb27SDimitry Andric using namespace clang;
1406c3fb27SDimitry Andric using namespace clang::CodeGen;
1506c3fb27SDimitry Andric 
1606c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
1706c3fb27SDimitry Andric // AMDGPU ABI Implementation
1806c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
1906c3fb27SDimitry Andric 
2006c3fb27SDimitry Andric namespace {
2106c3fb27SDimitry Andric 
2206c3fb27SDimitry Andric class AMDGPUABIInfo final : public DefaultABIInfo {
2306c3fb27SDimitry Andric private:
2406c3fb27SDimitry Andric   static const unsigned MaxNumRegsForArgsRet = 16;
2506c3fb27SDimitry Andric 
2606c3fb27SDimitry Andric   unsigned numRegsForType(QualType Ty) const;
2706c3fb27SDimitry Andric 
2806c3fb27SDimitry Andric   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
2906c3fb27SDimitry Andric   bool isHomogeneousAggregateSmallEnough(const Type *Base,
3006c3fb27SDimitry Andric                                          uint64_t Members) const override;
3106c3fb27SDimitry Andric 
3206c3fb27SDimitry Andric   // Coerce HIP scalar pointer arguments from generic pointers to global ones.
3306c3fb27SDimitry Andric   llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
3406c3fb27SDimitry Andric                                        unsigned ToAS) const {
3506c3fb27SDimitry Andric     // Single value types.
3606c3fb27SDimitry Andric     auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty);
3706c3fb27SDimitry Andric     if (PtrTy && PtrTy->getAddressSpace() == FromAS)
3806c3fb27SDimitry Andric       return llvm::PointerType::get(Ty->getContext(), ToAS);
3906c3fb27SDimitry Andric     return Ty;
4006c3fb27SDimitry Andric   }
4106c3fb27SDimitry Andric 
4206c3fb27SDimitry Andric public:
4306c3fb27SDimitry Andric   explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) :
4406c3fb27SDimitry Andric     DefaultABIInfo(CGT) {}
4506c3fb27SDimitry Andric 
4606c3fb27SDimitry Andric   ABIArgInfo classifyReturnType(QualType RetTy) const;
4706c3fb27SDimitry Andric   ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
48*0fca6ea1SDimitry Andric   ABIArgInfo classifyArgumentType(QualType Ty, bool Variadic,
49*0fca6ea1SDimitry Andric                                   unsigned &NumRegsLeft) const;
5006c3fb27SDimitry Andric 
5106c3fb27SDimitry Andric   void computeInfo(CGFunctionInfo &FI) const override;
52*0fca6ea1SDimitry Andric   RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
53*0fca6ea1SDimitry Andric                    AggValueSlot Slot) const override;
5406c3fb27SDimitry Andric };
5506c3fb27SDimitry Andric 
5606c3fb27SDimitry Andric bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
5706c3fb27SDimitry Andric   return true;
5806c3fb27SDimitry Andric }
5906c3fb27SDimitry Andric 
6006c3fb27SDimitry Andric bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
6106c3fb27SDimitry Andric   const Type *Base, uint64_t Members) const {
6206c3fb27SDimitry Andric   uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32;
6306c3fb27SDimitry Andric 
6406c3fb27SDimitry Andric   // Homogeneous Aggregates may occupy at most 16 registers.
6506c3fb27SDimitry Andric   return Members * NumRegs <= MaxNumRegsForArgsRet;
6606c3fb27SDimitry Andric }
6706c3fb27SDimitry Andric 
6806c3fb27SDimitry Andric /// Estimate number of registers the type will use when passed in registers.
6906c3fb27SDimitry Andric unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const {
7006c3fb27SDimitry Andric   unsigned NumRegs = 0;
7106c3fb27SDimitry Andric 
7206c3fb27SDimitry Andric   if (const VectorType *VT = Ty->getAs<VectorType>()) {
7306c3fb27SDimitry Andric     // Compute from the number of elements. The reported size is based on the
7406c3fb27SDimitry Andric     // in-memory size, which includes the padding 4th element for 3-vectors.
7506c3fb27SDimitry Andric     QualType EltTy = VT->getElementType();
7606c3fb27SDimitry Andric     unsigned EltSize = getContext().getTypeSize(EltTy);
7706c3fb27SDimitry Andric 
7806c3fb27SDimitry Andric     // 16-bit element vectors should be passed as packed.
7906c3fb27SDimitry Andric     if (EltSize == 16)
8006c3fb27SDimitry Andric       return (VT->getNumElements() + 1) / 2;
8106c3fb27SDimitry Andric 
8206c3fb27SDimitry Andric     unsigned EltNumRegs = (EltSize + 31) / 32;
8306c3fb27SDimitry Andric     return EltNumRegs * VT->getNumElements();
8406c3fb27SDimitry Andric   }
8506c3fb27SDimitry Andric 
8606c3fb27SDimitry Andric   if (const RecordType *RT = Ty->getAs<RecordType>()) {
8706c3fb27SDimitry Andric     const RecordDecl *RD = RT->getDecl();
8806c3fb27SDimitry Andric     assert(!RD->hasFlexibleArrayMember());
8906c3fb27SDimitry Andric 
9006c3fb27SDimitry Andric     for (const FieldDecl *Field : RD->fields()) {
9106c3fb27SDimitry Andric       QualType FieldTy = Field->getType();
9206c3fb27SDimitry Andric       NumRegs += numRegsForType(FieldTy);
9306c3fb27SDimitry Andric     }
9406c3fb27SDimitry Andric 
9506c3fb27SDimitry Andric     return NumRegs;
9606c3fb27SDimitry Andric   }
9706c3fb27SDimitry Andric 
9806c3fb27SDimitry Andric   return (getContext().getTypeSize(Ty) + 31) / 32;
9906c3fb27SDimitry Andric }
10006c3fb27SDimitry Andric 
10106c3fb27SDimitry Andric void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
10206c3fb27SDimitry Andric   llvm::CallingConv::ID CC = FI.getCallingConvention();
10306c3fb27SDimitry Andric 
10406c3fb27SDimitry Andric   if (!getCXXABI().classifyReturnType(FI))
10506c3fb27SDimitry Andric     FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
10606c3fb27SDimitry Andric 
107*0fca6ea1SDimitry Andric   unsigned ArgumentIndex = 0;
108*0fca6ea1SDimitry Andric   const unsigned numFixedArguments = FI.getNumRequiredArgs();
109*0fca6ea1SDimitry Andric 
11006c3fb27SDimitry Andric   unsigned NumRegsLeft = MaxNumRegsForArgsRet;
11106c3fb27SDimitry Andric   for (auto &Arg : FI.arguments()) {
11206c3fb27SDimitry Andric     if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
11306c3fb27SDimitry Andric       Arg.info = classifyKernelArgumentType(Arg.type);
11406c3fb27SDimitry Andric     } else {
115*0fca6ea1SDimitry Andric       bool FixedArgument = ArgumentIndex++ < numFixedArguments;
116*0fca6ea1SDimitry Andric       Arg.info = classifyArgumentType(Arg.type, !FixedArgument, NumRegsLeft);
11706c3fb27SDimitry Andric     }
11806c3fb27SDimitry Andric   }
11906c3fb27SDimitry Andric }
12006c3fb27SDimitry Andric 
121*0fca6ea1SDimitry Andric RValue AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
122*0fca6ea1SDimitry Andric                                 QualType Ty, AggValueSlot Slot) const {
123*0fca6ea1SDimitry Andric   const bool IsIndirect = false;
124*0fca6ea1SDimitry Andric   const bool AllowHigherAlign = false;
125*0fca6ea1SDimitry Andric   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
126*0fca6ea1SDimitry Andric                           getContext().getTypeInfoInChars(Ty),
127*0fca6ea1SDimitry Andric                           CharUnits::fromQuantity(4), AllowHigherAlign, Slot);
12806c3fb27SDimitry Andric }
12906c3fb27SDimitry Andric 
13006c3fb27SDimitry Andric ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
13106c3fb27SDimitry Andric   if (isAggregateTypeForABI(RetTy)) {
13206c3fb27SDimitry Andric     // Records with non-trivial destructors/copy-constructors should not be
13306c3fb27SDimitry Andric     // returned by value.
13406c3fb27SDimitry Andric     if (!getRecordArgABI(RetTy, getCXXABI())) {
13506c3fb27SDimitry Andric       // Ignore empty structs/unions.
13606c3fb27SDimitry Andric       if (isEmptyRecord(getContext(), RetTy, true))
13706c3fb27SDimitry Andric         return ABIArgInfo::getIgnore();
13806c3fb27SDimitry Andric 
13906c3fb27SDimitry Andric       // Lower single-element structs to just return a regular value.
14006c3fb27SDimitry Andric       if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
14106c3fb27SDimitry Andric         return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
14206c3fb27SDimitry Andric 
14306c3fb27SDimitry Andric       if (const RecordType *RT = RetTy->getAs<RecordType>()) {
14406c3fb27SDimitry Andric         const RecordDecl *RD = RT->getDecl();
14506c3fb27SDimitry Andric         if (RD->hasFlexibleArrayMember())
14606c3fb27SDimitry Andric           return DefaultABIInfo::classifyReturnType(RetTy);
14706c3fb27SDimitry Andric       }
14806c3fb27SDimitry Andric 
14906c3fb27SDimitry Andric       // Pack aggregates <= 4 bytes into single VGPR or pair.
15006c3fb27SDimitry Andric       uint64_t Size = getContext().getTypeSize(RetTy);
15106c3fb27SDimitry Andric       if (Size <= 16)
15206c3fb27SDimitry Andric         return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
15306c3fb27SDimitry Andric 
15406c3fb27SDimitry Andric       if (Size <= 32)
15506c3fb27SDimitry Andric         return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
15606c3fb27SDimitry Andric 
15706c3fb27SDimitry Andric       if (Size <= 64) {
15806c3fb27SDimitry Andric         llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
15906c3fb27SDimitry Andric         return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
16006c3fb27SDimitry Andric       }
16106c3fb27SDimitry Andric 
16206c3fb27SDimitry Andric       if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
16306c3fb27SDimitry Andric         return ABIArgInfo::getDirect();
16406c3fb27SDimitry Andric     }
16506c3fb27SDimitry Andric   }
16606c3fb27SDimitry Andric 
16706c3fb27SDimitry Andric   // Otherwise just do the default thing.
16806c3fb27SDimitry Andric   return DefaultABIInfo::classifyReturnType(RetTy);
16906c3fb27SDimitry Andric }
17006c3fb27SDimitry Andric 
17106c3fb27SDimitry Andric /// For kernels all parameters are really passed in a special buffer. It doesn't
17206c3fb27SDimitry Andric /// make sense to pass anything byval, so everything must be direct.
17306c3fb27SDimitry Andric ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
17406c3fb27SDimitry Andric   Ty = useFirstFieldIfTransparentUnion(Ty);
17506c3fb27SDimitry Andric 
17606c3fb27SDimitry Andric   // TODO: Can we omit empty structs?
17706c3fb27SDimitry Andric 
17806c3fb27SDimitry Andric   if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
17906c3fb27SDimitry Andric     Ty = QualType(SeltTy, 0);
18006c3fb27SDimitry Andric 
18106c3fb27SDimitry Andric   llvm::Type *OrigLTy = CGT.ConvertType(Ty);
18206c3fb27SDimitry Andric   llvm::Type *LTy = OrigLTy;
18306c3fb27SDimitry Andric   if (getContext().getLangOpts().HIP) {
18406c3fb27SDimitry Andric     LTy = coerceKernelArgumentType(
18506c3fb27SDimitry Andric         OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
18606c3fb27SDimitry Andric         /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
18706c3fb27SDimitry Andric   }
18806c3fb27SDimitry Andric 
18906c3fb27SDimitry Andric   // FIXME: Should also use this for OpenCL, but it requires addressing the
19006c3fb27SDimitry Andric   // problem of kernels being called.
19106c3fb27SDimitry Andric   //
19206c3fb27SDimitry Andric   // FIXME: This doesn't apply the optimization of coercing pointers in structs
19306c3fb27SDimitry Andric   // to global address space when using byref. This would require implementing a
19406c3fb27SDimitry Andric   // new kind of coercion of the in-memory type when for indirect arguments.
19506c3fb27SDimitry Andric   if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
19606c3fb27SDimitry Andric       isAggregateTypeForABI(Ty)) {
19706c3fb27SDimitry Andric     return ABIArgInfo::getIndirectAliased(
19806c3fb27SDimitry Andric         getContext().getTypeAlignInChars(Ty),
19906c3fb27SDimitry Andric         getContext().getTargetAddressSpace(LangAS::opencl_constant),
20006c3fb27SDimitry Andric         false /*Realign*/, nullptr /*Padding*/);
20106c3fb27SDimitry Andric   }
20206c3fb27SDimitry Andric 
20306c3fb27SDimitry Andric   // If we set CanBeFlattened to true, CodeGen will expand the struct to its
20406c3fb27SDimitry Andric   // individual elements, which confuses the Clover OpenCL backend; therefore we
20506c3fb27SDimitry Andric   // have to set it to false here. Other args of getDirect() are just defaults.
20606c3fb27SDimitry Andric   return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
20706c3fb27SDimitry Andric }
20806c3fb27SDimitry Andric 
209*0fca6ea1SDimitry Andric ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
21006c3fb27SDimitry Andric                                                unsigned &NumRegsLeft) const {
21106c3fb27SDimitry Andric   assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");
21206c3fb27SDimitry Andric 
21306c3fb27SDimitry Andric   Ty = useFirstFieldIfTransparentUnion(Ty);
21406c3fb27SDimitry Andric 
215*0fca6ea1SDimitry Andric   if (Variadic) {
216*0fca6ea1SDimitry Andric     return ABIArgInfo::getDirect(/*T=*/nullptr,
217*0fca6ea1SDimitry Andric                                  /*Offset=*/0,
218*0fca6ea1SDimitry Andric                                  /*Padding=*/nullptr,
219*0fca6ea1SDimitry Andric                                  /*CanBeFlattened=*/false,
220*0fca6ea1SDimitry Andric                                  /*Align=*/0);
221*0fca6ea1SDimitry Andric   }
222*0fca6ea1SDimitry Andric 
22306c3fb27SDimitry Andric   if (isAggregateTypeForABI(Ty)) {
22406c3fb27SDimitry Andric     // Records with non-trivial destructors/copy-constructors should not be
22506c3fb27SDimitry Andric     // passed by value.
22606c3fb27SDimitry Andric     if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
22706c3fb27SDimitry Andric       return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
22806c3fb27SDimitry Andric 
22906c3fb27SDimitry Andric     // Ignore empty structs/unions.
23006c3fb27SDimitry Andric     if (isEmptyRecord(getContext(), Ty, true))
23106c3fb27SDimitry Andric       return ABIArgInfo::getIgnore();
23206c3fb27SDimitry Andric 
23306c3fb27SDimitry Andric     // Lower single-element structs to just pass a regular value. TODO: We
23406c3fb27SDimitry Andric     // could do reasonable-size multiple-element structs too, using getExpand(),
23506c3fb27SDimitry Andric     // though watch out for things like bitfields.
23606c3fb27SDimitry Andric     if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
23706c3fb27SDimitry Andric       return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
23806c3fb27SDimitry Andric 
23906c3fb27SDimitry Andric     if (const RecordType *RT = Ty->getAs<RecordType>()) {
24006c3fb27SDimitry Andric       const RecordDecl *RD = RT->getDecl();
24106c3fb27SDimitry Andric       if (RD->hasFlexibleArrayMember())
24206c3fb27SDimitry Andric         return DefaultABIInfo::classifyArgumentType(Ty);
24306c3fb27SDimitry Andric     }
24406c3fb27SDimitry Andric 
24506c3fb27SDimitry Andric     // Pack aggregates <= 8 bytes into single VGPR or pair.
24606c3fb27SDimitry Andric     uint64_t Size = getContext().getTypeSize(Ty);
24706c3fb27SDimitry Andric     if (Size <= 64) {
24806c3fb27SDimitry Andric       unsigned NumRegs = (Size + 31) / 32;
24906c3fb27SDimitry Andric       NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
25006c3fb27SDimitry Andric 
25106c3fb27SDimitry Andric       if (Size <= 16)
25206c3fb27SDimitry Andric         return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
25306c3fb27SDimitry Andric 
25406c3fb27SDimitry Andric       if (Size <= 32)
25506c3fb27SDimitry Andric         return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
25606c3fb27SDimitry Andric 
25706c3fb27SDimitry Andric       // XXX: Should this be i64 instead, and should the limit increase?
25806c3fb27SDimitry Andric       llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
25906c3fb27SDimitry Andric       return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
26006c3fb27SDimitry Andric     }
26106c3fb27SDimitry Andric 
26206c3fb27SDimitry Andric     if (NumRegsLeft > 0) {
26306c3fb27SDimitry Andric       unsigned NumRegs = numRegsForType(Ty);
26406c3fb27SDimitry Andric       if (NumRegsLeft >= NumRegs) {
26506c3fb27SDimitry Andric         NumRegsLeft -= NumRegs;
26606c3fb27SDimitry Andric         return ABIArgInfo::getDirect();
26706c3fb27SDimitry Andric       }
26806c3fb27SDimitry Andric     }
2695f757f3fSDimitry Andric 
2705f757f3fSDimitry Andric     // Use pass-by-reference in stead of pass-by-value for struct arguments in
2715f757f3fSDimitry Andric     // function ABI.
2725f757f3fSDimitry Andric     return ABIArgInfo::getIndirectAliased(
2735f757f3fSDimitry Andric         getContext().getTypeAlignInChars(Ty),
2745f757f3fSDimitry Andric         getContext().getTargetAddressSpace(LangAS::opencl_private));
27506c3fb27SDimitry Andric   }
27606c3fb27SDimitry Andric 
27706c3fb27SDimitry Andric   // Otherwise just do the default thing.
27806c3fb27SDimitry Andric   ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty);
27906c3fb27SDimitry Andric   if (!ArgInfo.isIndirect()) {
28006c3fb27SDimitry Andric     unsigned NumRegs = numRegsForType(Ty);
28106c3fb27SDimitry Andric     NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
28206c3fb27SDimitry Andric   }
28306c3fb27SDimitry Andric 
28406c3fb27SDimitry Andric   return ArgInfo;
28506c3fb27SDimitry Andric }
28606c3fb27SDimitry Andric 
28706c3fb27SDimitry Andric class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
28806c3fb27SDimitry Andric public:
28906c3fb27SDimitry Andric   AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
29006c3fb27SDimitry Andric       : TargetCodeGenInfo(std::make_unique<AMDGPUABIInfo>(CGT)) {}
29106c3fb27SDimitry Andric 
29206c3fb27SDimitry Andric   void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
29306c3fb27SDimitry Andric                                  CodeGenModule &CGM) const;
29406c3fb27SDimitry Andric 
2955f757f3fSDimitry Andric   void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const override;
2965f757f3fSDimitry Andric 
29706c3fb27SDimitry Andric   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
29806c3fb27SDimitry Andric                            CodeGen::CodeGenModule &M) const override;
29906c3fb27SDimitry Andric   unsigned getOpenCLKernelCallingConv() const override;
30006c3fb27SDimitry Andric 
30106c3fb27SDimitry Andric   llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
30206c3fb27SDimitry Andric       llvm::PointerType *T, QualType QT) const override;
30306c3fb27SDimitry Andric 
30406c3fb27SDimitry Andric   LangAS getASTAllocaAddressSpace() const override {
30506c3fb27SDimitry Andric     return getLangASFromTargetAS(
30606c3fb27SDimitry Andric         getABIInfo().getDataLayout().getAllocaAddrSpace());
30706c3fb27SDimitry Andric   }
30806c3fb27SDimitry Andric   LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
30906c3fb27SDimitry Andric                                   const VarDecl *D) const override;
31006c3fb27SDimitry Andric   llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
31106c3fb27SDimitry Andric                                          SyncScope Scope,
31206c3fb27SDimitry Andric                                          llvm::AtomicOrdering Ordering,
31306c3fb27SDimitry Andric                                          llvm::LLVMContext &Ctx) const override;
31406c3fb27SDimitry Andric   llvm::Value *createEnqueuedBlockKernel(CodeGenFunction &CGF,
31506c3fb27SDimitry Andric                                          llvm::Function *BlockInvokeFunc,
31606c3fb27SDimitry Andric                                          llvm::Type *BlockTy) const override;
31706c3fb27SDimitry Andric   bool shouldEmitStaticExternCAliases() const override;
31806c3fb27SDimitry Andric   bool shouldEmitDWARFBitFieldSeparators() const override;
31906c3fb27SDimitry Andric   void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
32006c3fb27SDimitry Andric };
32106c3fb27SDimitry Andric }
32206c3fb27SDimitry Andric 
32306c3fb27SDimitry Andric static bool requiresAMDGPUProtectedVisibility(const Decl *D,
32406c3fb27SDimitry Andric                                               llvm::GlobalValue *GV) {
32506c3fb27SDimitry Andric   if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
32606c3fb27SDimitry Andric     return false;
32706c3fb27SDimitry Andric 
3285f757f3fSDimitry Andric   return !D->hasAttr<OMPDeclareTargetDeclAttr>() &&
3295f757f3fSDimitry Andric          (D->hasAttr<OpenCLKernelAttr>() ||
33006c3fb27SDimitry Andric           (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
33106c3fb27SDimitry Andric           (isa<VarDecl>(D) &&
33206c3fb27SDimitry Andric            (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
33306c3fb27SDimitry Andric             cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() ||
3345f757f3fSDimitry Andric             cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType())));
33506c3fb27SDimitry Andric }
33606c3fb27SDimitry Andric 
33706c3fb27SDimitry Andric void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
33806c3fb27SDimitry Andric     const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const {
33906c3fb27SDimitry Andric   const auto *ReqdWGS =
34006c3fb27SDimitry Andric       M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
34106c3fb27SDimitry Andric   const bool IsOpenCLKernel =
34206c3fb27SDimitry Andric       M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
34306c3fb27SDimitry Andric   const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();
34406c3fb27SDimitry Andric 
34506c3fb27SDimitry Andric   const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
34606c3fb27SDimitry Andric   if (ReqdWGS || FlatWGS) {
3475f757f3fSDimitry Andric     M.handleAMDGPUFlatWorkGroupSizeAttr(F, FlatWGS, ReqdWGS);
34806c3fb27SDimitry Andric   } else if (IsOpenCLKernel || IsHIPKernel) {
34906c3fb27SDimitry Andric     // By default, restrict the maximum size to a value specified by
35006c3fb27SDimitry Andric     // --gpu-max-threads-per-block=n or its default value for HIP.
35106c3fb27SDimitry Andric     const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
35206c3fb27SDimitry Andric     const unsigned DefaultMaxWorkGroupSize =
35306c3fb27SDimitry Andric         IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
35406c3fb27SDimitry Andric                        : M.getLangOpts().GPUMaxThreadsPerBlock;
35506c3fb27SDimitry Andric     std::string AttrVal =
35606c3fb27SDimitry Andric         std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
35706c3fb27SDimitry Andric     F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
35806c3fb27SDimitry Andric   }
35906c3fb27SDimitry Andric 
3605f757f3fSDimitry Andric   if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>())
3615f757f3fSDimitry Andric     M.handleAMDGPUWavesPerEUAttr(F, Attr);
36206c3fb27SDimitry Andric 
36306c3fb27SDimitry Andric   if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
36406c3fb27SDimitry Andric     unsigned NumSGPR = Attr->getNumSGPR();
36506c3fb27SDimitry Andric 
36606c3fb27SDimitry Andric     if (NumSGPR != 0)
36706c3fb27SDimitry Andric       F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR));
36806c3fb27SDimitry Andric   }
36906c3fb27SDimitry Andric 
37006c3fb27SDimitry Andric   if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
37106c3fb27SDimitry Andric     uint32_t NumVGPR = Attr->getNumVGPR();
37206c3fb27SDimitry Andric 
37306c3fb27SDimitry Andric     if (NumVGPR != 0)
37406c3fb27SDimitry Andric       F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
37506c3fb27SDimitry Andric   }
376*0fca6ea1SDimitry Andric 
377*0fca6ea1SDimitry Andric   if (const auto *Attr = FD->getAttr<AMDGPUMaxNumWorkGroupsAttr>()) {
378*0fca6ea1SDimitry Andric     uint32_t X = Attr->getMaxNumWorkGroupsX()
379*0fca6ea1SDimitry Andric                      ->EvaluateKnownConstInt(M.getContext())
380*0fca6ea1SDimitry Andric                      .getExtValue();
381*0fca6ea1SDimitry Andric     // Y and Z dimensions default to 1 if not specified
382*0fca6ea1SDimitry Andric     uint32_t Y = Attr->getMaxNumWorkGroupsY()
383*0fca6ea1SDimitry Andric                      ? Attr->getMaxNumWorkGroupsY()
384*0fca6ea1SDimitry Andric                            ->EvaluateKnownConstInt(M.getContext())
385*0fca6ea1SDimitry Andric                            .getExtValue()
386*0fca6ea1SDimitry Andric                      : 1;
387*0fca6ea1SDimitry Andric     uint32_t Z = Attr->getMaxNumWorkGroupsZ()
388*0fca6ea1SDimitry Andric                      ? Attr->getMaxNumWorkGroupsZ()
389*0fca6ea1SDimitry Andric                            ->EvaluateKnownConstInt(M.getContext())
390*0fca6ea1SDimitry Andric                            .getExtValue()
391*0fca6ea1SDimitry Andric                      : 1;
392*0fca6ea1SDimitry Andric 
393*0fca6ea1SDimitry Andric     llvm::SmallString<32> AttrVal;
394*0fca6ea1SDimitry Andric     llvm::raw_svector_ostream OS(AttrVal);
395*0fca6ea1SDimitry Andric     OS << X << ',' << Y << ',' << Z;
396*0fca6ea1SDimitry Andric 
397*0fca6ea1SDimitry Andric     F->addFnAttr("amdgpu-max-num-workgroups", AttrVal.str());
398*0fca6ea1SDimitry Andric   }
39906c3fb27SDimitry Andric }
40006c3fb27SDimitry Andric 
4015f757f3fSDimitry Andric /// Emits control constants used to change per-architecture behaviour in the
4025f757f3fSDimitry Andric /// AMDGPU ROCm device libraries.
4035f757f3fSDimitry Andric void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
4045f757f3fSDimitry Andric     CodeGen::CodeGenModule &CGM) const {
4055f757f3fSDimitry Andric   StringRef Name = "__oclc_ABI_version";
4065f757f3fSDimitry Andric   llvm::GlobalVariable *OriginalGV = CGM.getModule().getNamedGlobal(Name);
4075f757f3fSDimitry Andric   if (OriginalGV && !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage()))
4085f757f3fSDimitry Andric     return;
4095f757f3fSDimitry Andric 
4105f757f3fSDimitry Andric   if (CGM.getTarget().getTargetOpts().CodeObjectVersion ==
4115f757f3fSDimitry Andric       llvm::CodeObjectVersionKind::COV_None)
4125f757f3fSDimitry Andric     return;
4135f757f3fSDimitry Andric 
4145f757f3fSDimitry Andric   auto *Type = llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), 32);
4155f757f3fSDimitry Andric   llvm::Constant *COV = llvm::ConstantInt::get(
4165f757f3fSDimitry Andric       Type, CGM.getTarget().getTargetOpts().CodeObjectVersion);
4175f757f3fSDimitry Andric 
4185f757f3fSDimitry Andric   // It needs to be constant weak_odr without externally_initialized so that
4195f757f3fSDimitry Andric   // the load instuction can be eliminated by the IPSCCP.
4205f757f3fSDimitry Andric   auto *GV = new llvm::GlobalVariable(
4215f757f3fSDimitry Andric       CGM.getModule(), Type, true, llvm::GlobalValue::WeakODRLinkage, COV, Name,
4225f757f3fSDimitry Andric       nullptr, llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
4235f757f3fSDimitry Andric       CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
4245f757f3fSDimitry Andric   GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
4255f757f3fSDimitry Andric   GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility);
4265f757f3fSDimitry Andric 
4275f757f3fSDimitry Andric   // Replace any external references to this variable with the new global.
4285f757f3fSDimitry Andric   if (OriginalGV) {
4295f757f3fSDimitry Andric     OriginalGV->replaceAllUsesWith(GV);
4305f757f3fSDimitry Andric     GV->takeName(OriginalGV);
4315f757f3fSDimitry Andric     OriginalGV->eraseFromParent();
4325f757f3fSDimitry Andric   }
4335f757f3fSDimitry Andric }
4345f757f3fSDimitry Andric 
43506c3fb27SDimitry Andric void AMDGPUTargetCodeGenInfo::setTargetAttributes(
43606c3fb27SDimitry Andric     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
43706c3fb27SDimitry Andric   if (requiresAMDGPUProtectedVisibility(D, GV)) {
43806c3fb27SDimitry Andric     GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
43906c3fb27SDimitry Andric     GV->setDSOLocal(true);
44006c3fb27SDimitry Andric   }
44106c3fb27SDimitry Andric 
44206c3fb27SDimitry Andric   if (GV->isDeclaration())
44306c3fb27SDimitry Andric     return;
44406c3fb27SDimitry Andric 
44506c3fb27SDimitry Andric   llvm::Function *F = dyn_cast<llvm::Function>(GV);
44606c3fb27SDimitry Andric   if (!F)
44706c3fb27SDimitry Andric     return;
44806c3fb27SDimitry Andric 
44906c3fb27SDimitry Andric   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
45006c3fb27SDimitry Andric   if (FD)
45106c3fb27SDimitry Andric     setFunctionDeclAttributes(FD, F, M);
45206c3fb27SDimitry Andric 
45306c3fb27SDimitry Andric   if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
45406c3fb27SDimitry Andric     F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");
45506c3fb27SDimitry Andric 
45606c3fb27SDimitry Andric   if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
45706c3fb27SDimitry Andric     F->addFnAttr("amdgpu-ieee", "false");
45806c3fb27SDimitry Andric }
45906c3fb27SDimitry Andric 
46006c3fb27SDimitry Andric unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
46106c3fb27SDimitry Andric   return llvm::CallingConv::AMDGPU_KERNEL;
46206c3fb27SDimitry Andric }
46306c3fb27SDimitry Andric 
46406c3fb27SDimitry Andric // Currently LLVM assumes null pointers always have value 0,
46506c3fb27SDimitry Andric // which results in incorrectly transformed IR. Therefore, instead of
46606c3fb27SDimitry Andric // emitting null pointers in private and local address spaces, a null
46706c3fb27SDimitry Andric // pointer in generic address space is emitted which is casted to a
46806c3fb27SDimitry Andric // pointer in local or private address space.
46906c3fb27SDimitry Andric llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
47006c3fb27SDimitry Andric     const CodeGen::CodeGenModule &CGM, llvm::PointerType *PT,
47106c3fb27SDimitry Andric     QualType QT) const {
47206c3fb27SDimitry Andric   if (CGM.getContext().getTargetNullPointerValue(QT) == 0)
47306c3fb27SDimitry Andric     return llvm::ConstantPointerNull::get(PT);
47406c3fb27SDimitry Andric 
47506c3fb27SDimitry Andric   auto &Ctx = CGM.getContext();
47606c3fb27SDimitry Andric   auto NPT = llvm::PointerType::get(
47706c3fb27SDimitry Andric       PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
47806c3fb27SDimitry Andric   return llvm::ConstantExpr::getAddrSpaceCast(
47906c3fb27SDimitry Andric       llvm::ConstantPointerNull::get(NPT), PT);
48006c3fb27SDimitry Andric }
48106c3fb27SDimitry Andric 
48206c3fb27SDimitry Andric LangAS
48306c3fb27SDimitry Andric AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
48406c3fb27SDimitry Andric                                                   const VarDecl *D) const {
48506c3fb27SDimitry Andric   assert(!CGM.getLangOpts().OpenCL &&
48606c3fb27SDimitry Andric          !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
48706c3fb27SDimitry Andric          "Address space agnostic languages only");
48806c3fb27SDimitry Andric   LangAS DefaultGlobalAS = getLangASFromTargetAS(
48906c3fb27SDimitry Andric       CGM.getContext().getTargetAddressSpace(LangAS::opencl_global));
49006c3fb27SDimitry Andric   if (!D)
49106c3fb27SDimitry Andric     return DefaultGlobalAS;
49206c3fb27SDimitry Andric 
49306c3fb27SDimitry Andric   LangAS AddrSpace = D->getType().getAddressSpace();
49406c3fb27SDimitry Andric   if (AddrSpace != LangAS::Default)
49506c3fb27SDimitry Andric     return AddrSpace;
49606c3fb27SDimitry Andric 
49706c3fb27SDimitry Andric   // Only promote to address space 4 if VarDecl has constant initialization.
4985f757f3fSDimitry Andric   if (D->getType().isConstantStorage(CGM.getContext(), false, false) &&
49906c3fb27SDimitry Andric       D->hasConstantInitialization()) {
50006c3fb27SDimitry Andric     if (auto ConstAS = CGM.getTarget().getConstantAddressSpace())
50106c3fb27SDimitry Andric       return *ConstAS;
50206c3fb27SDimitry Andric   }
50306c3fb27SDimitry Andric   return DefaultGlobalAS;
50406c3fb27SDimitry Andric }
50506c3fb27SDimitry Andric 
50606c3fb27SDimitry Andric llvm::SyncScope::ID
50706c3fb27SDimitry Andric AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
50806c3fb27SDimitry Andric                                             SyncScope Scope,
50906c3fb27SDimitry Andric                                             llvm::AtomicOrdering Ordering,
51006c3fb27SDimitry Andric                                             llvm::LLVMContext &Ctx) const {
51106c3fb27SDimitry Andric   std::string Name;
51206c3fb27SDimitry Andric   switch (Scope) {
51306c3fb27SDimitry Andric   case SyncScope::HIPSingleThread:
5145f757f3fSDimitry Andric   case SyncScope::SingleScope:
51506c3fb27SDimitry Andric     Name = "singlethread";
51606c3fb27SDimitry Andric     break;
51706c3fb27SDimitry Andric   case SyncScope::HIPWavefront:
51806c3fb27SDimitry Andric   case SyncScope::OpenCLSubGroup:
5195f757f3fSDimitry Andric   case SyncScope::WavefrontScope:
52006c3fb27SDimitry Andric     Name = "wavefront";
52106c3fb27SDimitry Andric     break;
52206c3fb27SDimitry Andric   case SyncScope::HIPWorkgroup:
52306c3fb27SDimitry Andric   case SyncScope::OpenCLWorkGroup:
5245f757f3fSDimitry Andric   case SyncScope::WorkgroupScope:
52506c3fb27SDimitry Andric     Name = "workgroup";
52606c3fb27SDimitry Andric     break;
52706c3fb27SDimitry Andric   case SyncScope::HIPAgent:
52806c3fb27SDimitry Andric   case SyncScope::OpenCLDevice:
5295f757f3fSDimitry Andric   case SyncScope::DeviceScope:
53006c3fb27SDimitry Andric     Name = "agent";
53106c3fb27SDimitry Andric     break;
5325f757f3fSDimitry Andric   case SyncScope::SystemScope:
53306c3fb27SDimitry Andric   case SyncScope::HIPSystem:
53406c3fb27SDimitry Andric   case SyncScope::OpenCLAllSVMDevices:
53506c3fb27SDimitry Andric     Name = "";
53606c3fb27SDimitry Andric     break;
53706c3fb27SDimitry Andric   }
53806c3fb27SDimitry Andric 
53906c3fb27SDimitry Andric   if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) {
54006c3fb27SDimitry Andric     if (!Name.empty())
54106c3fb27SDimitry Andric       Name = Twine(Twine(Name) + Twine("-")).str();
54206c3fb27SDimitry Andric 
54306c3fb27SDimitry Andric     Name = Twine(Twine(Name) + Twine("one-as")).str();
54406c3fb27SDimitry Andric   }
54506c3fb27SDimitry Andric 
54606c3fb27SDimitry Andric   return Ctx.getOrInsertSyncScopeID(Name);
54706c3fb27SDimitry Andric }
54806c3fb27SDimitry Andric 
54906c3fb27SDimitry Andric bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
55006c3fb27SDimitry Andric   return false;
55106c3fb27SDimitry Andric }
55206c3fb27SDimitry Andric 
55306c3fb27SDimitry Andric bool AMDGPUTargetCodeGenInfo::shouldEmitDWARFBitFieldSeparators() const {
55406c3fb27SDimitry Andric   return true;
55506c3fb27SDimitry Andric }
55606c3fb27SDimitry Andric 
55706c3fb27SDimitry Andric void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
55806c3fb27SDimitry Andric     const FunctionType *&FT) const {
55906c3fb27SDimitry Andric   FT = getABIInfo().getContext().adjustFunctionType(
56006c3fb27SDimitry Andric       FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
56106c3fb27SDimitry Andric }
56206c3fb27SDimitry Andric 
56306c3fb27SDimitry Andric /// Create an OpenCL kernel for an enqueued block.
56406c3fb27SDimitry Andric ///
56506c3fb27SDimitry Andric /// The type of the first argument (the block literal) is the struct type
56606c3fb27SDimitry Andric /// of the block literal instead of a pointer type. The first argument
56706c3fb27SDimitry Andric /// (block literal) is passed directly by value to the kernel. The kernel
56806c3fb27SDimitry Andric /// allocates the same type of struct on stack and stores the block literal
56906c3fb27SDimitry Andric /// to it and passes its pointer to the block invoke function. The kernel
57006c3fb27SDimitry Andric /// has "enqueued-block" function attribute and kernel argument metadata.
57106c3fb27SDimitry Andric llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
57206c3fb27SDimitry Andric     CodeGenFunction &CGF, llvm::Function *Invoke, llvm::Type *BlockTy) const {
57306c3fb27SDimitry Andric   auto &Builder = CGF.Builder;
57406c3fb27SDimitry Andric   auto &C = CGF.getLLVMContext();
57506c3fb27SDimitry Andric 
57606c3fb27SDimitry Andric   auto *InvokeFT = Invoke->getFunctionType();
57706c3fb27SDimitry Andric   llvm::SmallVector<llvm::Type *, 2> ArgTys;
57806c3fb27SDimitry Andric   llvm::SmallVector<llvm::Metadata *, 8> AddressQuals;
57906c3fb27SDimitry Andric   llvm::SmallVector<llvm::Metadata *, 8> AccessQuals;
58006c3fb27SDimitry Andric   llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames;
58106c3fb27SDimitry Andric   llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames;
58206c3fb27SDimitry Andric   llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals;
58306c3fb27SDimitry Andric   llvm::SmallVector<llvm::Metadata *, 8> ArgNames;
58406c3fb27SDimitry Andric 
58506c3fb27SDimitry Andric   ArgTys.push_back(BlockTy);
58606c3fb27SDimitry Andric   ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
58706c3fb27SDimitry Andric   AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0)));
58806c3fb27SDimitry Andric   ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
58906c3fb27SDimitry Andric   ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
59006c3fb27SDimitry Andric   AccessQuals.push_back(llvm::MDString::get(C, "none"));
59106c3fb27SDimitry Andric   ArgNames.push_back(llvm::MDString::get(C, "block_literal"));
59206c3fb27SDimitry Andric   for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) {
59306c3fb27SDimitry Andric     ArgTys.push_back(InvokeFT->getParamType(I));
59406c3fb27SDimitry Andric     ArgTypeNames.push_back(llvm::MDString::get(C, "void*"));
59506c3fb27SDimitry Andric     AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3)));
59606c3fb27SDimitry Andric     AccessQuals.push_back(llvm::MDString::get(C, "none"));
59706c3fb27SDimitry Andric     ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*"));
59806c3fb27SDimitry Andric     ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
59906c3fb27SDimitry Andric     ArgNames.push_back(
60006c3fb27SDimitry Andric         llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str()));
60106c3fb27SDimitry Andric   }
60206c3fb27SDimitry Andric   std::string Name = Invoke->getName().str() + "_kernel";
60306c3fb27SDimitry Andric   auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false);
60406c3fb27SDimitry Andric   auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
60506c3fb27SDimitry Andric                                    &CGF.CGM.getModule());
60606c3fb27SDimitry Andric   F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
60706c3fb27SDimitry Andric 
60806c3fb27SDimitry Andric   llvm::AttrBuilder KernelAttrs(C);
60906c3fb27SDimitry Andric   // FIXME: The invoke isn't applying the right attributes either
61006c3fb27SDimitry Andric   // FIXME: This is missing setTargetAttributes
61106c3fb27SDimitry Andric   CGF.CGM.addDefaultFunctionDefinitionAttributes(KernelAttrs);
61206c3fb27SDimitry Andric   KernelAttrs.addAttribute("enqueued-block");
61306c3fb27SDimitry Andric   F->addFnAttrs(KernelAttrs);
61406c3fb27SDimitry Andric 
61506c3fb27SDimitry Andric   auto IP = CGF.Builder.saveIP();
61606c3fb27SDimitry Andric   auto *BB = llvm::BasicBlock::Create(C, "entry", F);
61706c3fb27SDimitry Andric   Builder.SetInsertPoint(BB);
61806c3fb27SDimitry Andric   const auto BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlign(BlockTy);
61906c3fb27SDimitry Andric   auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr);
62006c3fb27SDimitry Andric   BlockPtr->setAlignment(BlockAlign);
62106c3fb27SDimitry Andric   Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
62206c3fb27SDimitry Andric   auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
62306c3fb27SDimitry Andric   llvm::SmallVector<llvm::Value *, 2> Args;
62406c3fb27SDimitry Andric   Args.push_back(Cast);
62506c3fb27SDimitry Andric   for (llvm::Argument &A : llvm::drop_begin(F->args()))
62606c3fb27SDimitry Andric     Args.push_back(&A);
62706c3fb27SDimitry Andric   llvm::CallInst *call = Builder.CreateCall(Invoke, Args);
62806c3fb27SDimitry Andric   call->setCallingConv(Invoke->getCallingConv());
62906c3fb27SDimitry Andric   Builder.CreateRetVoid();
63006c3fb27SDimitry Andric   Builder.restoreIP(IP);
63106c3fb27SDimitry Andric 
63206c3fb27SDimitry Andric   F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals));
63306c3fb27SDimitry Andric   F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals));
63406c3fb27SDimitry Andric   F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames));
63506c3fb27SDimitry Andric   F->setMetadata("kernel_arg_base_type",
63606c3fb27SDimitry Andric                  llvm::MDNode::get(C, ArgBaseTypeNames));
63706c3fb27SDimitry Andric   F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals));
63806c3fb27SDimitry Andric   if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata)
63906c3fb27SDimitry Andric     F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames));
64006c3fb27SDimitry Andric 
64106c3fb27SDimitry Andric   return F;
64206c3fb27SDimitry Andric }
64306c3fb27SDimitry Andric 
6445f757f3fSDimitry Andric void CodeGenModule::handleAMDGPUFlatWorkGroupSizeAttr(
6455f757f3fSDimitry Andric     llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *FlatWGS,
6465f757f3fSDimitry Andric     const ReqdWorkGroupSizeAttr *ReqdWGS, int32_t *MinThreadsVal,
6475f757f3fSDimitry Andric     int32_t *MaxThreadsVal) {
6485f757f3fSDimitry Andric   unsigned Min = 0;
6495f757f3fSDimitry Andric   unsigned Max = 0;
6505f757f3fSDimitry Andric   if (FlatWGS) {
6515f757f3fSDimitry Andric     Min = FlatWGS->getMin()->EvaluateKnownConstInt(getContext()).getExtValue();
6525f757f3fSDimitry Andric     Max = FlatWGS->getMax()->EvaluateKnownConstInt(getContext()).getExtValue();
6535f757f3fSDimitry Andric   }
6545f757f3fSDimitry Andric   if (ReqdWGS && Min == 0 && Max == 0)
6555f757f3fSDimitry Andric     Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
6565f757f3fSDimitry Andric 
6575f757f3fSDimitry Andric   if (Min != 0) {
6585f757f3fSDimitry Andric     assert(Min <= Max && "Min must be less than or equal Max");
6595f757f3fSDimitry Andric 
6605f757f3fSDimitry Andric     if (MinThreadsVal)
6615f757f3fSDimitry Andric       *MinThreadsVal = Min;
6625f757f3fSDimitry Andric     if (MaxThreadsVal)
6635f757f3fSDimitry Andric       *MaxThreadsVal = Max;
6645f757f3fSDimitry Andric     std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
6655f757f3fSDimitry Andric     if (F)
6665f757f3fSDimitry Andric       F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
6675f757f3fSDimitry Andric   } else
6685f757f3fSDimitry Andric     assert(Max == 0 && "Max must be zero");
6695f757f3fSDimitry Andric }
6705f757f3fSDimitry Andric 
6715f757f3fSDimitry Andric void CodeGenModule::handleAMDGPUWavesPerEUAttr(
6725f757f3fSDimitry Andric     llvm::Function *F, const AMDGPUWavesPerEUAttr *Attr) {
6735f757f3fSDimitry Andric   unsigned Min =
6745f757f3fSDimitry Andric       Attr->getMin()->EvaluateKnownConstInt(getContext()).getExtValue();
6755f757f3fSDimitry Andric   unsigned Max =
6765f757f3fSDimitry Andric       Attr->getMax()
6775f757f3fSDimitry Andric           ? Attr->getMax()->EvaluateKnownConstInt(getContext()).getExtValue()
6785f757f3fSDimitry Andric           : 0;
6795f757f3fSDimitry Andric 
6805f757f3fSDimitry Andric   if (Min != 0) {
6815f757f3fSDimitry Andric     assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max");
6825f757f3fSDimitry Andric 
6835f757f3fSDimitry Andric     std::string AttrVal = llvm::utostr(Min);
6845f757f3fSDimitry Andric     if (Max != 0)
6855f757f3fSDimitry Andric       AttrVal = AttrVal + "," + llvm::utostr(Max);
6865f757f3fSDimitry Andric     F->addFnAttr("amdgpu-waves-per-eu", AttrVal);
6875f757f3fSDimitry Andric   } else
6885f757f3fSDimitry Andric     assert(Max == 0 && "Max must be zero");
6895f757f3fSDimitry Andric }
6905f757f3fSDimitry Andric 
69106c3fb27SDimitry Andric std::unique_ptr<TargetCodeGenInfo>
69206c3fb27SDimitry Andric CodeGen::createAMDGPUTargetCodeGenInfo(CodeGenModule &CGM) {
69306c3fb27SDimitry Andric   return std::make_unique<AMDGPUTargetCodeGenInfo>(CGM.getTypes());
69406c3fb27SDimitry Andric }
695