//===- AMDGPU.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "clang/Basic/TargetOptions.h"

using namespace clang;
using namespace clang::CodeGen;

//===----------------------------------------------------------------------===//
// AMDGPU ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

/// Argument/return classification for the AMDGPU target. Small values are
/// packed into 32-bit registers drawn from a shared per-function budget
/// (MaxNumRegsForArgsRet); anything that does not fit is passed indirectly.
class AMDGPUABIInfo final : public DefaultABIInfo {
private:
  // Shared budget, in 32-bit registers, covering all arguments and the
  // return value of a non-kernel function.
  static const unsigned MaxNumRegsForArgsRet = 16;

  unsigned numRegsForType(QualType Ty) const;

  bool isHomogeneousAggregateBaseType(QualType Ty) const override;
  bool isHomogeneousAggregateSmallEnough(const Type *Base,
                                         uint64_t Members) const override;

  // Coerce HIP scalar pointer arguments from generic pointers to global ones.
  // Non-pointer types, and pointers not in FromAS, are returned unchanged.
  llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
                                       unsigned ToAS) const {
    // Single value types.
    auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty);
    if (PtrTy && PtrTy->getAddressSpace() == FromAS)
      return llvm::PointerType::get(Ty->getContext(), ToAS);
    return Ty;
  }

public:
  explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) :
    DefaultABIInfo(CGT) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
  ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;
};

// On AMDGPU any element type may serve as the base of a homogeneous
// aggregate; only the total register count (below) constrains it.
bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
  return true;
}

bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
    const Type *Base, uint64_t Members) const {
  // Registers one member occupies, rounding the bit-size up to 32-bit units.
  uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32;

  // Homogeneous Aggregates may occupy at most 16 registers.
  return Members * NumRegs <= MaxNumRegsForArgsRet;
}

/// Estimate number of registers the type will use when passed in registers.
unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const {
  unsigned NumRegs = 0;

  if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // Compute from the number of elements. The reported size is based on the
    // in-memory size, which includes the padding 4th element for 3-vectors.
    QualType EltTy = VT->getElementType();
    unsigned EltSize = getContext().getTypeSize(EltTy);

    // 16-bit element vectors should be passed as packed.
    if (EltSize == 16)
      return (VT->getNumElements() + 1) / 2;

    unsigned EltNumRegs = (EltSize + 31) / 32;
    return EltNumRegs * VT->getNumElements();
  }

  if (const RecordType *RT = Ty->getAs<RecordType>()) {
    const RecordDecl *RD = RT->getDecl();
    assert(!RD->hasFlexibleArrayMember());

    // Records: sum the per-field estimates recursively.
    for (const FieldDecl *Field : RD->fields()) {
      QualType FieldTy = Field->getType();
      NumRegs += numRegsForType(FieldTy);
    }

    return NumRegs;
  }

  // Everything else: round the bit-size up to whole 32-bit registers.
  return (getContext().getTypeSize(Ty) + 31) / 32;
}

// Classify the return type and every argument. AMDGPU_KERNEL functions use
// the kernel-specific classification; all other calling conventions share a
// single register budget across their arguments.
void
AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
  llvm::CallingConv::ID CC = FI.getCallingConvention();

  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

  // NumRegsLeft is decremented by classifyArgumentType as arguments consume
  // registers; it is shared across all arguments of this function.
  unsigned NumRegsLeft = MaxNumRegsForArgsRet;
  for (auto &Arg : FI.arguments()) {
    if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
      Arg.info = classifyKernelArgumentType(Arg.type);
    } else {
      Arg.info = classifyArgumentType(Arg.type, NumRegsLeft);
    }
  }
}

Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                 QualType Ty) const {
  llvm_unreachable("AMDGPU does not support varargs");
}

ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
  if (isAggregateTypeForABI(RetTy)) {
    // Records with non-trivial destructors/copy-constructors should not be
    // returned by value.
    if (!getRecordArgABI(RetTy, getCXXABI())) {
      // Ignore empty structs/unions.
      if (isEmptyRecord(getContext(), RetTy, true))
        return ABIArgInfo::getIgnore();

      // Lower single-element structs to just return a regular value.
      if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
        return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));

      // Records with a flexible array member fall back to the default
      // (indirect) handling.
      if (const RecordType *RT = RetTy->getAs<RecordType>()) {
        const RecordDecl *RD = RT->getDecl();
        if (RD->hasFlexibleArrayMember())
          return DefaultABIInfo::classifyReturnType(RetTy);
      }

      // Pack aggregates <= 8 bytes into a single VGPR or pair.
      uint64_t Size = getContext().getTypeSize(RetTy);
      if (Size <= 16)
        return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));

      if (Size <= 32)
        return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));

      if (Size <= 64) {
        llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
        return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
      }

      // Larger aggregates are still returned directly as long as they fit
      // within the register budget.
      if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
        return ABIArgInfo::getDirect();
    }
  }

  // Otherwise just do the default thing.
  return DefaultABIInfo::classifyReturnType(RetTy);
}

/// For kernels all parameters are really passed in a special buffer. It doesn't
/// make sense to pass anything byval, so everything must be direct.
ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
  Ty = useFirstFieldIfTransparentUnion(Ty);

  // TODO: Can we omit empty structs?

  // Single-element structs collapse to their element type.
  if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
    Ty = QualType(SeltTy, 0);

  llvm::Type *OrigLTy = CGT.ConvertType(Ty);
  llvm::Type *LTy = OrigLTy;
  // For HIP, coerce top-level generic pointers to device (global) pointers.
  if (getContext().getLangOpts().HIP) {
    LTy = coerceKernelArgumentType(
        OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
        /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
  }

  // FIXME: Should also use this for OpenCL, but it requires addressing the
  // problem of kernels being called.
  //
  // FIXME: This doesn't apply the optimization of coercing pointers in structs
  // to global address space when using byref. This would require implementing a
  // new kind of coercion of the in-memory type for indirect arguments.
  //
  // Non-OpenCL aggregates that were not coerced above are passed byref in
  // the constant address space.
  if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
      isAggregateTypeForABI(Ty)) {
    return ABIArgInfo::getIndirectAliased(
        getContext().getTypeAlignInChars(Ty),
        getContext().getTargetAddressSpace(LangAS::opencl_constant),
        false /*Realign*/, nullptr /*Padding*/);
  }

  // If we set CanBeFlattened to true, CodeGen will expand the struct to its
  // individual elements, which confuses the Clover OpenCL backend; therefore we
  // have to set it to false here. Other args of getDirect() are just defaults.
  return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}

/// Classify a non-kernel function argument, consuming registers from the
/// shared \p NumRegsLeft budget as values are assigned direct passing.
ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
                                               unsigned &NumRegsLeft) const {
  assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");

  Ty = useFirstFieldIfTransparentUnion(Ty);

  if (isAggregateTypeForABI(Ty)) {
    // Records with non-trivial destructors/copy-constructors should not be
    // passed by value.
    if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
      return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);

    // Ignore empty structs/unions.
    if (isEmptyRecord(getContext(), Ty, true))
      return ABIArgInfo::getIgnore();

    // Lower single-element structs to just pass a regular value. TODO: We
    // could do reasonable-size multiple-element structs too, using getExpand(),
    // though watch out for things like bitfields.
    if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
      return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));

    // Records with a flexible array member fall back to the default handling.
    if (const RecordType *RT = Ty->getAs<RecordType>()) {
      const RecordDecl *RD = RT->getDecl();
      if (RD->hasFlexibleArrayMember())
        return DefaultABIInfo::classifyArgumentType(Ty);
    }

    // Pack aggregates <= 8 bytes into single VGPR or pair.
    uint64_t Size = getContext().getTypeSize(Ty);
    if (Size <= 64) {
      unsigned NumRegs = (Size + 31) / 32;
      // Saturating subtraction: never underflow the budget.
      NumRegsLeft -= std::min(NumRegsLeft, NumRegs);

      if (Size <= 16)
        return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));

      if (Size <= 32)
        return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));

      // XXX: Should this be i64 instead, and should the limit increase?
      llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
      return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
    }

    // Larger aggregates go direct only if they fit the remaining budget.
    if (NumRegsLeft > 0) {
      unsigned NumRegs = numRegsForType(Ty);
      if (NumRegsLeft >= NumRegs) {
        NumRegsLeft -= NumRegs;
        return ABIArgInfo::getDirect();
      }
    }

    // Use pass-by-reference instead of pass-by-value for struct arguments in
    // function ABI.
    return ABIArgInfo::getIndirectAliased(
        getContext().getTypeAlignInChars(Ty),
        getContext().getTargetAddressSpace(LangAS::opencl_private));
  }

  // Otherwise just do the default thing, still accounting for the registers
  // a directly-passed scalar consumes.
  ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty);
  if (!ArgInfo.isIndirect()) {
    unsigned NumRegs = numRegsForType(Ty);
    NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
  }

  return ArgInfo;
}

/// Target-specific code generation hooks for AMDGPU: attribute emission,
/// address-space handling, sync scopes, and enqueued-block kernels.
class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<AMDGPUABIInfo>(CGT)) {}

  void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
                                 CodeGenModule &CGM) const;

  void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const override;

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  unsigned getOpenCLKernelCallingConv() const override;

  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T, QualType QT) const override;

  LangAS getASTAllocaAddressSpace() const override {
    return getLangASFromTargetAS(
        getABIInfo().getDataLayout().getAllocaAddrSpace());
  }
  LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
                                  const VarDecl *D) const override;
  llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
                                         SyncScope Scope,
                                         llvm::AtomicOrdering Ordering,
                                         llvm::LLVMContext &Ctx) const override;
  llvm::Value *createEnqueuedBlockKernel(CodeGenFunction &CGF,
                                         llvm::Function *BlockInvokeFunc,
                                         llvm::Type *BlockTy) const override;
  bool shouldEmitStaticExternCAliases() const override;
  bool shouldEmitDWARFBitFieldSeparators() const override;
  void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
};
}

/// Returns true when a hidden-visibility kernel or device variable must be
/// upgraded to protected visibility (OpenCL/CUDA/HIP entry points and device
/// globals, excluding OpenMP declare-target declarations).
static bool requiresAMDGPUProtectedVisibility(const Decl *D,
                                              llvm::GlobalValue *GV) {
  if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
    return false;

  return !D->hasAttr<OMPDeclareTargetDeclAttr>() &&
         (D->hasAttr<OpenCLKernelAttr>() ||
          (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
          (isa<VarDecl>(D) &&
           (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
            cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() ||
            cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType())));
}

/// Translate AMDGPU-related declaration attributes (work-group sizes,
/// waves-per-EU, SGPR/VGPR counts) into IR function attributes on \p F.
void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
    const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const {
  // reqd_work_group_size is only honored for OpenCL.
  const auto *ReqdWGS =
      M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
  const bool IsOpenCLKernel =
      M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
  const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();

  const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
  if (ReqdWGS || FlatWGS) {
    M.handleAMDGPUFlatWorkGroupSizeAttr(F, FlatWGS, ReqdWGS);
  } else if (IsOpenCLKernel || IsHIPKernel) {
    // By default, restrict the maximum size to a value specified by
    // --gpu-max-threads-per-block=n or its default value for HIP.
    const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
    const unsigned DefaultMaxWorkGroupSize =
        IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
                       : M.getLangOpts().GPUMaxThreadsPerBlock;
    std::string AttrVal =
        std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
    F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
  }

  if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>())
    M.handleAMDGPUWavesPerEUAttr(F, Attr);

  if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
    unsigned NumSGPR = Attr->getNumSGPR();

    // 0 means "no explicit request"; emit nothing in that case.
    if (NumSGPR != 0)
      F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR));
  }

  if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
    uint32_t NumVGPR = Attr->getNumVGPR();

    if (NumVGPR != 0)
      F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
  }
}

/// Emits control constants used to change per-architecture behaviour in the
/// AMDGPU ROCm device libraries.
void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
    CodeGen::CodeGenModule &CGM) const {
  StringRef Name = "__oclc_ABI_version";
  llvm::GlobalVariable *OriginalGV = CGM.getModule().getNamedGlobal(Name);
  // An existing non-external definition takes precedence; do not replace it.
  if (OriginalGV && !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage()))
    return;

  if (CGM.getTarget().getTargetOpts().CodeObjectVersion ==
      llvm::CodeObjectVersionKind::COV_None)
    return;

  auto *Type = llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), 32);
  llvm::Constant *COV = llvm::ConstantInt::get(
      Type, CGM.getTarget().getTargetOpts().CodeObjectVersion);

  // It needs to be constant weak_odr without externally_initialized so that
  // the load instruction can be eliminated by the IPSCCP.
  auto *GV = new llvm::GlobalVariable(
      CGM.getModule(), Type, true, llvm::GlobalValue::WeakODRLinkage, COV, Name,
      nullptr, llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
      CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
  GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
  GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility);

  // Replace any external references to this variable with the new global.
  if (OriginalGV) {
    OriginalGV->replaceAllUsesWith(GV);
    GV->takeName(OriginalGV);
    OriginalGV->eraseFromParent();
  }
}

void AMDGPUTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  if (requiresAMDGPUProtectedVisibility(D, GV)) {
    GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
    GV->setDSOLocal(true);
  }

  // Remaining attributes only apply to function definitions.
  if (GV->isDeclaration())
    return;

  llvm::Function *F = dyn_cast<llvm::Function>(GV);
  if (!F)
    return;

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (FD)
    setFunctionDeclAttributes(FD, F, M);

  if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
    F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");

  if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
    F->addFnAttr("amdgpu-ieee", "false");
}

unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
  return llvm::CallingConv::AMDGPU_KERNEL;
}

// Currently LLVM assumes null pointers always have value 0,
// which results in incorrectly transformed IR. Therefore, instead of
// emitting null pointers in private and local address spaces, a null
// pointer in generic address space is emitted which is casted to a
// pointer in local or private address space.
llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
    const CodeGen::CodeGenModule &CGM, llvm::PointerType *PT,
    QualType QT) const {
  // If null is the all-zero pattern for this type, the plain constant works.
  if (CGM.getContext().getTargetNullPointerValue(QT) == 0)
    return llvm::ConstantPointerNull::get(PT);

  auto &Ctx = CGM.getContext();
  auto NPT = llvm::PointerType::get(
      PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
  return llvm::ConstantExpr::getAddrSpaceCast(
      llvm::ConstantPointerNull::get(NPT), PT);
}

/// Pick the IR address space for a global variable in address-space-agnostic
/// languages: the declared AS if any, the constant AS for constant-initialized
/// constants, otherwise the global AS.
LangAS
AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
                                                  const VarDecl *D) const {
  assert(!CGM.getLangOpts().OpenCL &&
         !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
         "Address space agnostic languages only");
  LangAS DefaultGlobalAS = getLangASFromTargetAS(
      CGM.getContext().getTargetAddressSpace(LangAS::opencl_global));
  if (!D)
    return DefaultGlobalAS;

  LangAS AddrSpace = D->getType().getAddressSpace();
  if (AddrSpace != LangAS::Default)
    return AddrSpace;

  // Only promote to address space 4 if VarDecl has constant initialization.
  if (D->getType().isConstantStorage(CGM.getContext(), false, false) &&
      D->hasConstantInitialization()) {
    if (auto ConstAS = CGM.getTarget().getConstantAddressSpace())
      return *ConstAS;
  }
  return DefaultGlobalAS;
}

/// Map a source-level sync scope to the AMDGPU target scope string
/// ("singlethread"/"wavefront"/"workgroup"/"agent"/"" for system), appending
/// the "one-as" suffix for non-seq_cst orderings.
llvm::SyncScope::ID
AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
                                            SyncScope Scope,
                                            llvm::AtomicOrdering Ordering,
                                            llvm::LLVMContext &Ctx) const {
  std::string Name;
  switch (Scope) {
  case SyncScope::HIPSingleThread:
  case SyncScope::SingleScope:
    Name = "singlethread";
    break;
  case SyncScope::HIPWavefront:
  case SyncScope::OpenCLSubGroup:
  case SyncScope::WavefrontScope:
    Name = "wavefront";
    break;
  case SyncScope::HIPWorkgroup:
  case SyncScope::OpenCLWorkGroup:
  case SyncScope::WorkgroupScope:
    Name = "workgroup";
    break;
  case SyncScope::HIPAgent:
  case SyncScope::OpenCLDevice:
  case SyncScope::DeviceScope:
    Name = "agent";
    break;
  case SyncScope::SystemScope:
  case SyncScope::HIPSystem:
  case SyncScope::OpenCLAllSVMDevices:
    // Empty string denotes the default (system) scope.
    Name = "";
    break;
  }

  if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) {
    if (!Name.empty())
      Name = Twine(Twine(Name) + Twine("-")).str();

    Name = Twine(Twine(Name) + Twine("one-as")).str();
  }

  return Ctx.getOrInsertSyncScopeID(Name);
}

bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  return false;
}

bool AMDGPUTargetCodeGenInfo::shouldEmitDWARFBitFieldSeparators() const {
  return true;
}

void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
    const FunctionType *&FT) const {
  FT = getABIInfo().getContext().adjustFunctionType(
      FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
}

/// Create an OpenCL kernel for an enqueued block.
///
/// The type of the first argument (the block literal) is the struct type
/// of the block literal instead of a pointer type. The first argument
/// (block literal) is passed directly by value to the kernel. The kernel
/// allocates the same type of struct on stack and stores the block literal
/// to it and passes its pointer to the block invoke function.
/// Create an OpenCL kernel for an enqueued block.
///
/// The type of the first argument (the block literal) is the struct type
/// of the block literal instead of a pointer type. The first argument
/// (block literal) is passed directly by value to the kernel. The kernel
/// allocates the same type of struct on stack and stores the block literal
/// to it and passes its pointer to the block invoke function. The kernel
/// has "enqueued-block" function attribute and kernel argument metadata.
llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
    CodeGenFunction &CGF, llvm::Function *Invoke, llvm::Type *BlockTy) const {
  auto &Builder = CGF.Builder;
  auto &C = CGF.getLLVMContext();

  auto *InvokeFT = Invoke->getFunctionType();
  // Parallel arrays: entry I of each vector describes kernel argument I.
  // They are attached below as the standard OpenCL kernel_arg_* metadata.
  llvm::SmallVector<llvm::Type *, 2> ArgTys;
  llvm::SmallVector<llvm::Metadata *, 8> AddressQuals;
  llvm::SmallVector<llvm::Metadata *, 8> AccessQuals;
  llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames;
  llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames;
  llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals;
  llvm::SmallVector<llvm::Metadata *, 8> ArgNames;

  // Argument 0: the block literal struct, passed by value (addr-space
  // qualifier 0).
  ArgTys.push_back(BlockTy);
  ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
  AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0)));
  ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
  ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
  AccessQuals.push_back(llvm::MDString::get(C, "none"));
  ArgNames.push_back(llvm::MDString::get(C, "block_literal"));
  // Remaining arguments are forwarded from the invoke function, with
  // addr-space qualifier 3 and names local_argN (i.e. they look like OpenCL
  // local pointers — NOTE(review): 3 = local per the SPIR convention).
  for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) {
    ArgTys.push_back(InvokeFT->getParamType(I));
    ArgTypeNames.push_back(llvm::MDString::get(C, "void*"));
    AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3)));
    AccessQuals.push_back(llvm::MDString::get(C, "none"));
    ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*"));
    ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
    ArgNames.push_back(
        llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str()));
  }
  // The kernel is named after the invoke function and kept internal: it is
  // only reached through the enqueue machinery, never called directly.
  std::string Name = Invoke->getName().str() + "_kernel";
  auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false);
  auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
                                   &CGF.CGM.getModule());
  F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);

  llvm::AttrBuilder KernelAttrs(C);
  // FIXME: The invoke isn't applying the right attributes either
  // FIXME: This is missing setTargetAttributes
  CGF.CGM.addDefaultFunctionDefinitionAttributes(KernelAttrs);
  KernelAttrs.addAttribute("enqueued-block");
  F->addFnAttrs(KernelAttrs);

  // Build the body: spill the by-value block literal to a stack slot, then
  // pass its address (cast to the invoke's expected pointer type) plus the
  // remaining arguments straight through to the invoke function.
  auto IP = CGF.Builder.saveIP(); // restore the caller's insert point after
  auto *BB = llvm::BasicBlock::Create(C, "entry", F);
  Builder.SetInsertPoint(BB);
  const auto BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlign(BlockTy);
  auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr);
  BlockPtr->setAlignment(BlockAlign);
  Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
  auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
  llvm::SmallVector<llvm::Value *, 2> Args;
  Args.push_back(Cast);
  for (llvm::Argument &A : llvm::drop_begin(F->args()))
    Args.push_back(&A);
  llvm::CallInst *call = Builder.CreateCall(Invoke, Args);
  call->setCallingConv(Invoke->getCallingConv());
  Builder.CreateRetVoid();
  Builder.restoreIP(IP);

  // Attach the per-argument metadata gathered above; the argument-name node
  // is emitted only when OpenCL argument metadata was requested.
  F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals));
  F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals));
  F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames));
  F->setMetadata("kernel_arg_base_type",
                 llvm::MDNode::get(C, ArgBaseTypeNames));
  F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals));
  if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata)
    F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames));

  return F;
}
612*5f757f3fSDimitry Andric Max = FlatWGS->getMax()->EvaluateKnownConstInt(getContext()).getExtValue(); 613*5f757f3fSDimitry Andric } 614*5f757f3fSDimitry Andric if (ReqdWGS && Min == 0 && Max == 0) 615*5f757f3fSDimitry Andric Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim(); 616*5f757f3fSDimitry Andric 617*5f757f3fSDimitry Andric if (Min != 0) { 618*5f757f3fSDimitry Andric assert(Min <= Max && "Min must be less than or equal Max"); 619*5f757f3fSDimitry Andric 620*5f757f3fSDimitry Andric if (MinThreadsVal) 621*5f757f3fSDimitry Andric *MinThreadsVal = Min; 622*5f757f3fSDimitry Andric if (MaxThreadsVal) 623*5f757f3fSDimitry Andric *MaxThreadsVal = Max; 624*5f757f3fSDimitry Andric std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max); 625*5f757f3fSDimitry Andric if (F) 626*5f757f3fSDimitry Andric F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); 627*5f757f3fSDimitry Andric } else 628*5f757f3fSDimitry Andric assert(Max == 0 && "Max must be zero"); 629*5f757f3fSDimitry Andric } 630*5f757f3fSDimitry Andric 631*5f757f3fSDimitry Andric void CodeGenModule::handleAMDGPUWavesPerEUAttr( 632*5f757f3fSDimitry Andric llvm::Function *F, const AMDGPUWavesPerEUAttr *Attr) { 633*5f757f3fSDimitry Andric unsigned Min = 634*5f757f3fSDimitry Andric Attr->getMin()->EvaluateKnownConstInt(getContext()).getExtValue(); 635*5f757f3fSDimitry Andric unsigned Max = 636*5f757f3fSDimitry Andric Attr->getMax() 637*5f757f3fSDimitry Andric ? 
Attr->getMax()->EvaluateKnownConstInt(getContext()).getExtValue() 638*5f757f3fSDimitry Andric : 0; 639*5f757f3fSDimitry Andric 640*5f757f3fSDimitry Andric if (Min != 0) { 641*5f757f3fSDimitry Andric assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max"); 642*5f757f3fSDimitry Andric 643*5f757f3fSDimitry Andric std::string AttrVal = llvm::utostr(Min); 644*5f757f3fSDimitry Andric if (Max != 0) 645*5f757f3fSDimitry Andric AttrVal = AttrVal + "," + llvm::utostr(Max); 646*5f757f3fSDimitry Andric F->addFnAttr("amdgpu-waves-per-eu", AttrVal); 647*5f757f3fSDimitry Andric } else 648*5f757f3fSDimitry Andric assert(Max == 0 && "Max must be zero"); 649*5f757f3fSDimitry Andric } 650*5f757f3fSDimitry Andric 65106c3fb27SDimitry Andric std::unique_ptr<TargetCodeGenInfo> 65206c3fb27SDimitry Andric CodeGen::createAMDGPUTargetCodeGenInfo(CodeGenModule &CGM) { 65306c3fb27SDimitry Andric return std::make_unique<AMDGPUTargetCodeGenInfo>(CGM.getTypes()); 65406c3fb27SDimitry Andric } 655