//===- AMDGPU.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

using namespace clang;
using namespace clang::CodeGen;

//===----------------------------------------------------------------------===//
// AMDGPU ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class AMDGPUABIInfo final : public DefaultABIInfo {
private:
  static const unsigned MaxNumRegsForArgsRet = 16;

  unsigned numRegsForType(QualType Ty) const;

  bool isHomogeneousAggregateBaseType(QualType Ty) const override;
  bool isHomogeneousAggregateSmallEnough(const Type *Base,
                                         uint64_t Members) const override;

  // Coerce HIP scalar pointer arguments from generic pointers to global ones.
  llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
                                       unsigned ToAS) const {
    // Single value types.
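    // On AMDGPU the generic (flat) address space is 0 and the global address
    // space is 1, so e.g. a HIP kernel parameter lowered as a generic `ptr`
    // is rewritten here to `ptr addrspace(1)`.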
    auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty);
    if (PtrTy && PtrTy->getAddressSpace() == FromAS)
      return llvm::PointerType::get(Ty->getContext(), ToAS);
    return Ty;
  }

public:
  explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) :
    DefaultABIInfo(CGT) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
  ABIArgInfo classifyArgumentType(QualType Ty, bool Variadic,
                                  unsigned &NumRegsLeft) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;

  llvm::FixedVectorType *
  getOptimalVectorMemoryType(llvm::FixedVectorType *T,
                             const LangOptions &Opt) const override {
    // We have legal instructions for 96-bit so 3x32 can be supported.
    // FIXME: This check should be a subtarget feature as technically SI
    // doesn't support it.
    if (T->getNumElements() == 3 && getDataLayout().getTypeSizeInBits(T) == 96)
      return T;
    return DefaultABIInfo::getOptimalVectorMemoryType(T, Opt);
  }
};

bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
  return true;
}

bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
    const Type *Base, uint64_t Members) const {
  uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32;

  // Homogeneous Aggregates may occupy at most 16 registers.
  return Members * NumRegs <= MaxNumRegsForArgsRet;
}

/// Estimate number of registers the type will use when passed in registers.
unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const {
  unsigned NumRegs = 0;

  if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // Compute from the number of elements. The reported size is based on the
    // in-memory size, which includes the padding 4th element for 3-vectors.
    QualType EltTy = VT->getElementType();
    unsigned EltSize = getContext().getTypeSize(EltTy);

    // 16-bit element vectors should be passed as packed.
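    // e.g. a vector of four halves packs two elements per 32-bit register and
    // therefore counts as (4 + 1) / 2 = 2 registers.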
    if (EltSize == 16)
      return (VT->getNumElements() + 1) / 2;

    unsigned EltNumRegs = (EltSize + 31) / 32;
    return EltNumRegs * VT->getNumElements();
  }

  if (const RecordType *RT = Ty->getAs<RecordType>()) {
    const RecordDecl *RD = RT->getDecl();
    assert(!RD->hasFlexibleArrayMember());

    for (const FieldDecl *Field : RD->fields()) {
      QualType FieldTy = Field->getType();
      NumRegs += numRegsForType(FieldTy);
    }

    return NumRegs;
  }

  return (getContext().getTypeSize(Ty) + 31) / 32;
}

void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
  llvm::CallingConv::ID CC = FI.getCallingConvention();

  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

  unsigned ArgumentIndex = 0;
  const unsigned numFixedArguments = FI.getNumRequiredArgs();

  unsigned NumRegsLeft = MaxNumRegsForArgsRet;
  for (auto &Arg : FI.arguments()) {
    if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
      Arg.info = classifyKernelArgumentType(Arg.type);
    } else {
      bool FixedArgument = ArgumentIndex++ < numFixedArguments;
      Arg.info = classifyArgumentType(Arg.type, !FixedArgument, NumRegsLeft);
    }
  }
}

RValue AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                QualType Ty, AggValueSlot Slot) const {
  const bool IsIndirect = false;
  const bool AllowHigherAlign = false;
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                          getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(4), AllowHigherAlign, Slot);
}

ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
  if (isAggregateTypeForABI(RetTy)) {
    // Records with non-trivial destructors/copy-constructors should not be
    // returned by value.
    if (!getRecordArgABI(RetTy, getCXXABI())) {
      // Ignore empty structs/unions.
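      // (Passing /*AllowArrays=*/true lets arrays of empty records count as
      // empty as well.)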
      if (isEmptyRecord(getContext(), RetTy, true))
        return ABIArgInfo::getIgnore();

      // Lower single-element structs to just return a regular value.
      if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
        return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));

      if (const RecordType *RT = RetTy->getAs<RecordType>()) {
        const RecordDecl *RD = RT->getDecl();
        if (RD->hasFlexibleArrayMember())
          return DefaultABIInfo::classifyReturnType(RetTy);
      }

      // Pack aggregates <= 8 bytes into a single VGPR or pair.
      uint64_t Size = getContext().getTypeSize(RetTy);
      if (Size <= 16)
        return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));

      if (Size <= 32)
        return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));

      if (Size <= 64) {
        llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
        return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
      }

      if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
        return ABIArgInfo::getDirect();
    }
  }

  // Otherwise just do the default thing.
  return DefaultABIInfo::classifyReturnType(RetTy);
}

/// For kernels, all parameters are really passed in a special buffer. It
/// doesn't make sense to pass anything byval, so everything must be direct.
ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
  Ty = useFirstFieldIfTransparentUnion(Ty);

  // TODO: Can we omit empty structs?

  if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
    Ty = QualType(SeltTy, 0);

  llvm::Type *OrigLTy = CGT.ConvertType(Ty);
  llvm::Type *LTy = OrigLTy;
  if (getContext().getLangOpts().HIP) {
    LTy = coerceKernelArgumentType(
        OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
        /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
  }

  // FIXME: Should also use this for OpenCL, but it requires addressing the
  // problem of kernels being called.
  //
  // FIXME: This doesn't apply the optimization of coercing pointers in structs
  // to global address space when using byref. This would require implementing
  // a new kind of coercion of the in-memory type for indirect arguments.
  if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
      isAggregateTypeForABI(Ty)) {
    return ABIArgInfo::getIndirectAliased(
        getContext().getTypeAlignInChars(Ty),
        getContext().getTargetAddressSpace(LangAS::opencl_constant),
        false /*Realign*/, nullptr /*Padding*/);
  }

  // If we set CanBeFlattened to true, CodeGen will expand the struct to its
  // individual elements, which confuses the Clover OpenCL backend; therefore we
  // have to set it to false here. Other args of getDirect() are just defaults.
  return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}

ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
                                               unsigned &NumRegsLeft) const {
  assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");

  Ty = useFirstFieldIfTransparentUnion(Ty);

  if (Variadic) {
    return ABIArgInfo::getDirect(/*T=*/nullptr,
                                 /*Offset=*/0,
                                 /*Padding=*/nullptr,
                                 /*CanBeFlattened=*/false,
                                 /*Align=*/0);
  }

  if (isAggregateTypeForABI(Ty)) {
    // Records with non-trivial destructors/copy-constructors should not be
    // passed by value.
    if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
      return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);

    // Ignore empty structs/unions.
    if (isEmptyRecord(getContext(), Ty, true))
      return ABIArgInfo::getIgnore();

    // Lower single-element structs to just pass a regular value. TODO: We
    // could do reasonable-size multiple-element structs too, using getExpand(),
    // though watch out for things like bitfields.
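    // e.g. struct { float x; } is passed as a plain float rather than as an
    // aggregate.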
    if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
      return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));

    if (const RecordType *RT = Ty->getAs<RecordType>()) {
      const RecordDecl *RD = RT->getDecl();
      if (RD->hasFlexibleArrayMember())
        return DefaultABIInfo::classifyArgumentType(Ty);
    }

    // Pack aggregates <= 8 bytes into a single VGPR or pair.
    uint64_t Size = getContext().getTypeSize(Ty);
    if (Size <= 64) {
      unsigned NumRegs = (Size + 31) / 32;
      NumRegsLeft -= std::min(NumRegsLeft, NumRegs);

      if (Size <= 16)
        return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));

      if (Size <= 32)
        return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));

      // XXX: Should this be i64 instead, and should the limit increase?
      llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
      return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
    }

    if (NumRegsLeft > 0) {
      unsigned NumRegs = numRegsForType(Ty);
      if (NumRegsLeft >= NumRegs) {
        NumRegsLeft -= NumRegs;
        return ABIArgInfo::getDirect();
      }
    }

    // Use pass-by-reference instead of pass-by-value for struct arguments in
    // the function ABI.
    return ABIArgInfo::getIndirectAliased(
        getContext().getTypeAlignInChars(Ty),
        getContext().getTargetAddressSpace(LangAS::opencl_private));
  }

  // Otherwise just do the default thing.
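  // Even for the default classification, keep the register budget in sync so
  // later arguments see how many registers this one is expected to consume.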
  ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty);
  if (!ArgInfo.isIndirect()) {
    unsigned NumRegs = numRegsForType(Ty);
    NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
  }

  return ArgInfo;
}

class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<AMDGPUABIInfo>(CGT)) {}

  void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
                                 CodeGenModule &CGM) const;

  void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const override;

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  unsigned getOpenCLKernelCallingConv() const override;

  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T,
                                 QualType QT) const override;

  LangAS getASTAllocaAddressSpace() const override {
    return getLangASFromTargetAS(
        getABIInfo().getDataLayout().getAllocaAddrSpace());
  }
  LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
                                  const VarDecl *D) const override;
  llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
                                         SyncScope Scope,
                                         llvm::AtomicOrdering Ordering,
                                         llvm::LLVMContext &Ctx) const override;
  void setTargetAtomicMetadata(CodeGenFunction &CGF,
                               llvm::Instruction &AtomicInst,
                               const AtomicExpr *Expr = nullptr) const override;
  llvm::Value *createEnqueuedBlockKernel(CodeGenFunction &CGF,
                                         llvm::Function *BlockInvokeFunc,
                                         llvm::Type *BlockTy) const override;
  bool shouldEmitStaticExternCAliases() const override;
  bool shouldEmitDWARFBitFieldSeparators() const override;
  void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
};
}

static bool requiresAMDGPUProtectedVisibility(const Decl *D,
                                              llvm::GlobalValue *GV) {
  if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
    return false;

  return !D->hasAttr<OMPDeclareTargetDeclAttr>() &&
         (D->hasAttr<OpenCLKernelAttr>() ||
          (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
          (isa<VarDecl>(D) &&
           (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
            cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() ||
            cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType())));
}

void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
    const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const {
  const auto *ReqdWGS =
      M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
  const bool IsOpenCLKernel =
      M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
  const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();

  const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
  if (ReqdWGS || FlatWGS) {
    M.handleAMDGPUFlatWorkGroupSizeAttr(F, FlatWGS, ReqdWGS);
  } else if (IsOpenCLKernel || IsHIPKernel) {
    // By default, restrict the maximum size to a value specified by
    // --gpu-max-threads-per-block=n or its default value for HIP.
    const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
    const unsigned DefaultMaxWorkGroupSize =
        IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
                       : M.getLangOpts().GPUMaxThreadsPerBlock;
    std::string AttrVal =
        std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
    F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
  }

  if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>())
    M.handleAMDGPUWavesPerEUAttr(F, Attr);

  if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
    unsigned NumSGPR = Attr->getNumSGPR();

    if (NumSGPR != 0)
      F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR));
  }

  if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
    uint32_t NumVGPR = Attr->getNumVGPR();

    if (NumVGPR != 0)
      F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
  }

  if (const auto *Attr = FD->getAttr<AMDGPUMaxNumWorkGroupsAttr>()) {
    uint32_t X = Attr->getMaxNumWorkGroupsX()
                     ->EvaluateKnownConstInt(M.getContext())
                     .getExtValue();
    // Y and Z dimensions default to 1 if not specified
    uint32_t Y = Attr->getMaxNumWorkGroupsY()
                     ? Attr->getMaxNumWorkGroupsY()
                           ->EvaluateKnownConstInt(M.getContext())
                           .getExtValue()
                     : 1;
    uint32_t Z = Attr->getMaxNumWorkGroupsZ()
                     ? Attr->getMaxNumWorkGroupsZ()
                           ->EvaluateKnownConstInt(M.getContext())
                           .getExtValue()
                     : 1;

    llvm::SmallString<32> AttrVal;
    llvm::raw_svector_ostream OS(AttrVal);
    OS << X << ',' << Y << ',' << Z;

    F->addFnAttr("amdgpu-max-num-workgroups", AttrVal.str());
  }
}

/// Emits control constants used to change per-architecture behaviour in the
/// AMDGPU ROCm device libraries.
void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
    CodeGen::CodeGenModule &CGM) const {
  StringRef Name = "__oclc_ABI_version";
  llvm::GlobalVariable *OriginalGV = CGM.getModule().getNamedGlobal(Name);
  if (OriginalGV &&
      !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage()))
    return;

  if (CGM.getTarget().getTargetOpts().CodeObjectVersion ==
      llvm::CodeObjectVersionKind::COV_None)
    return;

  auto *Type = llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), 32);
  llvm::Constant *COV = llvm::ConstantInt::get(
      Type, CGM.getTarget().getTargetOpts().CodeObjectVersion);

  // It needs to be constant weak_odr without externally_initialized so that
  // the load instruction can be eliminated by the IPSCCP pass.
  auto *GV = new llvm::GlobalVariable(
      CGM.getModule(), Type, true, llvm::GlobalValue::WeakODRLinkage, COV, Name,
      nullptr, llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
      CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
  GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
  GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility);

  // Replace any external references to this variable with the new global.
  if (OriginalGV) {
    OriginalGV->replaceAllUsesWith(GV);
    GV->takeName(OriginalGV);
    OriginalGV->eraseFromParent();
  }
}

void AMDGPUTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  if (requiresAMDGPUProtectedVisibility(D, GV)) {
    GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
    GV->setDSOLocal(true);
  }

  if (GV->isDeclaration())
    return;

  llvm::Function *F = dyn_cast<llvm::Function>(GV);
  if (!F)
    return;

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (FD)
    setFunctionDeclAttributes(FD, F, M);

  if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
    F->addFnAttr("amdgpu-ieee", "false");
}

unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
  return llvm::CallingConv::AMDGPU_KERNEL;
}

// Currently LLVM assumes null pointers always have value 0, which results in
// incorrectly transformed IR. Therefore, instead of emitting null pointers in
// the private and local address spaces, a null pointer in the generic address
// space is emitted and then cast to a pointer in the local or private address
// space.
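// For example, a null pointer in the private address space is emitted as
// `addrspacecast (ptr null to ptr addrspace(5))`, which the backend lowers to
// the target's non-zero null value for that address space.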
llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
    const CodeGen::CodeGenModule &CGM, llvm::PointerType *PT,
    QualType QT) const {
  if (CGM.getContext().getTargetNullPointerValue(QT) == 0)
    return llvm::ConstantPointerNull::get(PT);

  auto &Ctx = CGM.getContext();
  auto NPT = llvm::PointerType::get(
      PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
  return llvm::ConstantExpr::getAddrSpaceCast(
      llvm::ConstantPointerNull::get(NPT), PT);
}

LangAS
AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
                                                  const VarDecl *D) const {
  assert(!CGM.getLangOpts().OpenCL &&
         !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
         "Address space agnostic languages only");
  LangAS DefaultGlobalAS = getLangASFromTargetAS(
      CGM.getContext().getTargetAddressSpace(LangAS::opencl_global));
  if (!D)
    return DefaultGlobalAS;

  LangAS AddrSpace = D->getType().getAddressSpace();
  if (AddrSpace != LangAS::Default)
    return AddrSpace;

  // Only promote to address space 4 if VarDecl has constant initialization.
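  // e.g. a file-scope `const` table with a constant initializer can then be
  // placed in the constant address space (4) rather than the global one (1).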
  if (D->getType().isConstantStorage(CGM.getContext(), false, false) &&
      D->hasConstantInitialization()) {
    if (auto ConstAS = CGM.getTarget().getConstantAddressSpace())
      return *ConstAS;
  }
  return DefaultGlobalAS;
}

llvm::SyncScope::ID
AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
                                            SyncScope Scope,
                                            llvm::AtomicOrdering Ordering,
                                            llvm::LLVMContext &Ctx) const {
  std::string Name;
  switch (Scope) {
  case SyncScope::HIPSingleThread:
  case SyncScope::SingleScope:
    Name = "singlethread";
    break;
  case SyncScope::HIPWavefront:
  case SyncScope::OpenCLSubGroup:
  case SyncScope::WavefrontScope:
    Name = "wavefront";
    break;
  case SyncScope::HIPWorkgroup:
  case SyncScope::OpenCLWorkGroup:
  case SyncScope::WorkgroupScope:
    Name = "workgroup";
    break;
  case SyncScope::HIPAgent:
  case SyncScope::OpenCLDevice:
  case SyncScope::DeviceScope:
    Name = "agent";
    break;
  case SyncScope::SystemScope:
  case SyncScope::HIPSystem:
  case SyncScope::OpenCLAllSVMDevices:
    Name = "";
    break;
  }

  // OpenCL assumes by default that atomic scopes are per-address space for
  // non-sequentially consistent operations.
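  // e.g. a relaxed work_group-scope OpenCL atomic gets the "workgroup-one-as"
  // sync scope, while a seq_cst one keeps the plain "workgroup" scope.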
  if (Scope >= SyncScope::OpenCLWorkGroup &&
      Scope <= SyncScope::OpenCLSubGroup &&
      Ordering != llvm::AtomicOrdering::SequentiallyConsistent) {
    if (!Name.empty())
      Name = Twine(Twine(Name) + Twine("-")).str();

    Name = Twine(Twine(Name) + Twine("one-as")).str();
  }

  return Ctx.getOrInsertSyncScopeID(Name);
}

void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata(
    CodeGenFunction &CGF, llvm::Instruction &AtomicInst,
    const AtomicExpr *AE) const {
  auto *RMW = dyn_cast<llvm::AtomicRMWInst>(&AtomicInst);
  auto *CmpX = dyn_cast<llvm::AtomicCmpXchgInst>(&AtomicInst);

  // OpenCL and old style HIP atomics consider atomics targeting thread private
  // memory to be undefined.
  //
  // TODO: This is probably undefined for atomic load/store, but there's not
  // much direct codegen benefit to knowing this.
  if (((RMW && RMW->getPointerAddressSpace() == llvm::AMDGPUAS::FLAT_ADDRESS) ||
       (CmpX &&
        CmpX->getPointerAddressSpace() == llvm::AMDGPUAS::FLAT_ADDRESS)) &&
      AE && AE->threadPrivateMemoryAtomicsAreUndefined()) {
    llvm::MDBuilder MDHelper(CGF.getLLVMContext());
    llvm::MDNode *ASRange = MDHelper.createRange(
        llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS),
        llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS + 1));
    AtomicInst.setMetadata(llvm::LLVMContext::MD_noalias_addrspace, ASRange);
  }

  if (!RMW || !CGF.getTarget().allowAMDGPUUnsafeFPAtomics())
    return;

  // TODO: Introduce new, more controlled options that also work for integers,
  // and deprecate allowAMDGPUUnsafeFPAtomics.
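  // With unsafe FP atomics enabled (-munsafe-fp-atomics), tag floating-point
  // atomicrmw instructions so the backend may select native FP atomic
  // instructions instead of a CAS loop: "amdgpu.no.fine.grained.memory" states
  // the operation does not target fine-grained host memory, and
  // "amdgpu.ignore.denormal.mode" (f32 fadd only) allows ignoring the
  // instruction's denormal-mode behavior.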
  llvm::AtomicRMWInst::BinOp RMWOp = RMW->getOperation();
  if (llvm::AtomicRMWInst::isFPOperation(RMWOp)) {
    llvm::MDNode *Empty = llvm::MDNode::get(CGF.getLLVMContext(), {});
    RMW->setMetadata("amdgpu.no.fine.grained.memory", Empty);

    if (RMWOp == llvm::AtomicRMWInst::FAdd && RMW->getType()->isFloatTy())
      RMW->setMetadata("amdgpu.ignore.denormal.mode", Empty);
  }
}

bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  return false;
}

bool AMDGPUTargetCodeGenInfo::shouldEmitDWARFBitFieldSeparators() const {
  return true;
}

void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
    const FunctionType *&FT) const {
  FT = getABIInfo().getContext().adjustFunctionType(
      FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
}

/// Create an OpenCL kernel for an enqueued block.
///
/// The type of the first argument (the block literal) is the struct type
/// of the block literal instead of a pointer type. The first argument
/// (block literal) is passed directly by value to the kernel. The kernel
/// allocates the same type of struct on the stack, stores the block literal
/// to it, and passes its pointer to the block invoke function. The kernel
/// has the "enqueued-block" function attribute and kernel argument metadata.
llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
    CodeGenFunction &CGF, llvm::Function *Invoke, llvm::Type *BlockTy) const {
  auto &Builder = CGF.Builder;
  auto &C = CGF.getLLVMContext();

  auto *InvokeFT = Invoke->getFunctionType();
  llvm::SmallVector<llvm::Type *, 2> ArgTys;
  llvm::SmallVector<llvm::Metadata *, 8> AddressQuals;
  llvm::SmallVector<llvm::Metadata *, 8> AccessQuals;
  llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames;
  llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames;
  llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals;
  llvm::SmallVector<llvm::Metadata *, 8> ArgNames;

  ArgTys.push_back(BlockTy);
  ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
  AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0)));
  ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
  ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
  AccessQuals.push_back(llvm::MDString::get(C, "none"));
  ArgNames.push_back(llvm::MDString::get(C, "block_literal"));
  for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) {
    ArgTys.push_back(InvokeFT->getParamType(I));
    ArgTypeNames.push_back(llvm::MDString::get(C, "void*"));
    AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3)));
    AccessQuals.push_back(llvm::MDString::get(C, "none"));
    ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*"));
    ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
    ArgNames.push_back(
        llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str()));
  }
  std::string Name = Invoke->getName().str() + "_kernel";
  auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false);
  auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
                                   &CGF.CGM.getModule());
  F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);

  llvm::AttrBuilder KernelAttrs(C);
  // FIXME: The invoke isn't applying the right attributes either
  // FIXME: This is missing setTargetAttributes
  CGF.CGM.addDefaultFunctionDefinitionAttributes(KernelAttrs);
  KernelAttrs.addAttribute("enqueued-block");
  F->addFnAttrs(KernelAttrs);

  auto IP = CGF.Builder.saveIP();
  auto *BB = llvm::BasicBlock::Create(C, "entry", F);
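  // Kernel body: spill the by-value block literal to a stack slot and pass its
  // address, followed by any remaining local pointer arguments, on to the
  // block invoke function.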
  Builder.SetInsertPoint(BB);
  const auto BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlign(BlockTy);
  auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr);
  BlockPtr->setAlignment(BlockAlign);
  Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
  auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
  llvm::SmallVector<llvm::Value *, 2> Args;
  Args.push_back(Cast);
  for (llvm::Argument &A : llvm::drop_begin(F->args()))
    Args.push_back(&A);
  llvm::CallInst *call = Builder.CreateCall(Invoke, Args);
  call->setCallingConv(Invoke->getCallingConv());
  Builder.CreateRetVoid();
  Builder.restoreIP(IP);

  F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals));
  F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals));
  F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames));
  F->setMetadata("kernel_arg_base_type",
                 llvm::MDNode::get(C, ArgBaseTypeNames));
  F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals));
  if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata)
    F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames));

  return F;
}

void CodeGenModule::handleAMDGPUFlatWorkGroupSizeAttr(
    llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *FlatWGS,
    const ReqdWorkGroupSizeAttr *ReqdWGS, int32_t *MinThreadsVal,
    int32_t *MaxThreadsVal) {
  unsigned Min = 0;
  unsigned Max = 0;
  if (FlatWGS) {
    Min = FlatWGS->getMin()->EvaluateKnownConstInt(getContext()).getExtValue();
    Max = FlatWGS->getMax()->EvaluateKnownConstInt(getContext()).getExtValue();
  }
  if (ReqdWGS && Min == 0 && Max == 0)
    Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();

  if (Min != 0) {
    assert(Min <= Max && "Min must be less than or equal Max");

    if (MinThreadsVal)
      *MinThreadsVal = Min;
    if (MaxThreadsVal)
      *MaxThreadsVal = Max;
    std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
    if (F)
      F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
  } else
    assert(Max == 0 && "Max must be zero");
zero"); 72208a22076SJohannes Doerfert } 72308a22076SJohannes Doerfert 72408a22076SJohannes Doerfert void CodeGenModule::handleAMDGPUWavesPerEUAttr( 72508a22076SJohannes Doerfert llvm::Function *F, const AMDGPUWavesPerEUAttr *Attr) { 72608a22076SJohannes Doerfert unsigned Min = 72708a22076SJohannes Doerfert Attr->getMin()->EvaluateKnownConstInt(getContext()).getExtValue(); 72808a22076SJohannes Doerfert unsigned Max = 72908a22076SJohannes Doerfert Attr->getMax() 73008a22076SJohannes Doerfert ? Attr->getMax()->EvaluateKnownConstInt(getContext()).getExtValue() 73108a22076SJohannes Doerfert : 0; 73208a22076SJohannes Doerfert 73308a22076SJohannes Doerfert if (Min != 0) { 73408a22076SJohannes Doerfert assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max"); 73508a22076SJohannes Doerfert 73608a22076SJohannes Doerfert std::string AttrVal = llvm::utostr(Min); 73708a22076SJohannes Doerfert if (Max != 0) 73808a22076SJohannes Doerfert AttrVal = AttrVal + "," + llvm::utostr(Max); 73908a22076SJohannes Doerfert F->addFnAttr("amdgpu-waves-per-eu", AttrVal); 74008a22076SJohannes Doerfert } else 74108a22076SJohannes Doerfert assert(Max == 0 && "Max must be zero"); 74208a22076SJohannes Doerfert } 74308a22076SJohannes Doerfert 744992cb984SSergei Barannikov std::unique_ptr<TargetCodeGenInfo> 745992cb984SSergei Barannikov CodeGen::createAMDGPUTargetCodeGenInfo(CodeGenModule &CGM) { 746992cb984SSergei Barannikov return std::make_unique<AMDGPUTargetCodeGenInfo>(CGM.getTypes()); 747992cb984SSergei Barannikov } 748