//===- AMDGPU.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "clang/Basic/TargetOptions.h"

using namespace clang;
using namespace clang::CodeGen;

//===----------------------------------------------------------------------===//
// AMDGPU ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class AMDGPUABIInfo final : public DefaultABIInfo {
private:
  static const unsigned MaxNumRegsForArgsRet = 16;

  unsigned numRegsForType(QualType Ty) const;

  bool isHomogeneousAggregateBaseType(QualType Ty) const override;
  bool isHomogeneousAggregateSmallEnough(const Type *Base,
                                         uint64_t Members) const override;

  // Coerce HIP scalar pointer arguments from generic pointers to global ones.
  llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
                                       unsigned ToAS) const {
    // Single value types.
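    // Illustrative example (assuming the usual amdgcn address-space mapping,
    // where the generic address space is 0 and the global one is 1): a HIP
    // kernel parameter declared as `int *` arrives here as a generic `ptr`
    // and is rewritten to `ptr addrspace(1)`; non-pointer types fall through
    // unchanged.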
    auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty);
    if (PtrTy && PtrTy->getAddressSpace() == FromAS)
      return llvm::PointerType::get(Ty->getContext(), ToAS);
    return Ty;
  }

public:
  explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) :
    DefaultABIInfo(CGT) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
  ABIArgInfo classifyArgumentType(QualType Ty, bool Variadic,
                                  unsigned &NumRegsLeft) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;
};

bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
  return true;
}

bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
  const Type *Base, uint64_t Members) const {
  uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32;

  // Homogeneous Aggregates may occupy at most 16 registers.
  return Members * NumRegs <= MaxNumRegsForArgsRet;
}

/// Estimate number of registers the type will use when passed in registers.
unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const {
  unsigned NumRegs = 0;

  if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // Compute from the number of elements. The reported size is based on the
    // in-memory size, which includes the padding 4th element for 3-vectors.
    QualType EltTy = VT->getElementType();
    unsigned EltSize = getContext().getTypeSize(EltTy);

    // 16-bit element vectors should be passed as packed.
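    // For example, <4 x half> counts as two registers here, while <3 x float>
    // takes the 32-bit path below and counts as three.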
    if (EltSize == 16)
      return (VT->getNumElements() + 1) / 2;

    unsigned EltNumRegs = (EltSize + 31) / 32;
    return EltNumRegs * VT->getNumElements();
  }

  if (const RecordType *RT = Ty->getAs<RecordType>()) {
    const RecordDecl *RD = RT->getDecl();
    assert(!RD->hasFlexibleArrayMember());

    for (const FieldDecl *Field : RD->fields()) {
      QualType FieldTy = Field->getType();
      NumRegs += numRegsForType(FieldTy);
    }

    return NumRegs;
  }

  return (getContext().getTypeSize(Ty) + 31) / 32;
}

void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
  llvm::CallingConv::ID CC = FI.getCallingConvention();

  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

  unsigned ArgumentIndex = 0;
  const unsigned numFixedArguments = FI.getNumRequiredArgs();

  unsigned NumRegsLeft = MaxNumRegsForArgsRet;
  for (auto &Arg : FI.arguments()) {
    if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
      Arg.info = classifyKernelArgumentType(Arg.type);
    } else {
      bool FixedArgument = ArgumentIndex++ < numFixedArguments;
      Arg.info = classifyArgumentType(Arg.type, !FixedArgument, NumRegsLeft);
    }
  }
}

RValue AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                QualType Ty, AggValueSlot Slot) const {
  const bool IsIndirect = false;
  const bool AllowHigherAlign = false;
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                          getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(4), AllowHigherAlign, Slot);
}

ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
  if (isAggregateTypeForABI(RetTy)) {
    // Records with non-trivial destructors/copy-constructors should not be
    // returned by value.
    if (!getRecordArgABI(RetTy, getCXXABI())) {
      // Ignore empty structs/unions.
      if (isEmptyRecord(getContext(), RetTy, true))
        return ABIArgInfo::getIgnore();

      // Lower single-element structs to just return a regular value.
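      // For example, `struct { float x; }` is returned as a plain float
      // rather than as an aggregate.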
      if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
        return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));

      if (const RecordType *RT = RetTy->getAs<RecordType>()) {
        const RecordDecl *RD = RT->getDecl();
        if (RD->hasFlexibleArrayMember())
          return DefaultABIInfo::classifyReturnType(RetTy);
      }

      // Pack aggregates <= 8 bytes into a single VGPR or pair.
      uint64_t Size = getContext().getTypeSize(RetTy);
      if (Size <= 16)
        return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));

      if (Size <= 32)
        return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));

      if (Size <= 64) {
        llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
        return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
      }

      if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
        return ABIArgInfo::getDirect();
    }
  }

  // Otherwise just do the default thing.
  return DefaultABIInfo::classifyReturnType(RetTy);
}

/// For kernels all parameters are really passed in a special buffer. It doesn't
/// make sense to pass anything byval, so everything must be direct.
ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
  Ty = useFirstFieldIfTransparentUnion(Ty);

  // TODO: Can we omit empty structs?

  if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
    Ty = QualType(SeltTy, 0);

  llvm::Type *OrigLTy = CGT.ConvertType(Ty);
  llvm::Type *LTy = OrigLTy;
  if (getContext().getLangOpts().HIP) {
    LTy = coerceKernelArgumentType(
        OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
        /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
  }

  // FIXME: Should also use this for OpenCL, but it requires addressing the
  // problem of kernels being called.
  //
  // FIXME: This doesn't apply the optimization of coercing pointers in structs
  // to global address space when using byref. This would require implementing
  // a new kind of coercion of the in-memory type for indirect arguments.
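  // Illustrative example (assuming the usual amdgcn mapping of the constant
  // address space to addrspace(4)): for a non-OpenCL kernel, an aggregate
  // parameter such as `struct S { int a[16]; }` takes the indirect path below
  // and is lowered as a `byref(%struct.S)` argument in addrspace(4) rather
  // than being copied byval.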
  if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
      isAggregateTypeForABI(Ty)) {
    return ABIArgInfo::getIndirectAliased(
        getContext().getTypeAlignInChars(Ty),
        getContext().getTargetAddressSpace(LangAS::opencl_constant),
        false /*Realign*/, nullptr /*Padding*/);
  }

  // If we set CanBeFlattened to true, CodeGen will expand the struct to its
  // individual elements, which confuses the Clover OpenCL backend; therefore we
  // have to set it to false here. Other args of getDirect() are just defaults.
  return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}

ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
                                               unsigned &NumRegsLeft) const {
  assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");

  Ty = useFirstFieldIfTransparentUnion(Ty);

  if (Variadic) {
    return ABIArgInfo::getDirect(/*T=*/nullptr,
                                 /*Offset=*/0,
                                 /*Padding=*/nullptr,
                                 /*CanBeFlattened=*/false,
                                 /*Align=*/0);
  }

  if (isAggregateTypeForABI(Ty)) {
    // Records with non-trivial destructors/copy-constructors should not be
    // passed by value.
    if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
      return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);

    // Ignore empty structs/unions.
    if (isEmptyRecord(getContext(), Ty, true))
      return ABIArgInfo::getIgnore();

    // Lower single-element structs to just pass a regular value. TODO: We
    // could do reasonable-size multiple-element structs too, using getExpand(),
    // though watch out for things like bitfields.
    if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
      return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));

    if (const RecordType *RT = Ty->getAs<RecordType>()) {
      const RecordDecl *RD = RT->getDecl();
      if (RD->hasFlexibleArrayMember())
        return DefaultABIInfo::classifyArgumentType(Ty);
    }

    // Pack aggregates <= 8 bytes into single VGPR or pair.
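    // For example, a struct holding two ints (64 bits) is passed as
    // [2 x i32] below and charges two registers against NumRegsLeft.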
    uint64_t Size = getContext().getTypeSize(Ty);
    if (Size <= 64) {
      unsigned NumRegs = (Size + 31) / 32;
      NumRegsLeft -= std::min(NumRegsLeft, NumRegs);

      if (Size <= 16)
        return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));

      if (Size <= 32)
        return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));

      // XXX: Should this be i64 instead, and should the limit increase?
      llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
      return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
    }

    if (NumRegsLeft > 0) {
      unsigned NumRegs = numRegsForType(Ty);
      if (NumRegsLeft >= NumRegs) {
        NumRegsLeft -= NumRegs;
        return ABIArgInfo::getDirect();
      }
    }

    // Use pass-by-reference instead of pass-by-value for struct arguments in
    // the function ABI.
    return ABIArgInfo::getIndirectAliased(
        getContext().getTypeAlignInChars(Ty),
        getContext().getTargetAddressSpace(LangAS::opencl_private));
  }

  // Otherwise just do the default thing.
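  // Scalars and pointers are passed directly by the default classification;
  // they are still charged against NumRegsLeft so that later aggregate
  // arguments fall back to memory once the 16-register budget is used up.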
  ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty);
  if (!ArgInfo.isIndirect()) {
    unsigned NumRegs = numRegsForType(Ty);
    NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
  }

  return ArgInfo;
}

class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<AMDGPUABIInfo>(CGT)) {}

  void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
                                 CodeGenModule &CGM) const;

  void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const override;

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  unsigned getOpenCLKernelCallingConv() const override;

  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T, QualType QT) const override;

  LangAS getASTAllocaAddressSpace() const override {
    return getLangASFromTargetAS(
        getABIInfo().getDataLayout().getAllocaAddrSpace());
  }
  LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
                                  const VarDecl *D) const override;
  llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
                                         SyncScope Scope,
                                         llvm::AtomicOrdering Ordering,
                                         llvm::LLVMContext &Ctx) const override;
  llvm::Value *createEnqueuedBlockKernel(CodeGenFunction &CGF,
                                         llvm::Function *BlockInvokeFunc,
                                         llvm::Type *BlockTy) const override;
  bool shouldEmitStaticExternCAliases() const override;
  bool shouldEmitDWARFBitFieldSeparators() const override;
  void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
};
}

static bool requiresAMDGPUProtectedVisibility(const Decl *D,
                                              llvm::GlobalValue *GV) {
  if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
    return false;

  return !D->hasAttr<OMPDeclareTargetDeclAttr>() &&
         (D->hasAttr<OpenCLKernelAttr>() ||
          (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
          (isa<VarDecl>(D) &&
           (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
            cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() ||
            cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType())));
}

void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
    const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const {
  const auto *ReqdWGS =
      M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
  const bool IsOpenCLKernel =
      M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
  const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();

  const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
  if (ReqdWGS || FlatWGS) {
    M.handleAMDGPUFlatWorkGroupSizeAttr(F, FlatWGS, ReqdWGS);
  } else if (IsOpenCLKernel || IsHIPKernel) {
    // By default, restrict the maximum size to a value specified by
    // --gpu-max-threads-per-block=n or its default value for HIP.
    const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
    const unsigned DefaultMaxWorkGroupSize =
        IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
                       : M.getLangOpts().GPUMaxThreadsPerBlock;
    std::string AttrVal =
        std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
    F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
  }

  if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>())
    M.handleAMDGPUWavesPerEUAttr(F, Attr);

  if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
    unsigned NumSGPR = Attr->getNumSGPR();

    if (NumSGPR != 0)
      F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR));
  }

  if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
    uint32_t NumVGPR = Attr->getNumVGPR();

    if (NumVGPR != 0)
      F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
  }

  if (const auto *Attr = FD->getAttr<AMDGPUMaxNumWorkGroupsAttr>()) {
    uint32_t X = Attr->getMaxNumWorkGroupsX()
                     ->EvaluateKnownConstInt(M.getContext())
                     .getExtValue();
    // Y and Z dimensions default to 1 if not specified
    uint32_t Y = Attr->getMaxNumWorkGroupsY()
                     ? Attr->getMaxNumWorkGroupsY()
                           ->EvaluateKnownConstInt(M.getContext())
                           .getExtValue()
                     : 1;
    uint32_t Z = Attr->getMaxNumWorkGroupsZ()
                     ? Attr->getMaxNumWorkGroupsZ()
                           ->EvaluateKnownConstInt(M.getContext())
                           .getExtValue()
                     : 1;

    llvm::SmallString<32> AttrVal;
    llvm::raw_svector_ostream OS(AttrVal);
    OS << X << ',' << Y << ',' << Z;

    F->addFnAttr("amdgpu-max-num-workgroups", AttrVal.str());
  }
}

/// Emits control constants used to change per-architecture behaviour in the
/// AMDGPU ROCm device libraries.
void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
    CodeGen::CodeGenModule &CGM) const {
  StringRef Name = "__oclc_ABI_version";
  llvm::GlobalVariable *OriginalGV = CGM.getModule().getNamedGlobal(Name);
  if (OriginalGV && !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage()))
    return;

  if (CGM.getTarget().getTargetOpts().CodeObjectVersion ==
      llvm::CodeObjectVersionKind::COV_None)
    return;

  auto *Type = llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), 32);
  llvm::Constant *COV = llvm::ConstantInt::get(
      Type, CGM.getTarget().getTargetOpts().CodeObjectVersion);

  // It needs to be constant weak_odr without externally_initialized so that
  // the load instruction can be eliminated by IPSCCP.
  auto *GV = new llvm::GlobalVariable(
      CGM.getModule(), Type, true, llvm::GlobalValue::WeakODRLinkage, COV, Name,
      nullptr, llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
      CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
  GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
  GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility);

  // Replace any external references to this variable with the new global.
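  // The ROCm device libraries reference __oclc_ABI_version as an external
  // constant; rewriting those references to this definition (roughly
  // `@__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4)
  // constant i32 500` for code object v5) lets the optimizer fold the
  // version checks away.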
  if (OriginalGV) {
    OriginalGV->replaceAllUsesWith(GV);
    GV->takeName(OriginalGV);
    OriginalGV->eraseFromParent();
  }
}

void AMDGPUTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  if (requiresAMDGPUProtectedVisibility(D, GV)) {
    GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
    GV->setDSOLocal(true);
  }

  if (GV->isDeclaration())
    return;

  llvm::Function *F = dyn_cast<llvm::Function>(GV);
  if (!F)
    return;

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (FD)
    setFunctionDeclAttributes(FD, F, M);

  if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
    F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");

  if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
    F->addFnAttr("amdgpu-ieee", "false");
}

unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
  return llvm::CallingConv::AMDGPU_KERNEL;
}

// Currently LLVM assumes null pointers always have value 0,
// which results in incorrectly transformed IR. Therefore, instead of
// emitting null pointers in private and local address spaces, a null
// pointer in generic address space is emitted which is cast to a
// pointer in local or private address space.
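// For example, a null pointer in the private address space is emitted as
// `addrspacecast (ptr null to ptr addrspace(5))` (assuming the usual amdgcn
// address-space numbering), leaving the backend to materialize the target's
// real non-zero null value.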
llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
    const CodeGen::CodeGenModule &CGM, llvm::PointerType *PT,
    QualType QT) const {
  if (CGM.getContext().getTargetNullPointerValue(QT) == 0)
    return llvm::ConstantPointerNull::get(PT);

  auto &Ctx = CGM.getContext();
  auto NPT = llvm::PointerType::get(
      PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
  return llvm::ConstantExpr::getAddrSpaceCast(
      llvm::ConstantPointerNull::get(NPT), PT);
}

LangAS
AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
                                                  const VarDecl *D) const {
  assert(!CGM.getLangOpts().OpenCL &&
         !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
         "Address space agnostic languages only");
  LangAS DefaultGlobalAS = getLangASFromTargetAS(
      CGM.getContext().getTargetAddressSpace(LangAS::opencl_global));
  if (!D)
    return DefaultGlobalAS;

  LangAS AddrSpace = D->getType().getAddressSpace();
  if (AddrSpace != LangAS::Default)
    return AddrSpace;

  // Only promote to address space 4 if VarDecl has constant initialization.
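  // For example, a `constexpr`-initialized global can be placed in the
  // constant address space, while a global with a dynamic initializer stays
  // in the default global address space.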
  if (D->getType().isConstantStorage(CGM.getContext(), false, false) &&
      D->hasConstantInitialization()) {
    if (auto ConstAS = CGM.getTarget().getConstantAddressSpace())
      return *ConstAS;
  }
  return DefaultGlobalAS;
}

llvm::SyncScope::ID
AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
                                            SyncScope Scope,
                                            llvm::AtomicOrdering Ordering,
                                            llvm::LLVMContext &Ctx) const {
  std::string Name;
  switch (Scope) {
  case SyncScope::HIPSingleThread:
  case SyncScope::SingleScope:
    Name = "singlethread";
    break;
  case SyncScope::HIPWavefront:
  case SyncScope::OpenCLSubGroup:
  case SyncScope::WavefrontScope:
    Name = "wavefront";
    break;
  case SyncScope::HIPWorkgroup:
  case SyncScope::OpenCLWorkGroup:
  case SyncScope::WorkgroupScope:
    Name = "workgroup";
    break;
  case SyncScope::HIPAgent:
  case SyncScope::OpenCLDevice:
  case SyncScope::DeviceScope:
    Name = "agent";
    break;
  case SyncScope::SystemScope:
  case SyncScope::HIPSystem:
  case SyncScope::OpenCLAllSVMDevices:
    Name = "";
    break;
  }

  if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) {
    if (!Name.empty())
      Name = Twine(Twine(Name) + Twine("-")).str();

    Name = Twine(Twine(Name) + Twine("one-as")).str();
  }

  return Ctx.getOrInsertSyncScopeID(Name);
}

bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  return false;
}

bool AMDGPUTargetCodeGenInfo::shouldEmitDWARFBitFieldSeparators() const {
  return true;
}

void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
    const FunctionType *&FT) const {
  FT = getABIInfo().getContext().adjustFunctionType(
      FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
}

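// Note: CC_OpenCLKernel maps to llvm::CallingConv::AMDGPU_KERNEL (see
// getOpenCLKernelCallingConv above), so CUDA/HIP kernels get the same
// amdgpu_kernel lowering as OpenCL kernels.
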
/// Create an OpenCL kernel for an enqueued block.
///
/// The type of the first argument (the block literal) is the struct type
/// of the block literal instead of a pointer type. The first argument
/// (block literal) is passed directly by value to the kernel. The kernel
/// allocates the same type of struct on the stack, stores the block literal
/// to it, and passes its pointer to the block invoke function. The kernel
/// has the "enqueued-block" function attribute and kernel argument metadata.
llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
    CodeGenFunction &CGF, llvm::Function *Invoke, llvm::Type *BlockTy) const {
  auto &Builder = CGF.Builder;
  auto &C = CGF.getLLVMContext();

  auto *InvokeFT = Invoke->getFunctionType();
  llvm::SmallVector<llvm::Type *, 2> ArgTys;
  llvm::SmallVector<llvm::Metadata *, 8> AddressQuals;
  llvm::SmallVector<llvm::Metadata *, 8> AccessQuals;
  llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames;
  llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames;
  llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals;
  llvm::SmallVector<llvm::Metadata *, 8> ArgNames;

  ArgTys.push_back(BlockTy);
  ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
  AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0)));
  ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
  ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
  AccessQuals.push_back(llvm::MDString::get(C, "none"));
  ArgNames.push_back(llvm::MDString::get(C, "block_literal"));
  for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) {
    ArgTys.push_back(InvokeFT->getParamType(I));
    ArgTypeNames.push_back(llvm::MDString::get(C, "void*"));
    AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3)));
    AccessQuals.push_back(llvm::MDString::get(C, "none"));
    ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*"));
    ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
    ArgNames.push_back(
        llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str()));
  }
  std::string Name = Invoke->getName().str() + "_kernel";
  auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false);
  auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
                                   &CGF.CGM.getModule());
  F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);

  llvm::AttrBuilder KernelAttrs(C);
  // FIXME: The invoke isn't applying the right attributes either
  // FIXME: This is missing setTargetAttributes
  CGF.CGM.addDefaultFunctionDefinitionAttributes(KernelAttrs);
  KernelAttrs.addAttribute("enqueued-block");
  F->addFnAttrs(KernelAttrs);

  auto IP = CGF.Builder.saveIP();
  auto *BB = llvm::BasicBlock::Create(C, "entry", F);
  Builder.SetInsertPoint(BB);
  const auto BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlign(BlockTy);
  auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr);
  BlockPtr->setAlignment(BlockAlign);
  Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
  auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
  llvm::SmallVector<llvm::Value *, 2> Args;
  Args.push_back(Cast);
  for (llvm::Argument &A : llvm::drop_begin(F->args()))
    Args.push_back(&A);
  llvm::CallInst *call = Builder.CreateCall(Invoke, Args);
  call->setCallingConv(Invoke->getCallingConv());
  Builder.CreateRetVoid();
  Builder.restoreIP(IP);

  F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals));
  F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals));
  F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames));
  F->setMetadata("kernel_arg_base_type",
                 llvm::MDNode::get(C, ArgBaseTypeNames));
  F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals));
  if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata)
    F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames));

  return F;
}

void CodeGenModule::handleAMDGPUFlatWorkGroupSizeAttr(
    llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *FlatWGS,
    const ReqdWorkGroupSizeAttr *ReqdWGS, int32_t *MinThreadsVal,
    int32_t *MaxThreadsVal) {
  unsigned Min = 0;
  unsigned Max = 0;
  if (FlatWGS) {
    Min = FlatWGS->getMin()->EvaluateKnownConstInt(getContext()).getExtValue();
    Max = FlatWGS->getMax()->EvaluateKnownConstInt(getContext()).getExtValue();
  }
  if (ReqdWGS && Min == 0 && Max == 0)
    Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();

  if (Min != 0) {
    assert(Min <= Max && "Min must be less than or equal to Max");

    if (MinThreadsVal)
      *MinThreadsVal = Min;
    if (MaxThreadsVal)
      *MaxThreadsVal = Max;
    std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
    if (F)
      F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
  } else
    assert(Max == 0 && "Max must be zero");
}

void CodeGenModule::handleAMDGPUWavesPerEUAttr(
    llvm::Function *F, const AMDGPUWavesPerEUAttr *Attr) {
  unsigned Min =
      Attr->getMin()->EvaluateKnownConstInt(getContext()).getExtValue();
  unsigned Max =
      Attr->getMax()
          ? Attr->getMax()->EvaluateKnownConstInt(getContext()).getExtValue()
          : 0;

  if (Min != 0) {
    assert((Max == 0 || Min <= Max) && "Min must be less than or equal to Max");

    std::string AttrVal = llvm::utostr(Min);
    if (Max != 0)
      AttrVal = AttrVal + "," + llvm::utostr(Max);
    F->addFnAttr("amdgpu-waves-per-eu", AttrVal);
  } else
    assert(Max == 0 && "Max must be zero");
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createAMDGPUTargetCodeGenInfo(CodeGenModule &CGM) {
  return std::make_unique<AMDGPUTargetCodeGenInfo>(CGM.getTypes());
}