//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file declares a TargetTransformInfo::Concept conforming object specific
/// to the AMDGPU target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
140b57cec5SDimitry Andric // 150b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H 180b57cec5SDimitry Andric #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric #include "AMDGPU.h" 210b57cec5SDimitry Andric #include "llvm/CodeGen/BasicTTIImpl.h" 22bdd1243dSDimitry Andric #include <optional> 230b57cec5SDimitry Andric 240b57cec5SDimitry Andric namespace llvm { 250b57cec5SDimitry Andric 26fe6060f1SDimitry Andric class AMDGPUTargetMachine; 27e8d8bef9SDimitry Andric class GCNSubtarget; 28e8d8bef9SDimitry Andric class InstCombiner; 290b57cec5SDimitry Andric class Loop; 300b57cec5SDimitry Andric class ScalarEvolution; 31e8d8bef9SDimitry Andric class SITargetLowering; 320b57cec5SDimitry Andric class Type; 330b57cec5SDimitry Andric class Value; 340b57cec5SDimitry Andric 350b57cec5SDimitry Andric class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> { 360b57cec5SDimitry Andric using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>; 370b57cec5SDimitry Andric using TTI = TargetTransformInfo; 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric friend BaseT; 400b57cec5SDimitry Andric 410b57cec5SDimitry Andric Triple TargetTriple; 420b57cec5SDimitry Andric 43e8d8bef9SDimitry Andric const TargetSubtargetInfo *ST; 448bcb0991SDimitry Andric const TargetLoweringBase *TLI; 458bcb0991SDimitry Andric 468bcb0991SDimitry Andric const TargetSubtargetInfo *getST() const { return ST; } 478bcb0991SDimitry Andric const TargetLoweringBase *getTLI() const { return TLI; } 488bcb0991SDimitry Andric 490b57cec5SDimitry Andric public: 50e8d8bef9SDimitry Andric explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F); 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, 53349cc55cSDimitry Andric 
TTI::UnrollingPreferences &UP, 54349cc55cSDimitry Andric OptimizationRemarkEmitter *ORE); 555ffd83dbSDimitry Andric 565ffd83dbSDimitry Andric void getPeelingPreferences(Loop *L, ScalarEvolution &SE, 575ffd83dbSDimitry Andric TTI::PeelingPreferences &PP); 5806c3fb27SDimitry Andric 5906c3fb27SDimitry Andric int64_t getMaxMemIntrinsicInlineSizeThreshold() const; 600b57cec5SDimitry Andric }; 610b57cec5SDimitry Andric 620b57cec5SDimitry Andric class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> { 630b57cec5SDimitry Andric using BaseT = BasicTTIImplBase<GCNTTIImpl>; 640b57cec5SDimitry Andric using TTI = TargetTransformInfo; 650b57cec5SDimitry Andric 660b57cec5SDimitry Andric friend BaseT; 670b57cec5SDimitry Andric 680b57cec5SDimitry Andric const GCNSubtarget *ST; 695ffd83dbSDimitry Andric const SITargetLowering *TLI; 700b57cec5SDimitry Andric AMDGPUTTIImpl CommonTTI; 71e8d8bef9SDimitry Andric bool IsGraphics; 72480093f4SDimitry Andric bool HasFP32Denormals; 73e8d8bef9SDimitry Andric bool HasFP64FP16Denormals; 7406c3fb27SDimitry Andric static constexpr bool InlinerVectorBonusPercent = 0; 750b57cec5SDimitry Andric 76e8d8bef9SDimitry Andric static const FeatureBitset InlineFeatureIgnoreList; 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric const GCNSubtarget *getST() const { return ST; } 79e8d8bef9SDimitry Andric const SITargetLowering *getTLI() const { return TLI; } 800b57cec5SDimitry Andric 810b57cec5SDimitry Andric static inline int getFullRateInstrCost() { 820b57cec5SDimitry Andric return TargetTransformInfo::TCC_Basic; 830b57cec5SDimitry Andric } 840b57cec5SDimitry Andric 85349cc55cSDimitry Andric static inline int getHalfRateInstrCost(TTI::TargetCostKind CostKind) { 86e8d8bef9SDimitry Andric return CostKind == TTI::TCK_CodeSize ? 2 87e8d8bef9SDimitry Andric : 2 * TargetTransformInfo::TCC_Basic; 880b57cec5SDimitry Andric } 890b57cec5SDimitry Andric 900b57cec5SDimitry Andric // TODO: The size is usually 8 bytes, but takes 4x as many cycles. 
Maybe 910b57cec5SDimitry Andric // should be 2 or 4. 92349cc55cSDimitry Andric static inline int getQuarterRateInstrCost(TTI::TargetCostKind CostKind) { 93e8d8bef9SDimitry Andric return CostKind == TTI::TCK_CodeSize ? 2 94e8d8bef9SDimitry Andric : 4 * TargetTransformInfo::TCC_Basic; 950b57cec5SDimitry Andric } 960b57cec5SDimitry Andric 970b57cec5SDimitry Andric // On some parts, normal fp64 operations are half rate, and others 980b57cec5SDimitry Andric // quarter. This also applies to some integer operations. 99349cc55cSDimitry Andric int get64BitInstrCost(TTI::TargetCostKind CostKind) const; 1000b57cec5SDimitry Andric 101bdd1243dSDimitry Andric std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const; 102bdd1243dSDimitry Andric 1030b57cec5SDimitry Andric public: 104e8d8bef9SDimitry Andric explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F); 1050b57cec5SDimitry Andric 10606c3fb27SDimitry Andric bool hasBranchDivergence(const Function *F = nullptr) const; 1070b57cec5SDimitry Andric 1080b57cec5SDimitry Andric void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, 109349cc55cSDimitry Andric TTI::UnrollingPreferences &UP, 110349cc55cSDimitry Andric OptimizationRemarkEmitter *ORE); 1110b57cec5SDimitry Andric 1125ffd83dbSDimitry Andric void getPeelingPreferences(Loop *L, ScalarEvolution &SE, 1135ffd83dbSDimitry Andric TTI::PeelingPreferences &PP); 1145ffd83dbSDimitry Andric 1150b57cec5SDimitry Andric TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) { 1160b57cec5SDimitry Andric assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); 1170b57cec5SDimitry Andric return TTI::PSK_FastHardware; 1180b57cec5SDimitry Andric } 1190b57cec5SDimitry Andric 1205ffd83dbSDimitry Andric unsigned getNumberOfRegisters(unsigned RCID) const; 121fe6060f1SDimitry Andric TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const; 1220b57cec5SDimitry Andric unsigned getMinVectorRegisterBitWidth() const; 123e8d8bef9SDimitry 
Andric unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const; 1240b57cec5SDimitry Andric unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, 1250b57cec5SDimitry Andric unsigned ChainSizeInBytes, 1260b57cec5SDimitry Andric VectorType *VecTy) const; 1270b57cec5SDimitry Andric unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, 1280b57cec5SDimitry Andric unsigned ChainSizeInBytes, 1290b57cec5SDimitry Andric VectorType *VecTy) const; 1300b57cec5SDimitry Andric unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; 1310b57cec5SDimitry Andric 1325ffd83dbSDimitry Andric bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment, 1330b57cec5SDimitry Andric unsigned AddrSpace) const; 1345ffd83dbSDimitry Andric bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, 1350b57cec5SDimitry Andric unsigned AddrSpace) const; 1365ffd83dbSDimitry Andric bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, 1370b57cec5SDimitry Andric unsigned AddrSpace) const; 13806c3fb27SDimitry Andric 13906c3fb27SDimitry Andric int64_t getMaxMemIntrinsicInlineSizeThreshold() const; 140bdd1243dSDimitry Andric Type *getMemcpyLoopLoweringType( 141bdd1243dSDimitry Andric LLVMContext & Context, Value * Length, unsigned SrcAddrSpace, 142bdd1243dSDimitry Andric unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, 143bdd1243dSDimitry Andric std::optional<uint32_t> AtomicElementSize) const; 1440b57cec5SDimitry Andric 14581ad6265SDimitry Andric void getMemcpyLoopResidualLoweringType( 14681ad6265SDimitry Andric SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, 14781ad6265SDimitry Andric unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, 14881ad6265SDimitry Andric unsigned SrcAlign, unsigned DestAlign, 149bdd1243dSDimitry Andric std::optional<uint32_t> AtomicCpySize) const; 15006c3fb27SDimitry Andric unsigned getMaxInterleaveFactor(ElementCount VF); 1510b57cec5SDimitry Andric 
1520b57cec5SDimitry Andric bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; 1530b57cec5SDimitry Andric 154fe6060f1SDimitry Andric InstructionCost getArithmeticInstrCost( 155349cc55cSDimitry Andric unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, 156bdd1243dSDimitry Andric TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, 157bdd1243dSDimitry Andric TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, 158*0fca6ea1SDimitry Andric ArrayRef<const Value *> Args = std::nullopt, 159480093f4SDimitry Andric const Instruction *CxtI = nullptr); 1600b57cec5SDimitry Andric 161fe6060f1SDimitry Andric InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, 162fe6060f1SDimitry Andric const Instruction *I = nullptr); 1635ffd83dbSDimitry Andric 1645ffd83dbSDimitry Andric bool isInlineAsmSourceOfDivergence(const CallInst *CI, 1655ffd83dbSDimitry Andric ArrayRef<unsigned> Indices = {}) const; 1660b57cec5SDimitry Andric 167bdd1243dSDimitry Andric using BaseT::getVectorInstrCost; 168fe6060f1SDimitry Andric InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, 169bdd1243dSDimitry Andric TTI::TargetCostKind CostKind, 170bdd1243dSDimitry Andric unsigned Index, Value *Op0, Value *Op1); 171bdd1243dSDimitry Andric 172bdd1243dSDimitry Andric bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const; 1730b57cec5SDimitry Andric bool isSourceOfDivergence(const Value *V) const; 1740b57cec5SDimitry Andric bool isAlwaysUniform(const Value *V) const; 1750b57cec5SDimitry Andric 17606c3fb27SDimitry Andric bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const { 17706c3fb27SDimitry Andric if (ToAS == AMDGPUAS::FLAT_ADDRESS) { 17806c3fb27SDimitry Andric switch (FromAS) { 17906c3fb27SDimitry Andric case AMDGPUAS::GLOBAL_ADDRESS: 18006c3fb27SDimitry Andric case AMDGPUAS::CONSTANT_ADDRESS: 18106c3fb27SDimitry Andric case AMDGPUAS::CONSTANT_ADDRESS_32BIT: 18206c3fb27SDimitry Andric case 
AMDGPUAS::LOCAL_ADDRESS: 18306c3fb27SDimitry Andric case AMDGPUAS::PRIVATE_ADDRESS: 18406c3fb27SDimitry Andric return true; 18506c3fb27SDimitry Andric default: 18606c3fb27SDimitry Andric break; 18706c3fb27SDimitry Andric } 18806c3fb27SDimitry Andric return false; 18906c3fb27SDimitry Andric } 19006c3fb27SDimitry Andric if ((FromAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT && 19106c3fb27SDimitry Andric ToAS == AMDGPUAS::CONSTANT_ADDRESS) || 19206c3fb27SDimitry Andric (FromAS == AMDGPUAS::CONSTANT_ADDRESS && 19306c3fb27SDimitry Andric ToAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT)) 19406c3fb27SDimitry Andric return true; 19506c3fb27SDimitry Andric return false; 19606c3fb27SDimitry Andric } 19706c3fb27SDimitry Andric 19806c3fb27SDimitry Andric bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const { 19906c3fb27SDimitry Andric return AMDGPU::addrspacesMayAlias(AS0, AS1); 20006c3fb27SDimitry Andric } 20106c3fb27SDimitry Andric 2020b57cec5SDimitry Andric unsigned getFlatAddressSpace() const { 2030b57cec5SDimitry Andric // Don't bother running InferAddressSpaces pass on graphics shaders which 2040b57cec5SDimitry Andric // don't use flat addressing. 
205e8d8bef9SDimitry Andric if (IsGraphics) 2060b57cec5SDimitry Andric return -1; 2070b57cec5SDimitry Andric return AMDGPUAS::FLAT_ADDRESS; 2080b57cec5SDimitry Andric } 2090b57cec5SDimitry Andric 2108bcb0991SDimitry Andric bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, 2118bcb0991SDimitry Andric Intrinsic::ID IID) const; 212349cc55cSDimitry Andric 213349cc55cSDimitry Andric bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const { 214349cc55cSDimitry Andric return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS && 215349cc55cSDimitry Andric AS != AMDGPUAS::PRIVATE_ADDRESS; 216349cc55cSDimitry Andric } 217349cc55cSDimitry Andric 2185ffd83dbSDimitry Andric Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, 2195ffd83dbSDimitry Andric Value *NewV) const; 2208bcb0991SDimitry Andric 22106c3fb27SDimitry Andric bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, 22206c3fb27SDimitry Andric const Value *Op1, InstCombiner &IC) const; 223bdd1243dSDimitry Andric std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, 224e8d8bef9SDimitry Andric IntrinsicInst &II) const; 225bdd1243dSDimitry Andric std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( 226e8d8bef9SDimitry Andric InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, 227e8d8bef9SDimitry Andric APInt &UndefElts2, APInt &UndefElts3, 228e8d8bef9SDimitry Andric std::function<void(Instruction *, unsigned, APInt, APInt &)> 229e8d8bef9SDimitry Andric SimplifyAndSetOp) const; 230e8d8bef9SDimitry Andric 231fe6060f1SDimitry Andric InstructionCost getVectorSplitCost() { return 0; } 2320b57cec5SDimitry Andric 233fe6060f1SDimitry Andric InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, 234bdd1243dSDimitry Andric ArrayRef<int> Mask, 235bdd1243dSDimitry Andric TTI::TargetCostKind CostKind, int Index, 23681ad6265SDimitry Andric VectorType *SubTp, 237*0fca6ea1SDimitry Andric ArrayRef<const 
Value *> Args = std::nullopt, 238*0fca6ea1SDimitry Andric const Instruction *CxtI = nullptr); 2390b57cec5SDimitry Andric 2400b57cec5SDimitry Andric bool areInlineCompatible(const Function *Caller, 2410b57cec5SDimitry Andric const Function *Callee) const; 2420b57cec5SDimitry Andric 24306c3fb27SDimitry Andric unsigned getInliningThresholdMultiplier() const { return 11; } 244e8d8bef9SDimitry Andric unsigned adjustInliningThreshold(const CallBase *CB) const; 24506c3fb27SDimitry Andric unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const; 2460b57cec5SDimitry Andric 24706c3fb27SDimitry Andric int getInlinerVectorBonusPercent() const { return InlinerVectorBonusPercent; } 2480b57cec5SDimitry Andric 249fe6060f1SDimitry Andric InstructionCost getArithmeticReductionCost( 250bdd1243dSDimitry Andric unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF, 251349cc55cSDimitry Andric TTI::TargetCostKind CostKind); 2525ffd83dbSDimitry Andric 253fe6060f1SDimitry Andric InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, 2545ffd83dbSDimitry Andric TTI::TargetCostKind CostKind); 25506c3fb27SDimitry Andric InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, 25606c3fb27SDimitry Andric FastMathFlags FMF, 257349cc55cSDimitry Andric TTI::TargetCostKind CostKind); 258cb14a3feSDimitry Andric 259cb14a3feSDimitry Andric /// Data cache line size for LoopDataPrefetch pass. Has no use before GFX12. 260cb14a3feSDimitry Andric unsigned getCacheLineSize() const override { return 128; } 261cb14a3feSDimitry Andric 262cb14a3feSDimitry Andric /// How much before a load we should place the prefetch instruction. 263cb14a3feSDimitry Andric /// This is currently measured in number of IR instructions. 264cb14a3feSDimitry Andric unsigned getPrefetchDistance() const override; 265cb14a3feSDimitry Andric 266cb14a3feSDimitry Andric /// \return if target want to issue a prefetch in address space \p AS. 
267cb14a3feSDimitry Andric bool shouldPrefetchAddressSpace(unsigned AS) const override; 2680b57cec5SDimitry Andric }; 2690b57cec5SDimitry Andric 2700b57cec5SDimitry Andric } // end namespace llvm 2710b57cec5SDimitry Andric 2720b57cec5SDimitry Andric #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H 273