Target/AMDGPU/AMDGPUTargetTransformInfo.h

//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AMDGPU target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
0b57cec5SDimitry Andric#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric#include "AMDGPU.h"
0b57cec5SDimitry Andric#include "llvm/CodeGen/BasicTTIImpl.h"
bdd1243dSDimitry Andric#include <optional>
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricnamespace llvm {
0b57cec5SDimitry Andric
// Forward declarations. The declarations in this header use these types only
// through pointers and references, so incomplete types are sufficient here.
class AMDGPUTargetMachine;
class GCNSubtarget;
class InstCombiner;
class Loop;
class ScalarEvolution;
class SITargetLowering;
class Type;
class Value;
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricclass AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
0b57cec5SDimitry Andric  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
0b57cec5SDimitry Andric  using TTI = TargetTransformInfo;
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  friend BaseT;
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  Triple TargetTriple;
0b57cec5SDimitry Andric
e8d8bef9SDimitry Andric  const TargetSubtargetInfo *ST;
8bcb0991SDimitry Andric  const TargetLoweringBase *TLI;
8bcb0991SDimitry Andric
8bcb0991SDimitry Andric  const TargetSubtargetInfo *getST() const { return ST; }
8bcb0991SDimitry Andric  const TargetLoweringBase *getTLI() const { return TLI; }
8bcb0991SDimitry Andric
0b57cec5SDimitry Andricpublic:
e8d8bef9SDimitry Andric  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
349cc55cSDimitry Andric                               TTI::UnrollingPreferences &UP,
349cc55cSDimitry Andric                               OptimizationRemarkEmitter *ORE);
5ffd83dbSDimitry Andric
5ffd83dbSDimitry Andric  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
5ffd83dbSDimitry Andric                             TTI::PeelingPreferences &PP);
06c3fb27SDimitry Andric
06c3fb27SDimitry Andric  int64_t getMaxMemIntrinsicInlineSizeThreshold() const;
0b57cec5SDimitry Andric};
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricclass GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
0b57cec5SDimitry Andric  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
0b57cec5SDimitry Andric  using TTI = TargetTransformInfo;
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  friend BaseT;
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  const GCNSubtarget *ST;
5ffd83dbSDimitry Andric  const SITargetLowering *TLI;
0b57cec5SDimitry Andric  AMDGPUTTIImpl CommonTTI;
e8d8bef9SDimitry Andric  bool IsGraphics;
480093f4SDimitry Andric  bool HasFP32Denormals;
e8d8bef9SDimitry Andric  bool HasFP64FP16Denormals;
06c3fb27SDimitry Andric  static constexpr bool InlinerVectorBonusPercent = 0;
0b57cec5SDimitry Andric
e8d8bef9SDimitry Andric  static const FeatureBitset InlineFeatureIgnoreList;
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  const GCNSubtarget *getST() const { return ST; }
e8d8bef9SDimitry Andric  const SITargetLowering *getTLI() const { return TLI; }
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  static inline int getFullRateInstrCost() {
0b57cec5SDimitry Andric    return TargetTransformInfo::TCC_Basic;
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric
349cc55cSDimitry Andric  static inline int getHalfRateInstrCost(TTI::TargetCostKind CostKind) {
e8d8bef9SDimitry Andric    return CostKind == TTI::TCK_CodeSize ? 2
e8d8bef9SDimitry Andric                                         : 2 * TargetTransformInfo::TCC_Basic;
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
0b57cec5SDimitry Andric  // should be 2 or 4.
349cc55cSDimitry Andric  static inline int getQuarterRateInstrCost(TTI::TargetCostKind CostKind) {
e8d8bef9SDimitry Andric    return CostKind == TTI::TCK_CodeSize ? 2
e8d8bef9SDimitry Andric                                         : 4 * TargetTransformInfo::TCC_Basic;
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  // On some parts, normal fp64 operations are half rate, and others
0b57cec5SDimitry Andric  // quarter. This also applies to some integer operations.
349cc55cSDimitry Andric  int get64BitInstrCost(TTI::TargetCostKind CostKind) const;
0b57cec5SDimitry Andric
bdd1243dSDimitry Andric  std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const;
bdd1243dSDimitry Andric
0b57cec5SDimitry Andricpublic:
e8d8bef9SDimitry Andric  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
0b57cec5SDimitry Andric
06c3fb27SDimitry Andric  bool hasBranchDivergence(const Function *F = nullptr) const;
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
349cc55cSDimitry Andric                               TTI::UnrollingPreferences &UP,
349cc55cSDimitry Andric                               OptimizationRemarkEmitter *ORE);
0b57cec5SDimitry Andric
5ffd83dbSDimitry Andric  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
5ffd83dbSDimitry Andric                             TTI::PeelingPreferences &PP);
5ffd83dbSDimitry Andric
0b57cec5SDimitry Andric  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
0b57cec5SDimitry Andric    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
0b57cec5SDimitry Andric    return TTI::PSK_FastHardware;
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric
5ffd83dbSDimitry Andric  unsigned getNumberOfRegisters(unsigned RCID) const;
fe6060f1SDimitry Andric  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
0b57cec5SDimitry Andric  unsigned getMinVectorRegisterBitWidth() const;
e8d8bef9SDimitry Andric  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
0b57cec5SDimitry Andric  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
0b57cec5SDimitry Andric                               unsigned ChainSizeInBytes,
0b57cec5SDimitry Andric                               VectorType *VecTy) const;
0b57cec5SDimitry Andric  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
0b57cec5SDimitry Andric                                unsigned ChainSizeInBytes,
0b57cec5SDimitry Andric                                VectorType *VecTy) const;
0b57cec5SDimitry Andric  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
0b57cec5SDimitry Andric
5ffd83dbSDimitry Andric  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
0b57cec5SDimitry Andric                                  unsigned AddrSpace) const;
5ffd83dbSDimitry Andric  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
0b57cec5SDimitry Andric                                   unsigned AddrSpace) const;
5ffd83dbSDimitry Andric  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
0b57cec5SDimitry Andric                                    unsigned AddrSpace) const;
06c3fb27SDimitry Andric
06c3fb27SDimitry Andric  int64_t getMaxMemIntrinsicInlineSizeThreshold() const;
bdd1243dSDimitry Andric  Type *getMemcpyLoopLoweringType(
bdd1243dSDimitry Andric      LLVMContext & Context, Value * Length, unsigned SrcAddrSpace,
bdd1243dSDimitry Andric      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
bdd1243dSDimitry Andric      std::optional<uint32_t> AtomicElementSize) const;
0b57cec5SDimitry Andric
81ad6265SDimitry Andric  void getMemcpyLoopResidualLoweringType(
81ad6265SDimitry Andric      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
81ad6265SDimitry Andric      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
81ad6265SDimitry Andric      unsigned SrcAlign, unsigned DestAlign,
bdd1243dSDimitry Andric      std::optional<uint32_t> AtomicCpySize) const;
06c3fb27SDimitry Andric  unsigned getMaxInterleaveFactor(ElementCount VF);
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
0b57cec5SDimitry Andric
fe6060f1SDimitry Andric  InstructionCost getArithmeticInstrCost(
349cc55cSDimitry Andric      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
bdd1243dSDimitry Andric      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
bdd1243dSDimitry Andric      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
*0fca6ea1SDimitry Andric      ArrayRef<const Value *> Args = std::nullopt,
480093f4SDimitry Andric      const Instruction *CxtI = nullptr);
0b57cec5SDimitry Andric
fe6060f1SDimitry Andric  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
fe6060f1SDimitry Andric                                 const Instruction *I = nullptr);
5ffd83dbSDimitry Andric
5ffd83dbSDimitry Andric  bool isInlineAsmSourceOfDivergence(const CallInst *CI,
5ffd83dbSDimitry Andric                                     ArrayRef<unsigned> Indices = {}) const;
0b57cec5SDimitry Andric
bdd1243dSDimitry Andric  using BaseT::getVectorInstrCost;
fe6060f1SDimitry Andric  InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
bdd1243dSDimitry Andric                                     TTI::TargetCostKind CostKind,
bdd1243dSDimitry Andric                                     unsigned Index, Value *Op0, Value *Op1);
bdd1243dSDimitry Andric
bdd1243dSDimitry Andric  bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const;
0b57cec5SDimitry Andric  bool isSourceOfDivergence(const Value *V) const;
0b57cec5SDimitry Andric  bool isAlwaysUniform(const Value *V) const;
0b57cec5SDimitry Andric
06c3fb27SDimitry Andric  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
06c3fb27SDimitry Andric    if (ToAS == AMDGPUAS::FLAT_ADDRESS) {
06c3fb27SDimitry Andric      switch (FromAS) {
06c3fb27SDimitry Andric      case AMDGPUAS::GLOBAL_ADDRESS:
06c3fb27SDimitry Andric      case AMDGPUAS::CONSTANT_ADDRESS:
06c3fb27SDimitry Andric      case AMDGPUAS::CONSTANT_ADDRESS_32BIT:
06c3fb27SDimitry Andric      case AMDGPUAS::LOCAL_ADDRESS:
06c3fb27SDimitry Andric      case AMDGPUAS::PRIVATE_ADDRESS:
06c3fb27SDimitry Andric        return true;
06c3fb27SDimitry Andric      default:
06c3fb27SDimitry Andric        break;
06c3fb27SDimitry Andric      }
06c3fb27SDimitry Andric      return false;
06c3fb27SDimitry Andric    }
06c3fb27SDimitry Andric    if ((FromAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
06c3fb27SDimitry Andric         ToAS == AMDGPUAS::CONSTANT_ADDRESS) ||
06c3fb27SDimitry Andric        (FromAS == AMDGPUAS::CONSTANT_ADDRESS &&
06c3fb27SDimitry Andric         ToAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT))
06c3fb27SDimitry Andric      return true;
06c3fb27SDimitry Andric    return false;
06c3fb27SDimitry Andric  }
06c3fb27SDimitry Andric
06c3fb27SDimitry Andric  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
06c3fb27SDimitry Andric    return AMDGPU::addrspacesMayAlias(AS0, AS1);
06c3fb27SDimitry Andric  }
06c3fb27SDimitry Andric
0b57cec5SDimitry Andric  unsigned getFlatAddressSpace() const {
0b57cec5SDimitry Andric    // Don't bother running InferAddressSpaces pass on graphics shaders which
0b57cec5SDimitry Andric    // don't use flat addressing.
e8d8bef9SDimitry Andric    if (IsGraphics)
0b57cec5SDimitry Andric      return -1;
0b57cec5SDimitry Andric    return AMDGPUAS::FLAT_ADDRESS;
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric
8bcb0991SDimitry Andric  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
8bcb0991SDimitry Andric                                  Intrinsic::ID IID) const;
349cc55cSDimitry Andric
349cc55cSDimitry Andric  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
349cc55cSDimitry Andric    return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
349cc55cSDimitry Andric           AS != AMDGPUAS::PRIVATE_ADDRESS;
349cc55cSDimitry Andric  }
349cc55cSDimitry Andric
5ffd83dbSDimitry Andric  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
5ffd83dbSDimitry Andric                                          Value *NewV) const;
8bcb0991SDimitry Andric
06c3fb27SDimitry Andric  bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0,
06c3fb27SDimitry Andric                                 const Value *Op1, InstCombiner &IC) const;
bdd1243dSDimitry Andric  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
e8d8bef9SDimitry Andric                                                    IntrinsicInst &II) const;
bdd1243dSDimitry Andric  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
e8d8bef9SDimitry Andric      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
e8d8bef9SDimitry Andric      APInt &UndefElts2, APInt &UndefElts3,
e8d8bef9SDimitry Andric      std::function<void(Instruction *, unsigned, APInt, APInt &)>
e8d8bef9SDimitry Andric          SimplifyAndSetOp) const;
e8d8bef9SDimitry Andric
fe6060f1SDimitry Andric  InstructionCost getVectorSplitCost() { return 0; }
0b57cec5SDimitry Andric
fe6060f1SDimitry Andric  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
bdd1243dSDimitry Andric                                 ArrayRef<int> Mask,
bdd1243dSDimitry Andric                                 TTI::TargetCostKind CostKind, int Index,
81ad6265SDimitry Andric                                 VectorType *SubTp,
*0fca6ea1SDimitry Andric                                 ArrayRef<const Value *> Args = std::nullopt,
*0fca6ea1SDimitry Andric                                 const Instruction *CxtI = nullptr);
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  bool areInlineCompatible(const Function *Caller,
0b57cec5SDimitry Andric                           const Function *Callee) const;
0b57cec5SDimitry Andric
06c3fb27SDimitry Andric  unsigned getInliningThresholdMultiplier() const { return 11; }
e8d8bef9SDimitry Andric  unsigned adjustInliningThreshold(const CallBase *CB) const;
06c3fb27SDimitry Andric  unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;
0b57cec5SDimitry Andric
06c3fb27SDimitry Andric  int getInlinerVectorBonusPercent() const { return InlinerVectorBonusPercent; }
0b57cec5SDimitry Andric
fe6060f1SDimitry Andric  InstructionCost getArithmeticReductionCost(
bdd1243dSDimitry Andric      unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
349cc55cSDimitry Andric      TTI::TargetCostKind CostKind);
5ffd83dbSDimitry Andric
fe6060f1SDimitry Andric  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
5ffd83dbSDimitry Andric                                        TTI::TargetCostKind CostKind);
06c3fb27SDimitry Andric  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
06c3fb27SDimitry Andric                                         FastMathFlags FMF,
349cc55cSDimitry Andric                                         TTI::TargetCostKind CostKind);
cb14a3feSDimitry Andric
cb14a3feSDimitry Andric  /// Data cache line size for LoopDataPrefetch pass. Has no use before GFX12.
cb14a3feSDimitry Andric  unsigned getCacheLineSize() const override { return 128; }
cb14a3feSDimitry Andric
cb14a3feSDimitry Andric  /// How much before a load we should place the prefetch instruction.
cb14a3feSDimitry Andric  /// This is currently measured in number of IR instructions.
cb14a3feSDimitry Andric  unsigned getPrefetchDistance() const override;
cb14a3feSDimitry Andric
cb14a3feSDimitry Andric  /// \return if target want to issue a prefetch in address space \p AS.
cb14a3feSDimitry Andric  bool shouldPrefetchAddressSpace(unsigned AS) const override;
0b57cec5SDimitry Andric};
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric} // end namespace llvm
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H