xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file
/// This file declares a TargetTransformInfo::Concept conforming object specific to the
110b57cec5SDimitry Andric /// AMDGPU target machine. It uses the target's detailed information to
120b57cec5SDimitry Andric /// provide more precise answers to certain TTI queries, while letting the
130b57cec5SDimitry Andric /// target independent and default TTI implementations handle the rest.
140b57cec5SDimitry Andric //
150b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
160b57cec5SDimitry Andric 
170b57cec5SDimitry Andric #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
180b57cec5SDimitry Andric #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric #include "AMDGPU.h"
210b57cec5SDimitry Andric #include "llvm/CodeGen/BasicTTIImpl.h"
22bdd1243dSDimitry Andric #include <optional>
230b57cec5SDimitry Andric 
240b57cec5SDimitry Andric namespace llvm {
250b57cec5SDimitry Andric 
26fe6060f1SDimitry Andric class AMDGPUTargetMachine;
27e8d8bef9SDimitry Andric class GCNSubtarget;
28e8d8bef9SDimitry Andric class InstCombiner;
290b57cec5SDimitry Andric class Loop;
300b57cec5SDimitry Andric class ScalarEvolution;
31e8d8bef9SDimitry Andric class SITargetLowering;
320b57cec5SDimitry Andric class Type;
330b57cec5SDimitry Andric class Value;
340b57cec5SDimitry Andric 
/// Common TTI implementation shared by all AMDGPU subtargets. It only answers
/// the subtarget-independent queries (loop unrolling/peeling preferences and
/// the memcpy inline-expansion threshold); GCN-specific costing lives in
/// GCNTTIImpl below.
class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  // BasicTTIImplBase is a CRTP base; it needs access to the private
  // getST()/getTLI() accessors below.
  friend BaseT;

  Triple TargetTriple;

  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  // CRTP hooks required by BasicTTIImplBase.
  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  /// Tune loop-unrolling heuristics for AMDGPU (defined out of line).
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  /// Tune loop-peeling heuristics for AMDGPU (defined out of line).
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  /// \return the largest mem-intrinsic size (in bytes) worth expanding
  /// inline instead of calling the library routine.
  int64_t getMaxMemIntrinsicInlineSizeThreshold() const;
};
610b57cec5SDimitry Andric 
620b57cec5SDimitry Andric class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
630b57cec5SDimitry Andric   using BaseT = BasicTTIImplBase<GCNTTIImpl>;
640b57cec5SDimitry Andric   using TTI = TargetTransformInfo;
650b57cec5SDimitry Andric 
660b57cec5SDimitry Andric   friend BaseT;
670b57cec5SDimitry Andric 
680b57cec5SDimitry Andric   const GCNSubtarget *ST;
695ffd83dbSDimitry Andric   const SITargetLowering *TLI;
700b57cec5SDimitry Andric   AMDGPUTTIImpl CommonTTI;
71e8d8bef9SDimitry Andric   bool IsGraphics;
72480093f4SDimitry Andric   bool HasFP32Denormals;
73e8d8bef9SDimitry Andric   bool HasFP64FP16Denormals;
7406c3fb27SDimitry Andric   static constexpr bool InlinerVectorBonusPercent = 0;
750b57cec5SDimitry Andric 
76e8d8bef9SDimitry Andric   static const FeatureBitset InlineFeatureIgnoreList;
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric   const GCNSubtarget *getST() const { return ST; }
79e8d8bef9SDimitry Andric   const SITargetLowering *getTLI() const { return TLI; }
800b57cec5SDimitry Andric 
810b57cec5SDimitry Andric   static inline int getFullRateInstrCost() {
820b57cec5SDimitry Andric     return TargetTransformInfo::TCC_Basic;
830b57cec5SDimitry Andric   }
840b57cec5SDimitry Andric 
85349cc55cSDimitry Andric   static inline int getHalfRateInstrCost(TTI::TargetCostKind CostKind) {
86e8d8bef9SDimitry Andric     return CostKind == TTI::TCK_CodeSize ? 2
87e8d8bef9SDimitry Andric                                          : 2 * TargetTransformInfo::TCC_Basic;
880b57cec5SDimitry Andric   }
890b57cec5SDimitry Andric 
900b57cec5SDimitry Andric   // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
910b57cec5SDimitry Andric   // should be 2 or 4.
92349cc55cSDimitry Andric   static inline int getQuarterRateInstrCost(TTI::TargetCostKind CostKind) {
93e8d8bef9SDimitry Andric     return CostKind == TTI::TCK_CodeSize ? 2
94e8d8bef9SDimitry Andric                                          : 4 * TargetTransformInfo::TCC_Basic;
950b57cec5SDimitry Andric   }
960b57cec5SDimitry Andric 
970b57cec5SDimitry Andric   // On some parts, normal fp64 operations are half rate, and others
980b57cec5SDimitry Andric   // quarter. This also applies to some integer operations.
99349cc55cSDimitry Andric   int get64BitInstrCost(TTI::TargetCostKind CostKind) const;
1000b57cec5SDimitry Andric 
101bdd1243dSDimitry Andric   std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const;
102bdd1243dSDimitry Andric 
1030b57cec5SDimitry Andric public:
104e8d8bef9SDimitry Andric   explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
1050b57cec5SDimitry Andric 
10606c3fb27SDimitry Andric   bool hasBranchDivergence(const Function *F = nullptr) const;
1070b57cec5SDimitry Andric 
1080b57cec5SDimitry Andric   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
109349cc55cSDimitry Andric                                TTI::UnrollingPreferences &UP,
110349cc55cSDimitry Andric                                OptimizationRemarkEmitter *ORE);
1110b57cec5SDimitry Andric 
1125ffd83dbSDimitry Andric   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1135ffd83dbSDimitry Andric                              TTI::PeelingPreferences &PP);
1145ffd83dbSDimitry Andric 
1150b57cec5SDimitry Andric   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
1160b57cec5SDimitry Andric     assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
1170b57cec5SDimitry Andric     return TTI::PSK_FastHardware;
1180b57cec5SDimitry Andric   }
1190b57cec5SDimitry Andric 
1205ffd83dbSDimitry Andric   unsigned getNumberOfRegisters(unsigned RCID) const;
121fe6060f1SDimitry Andric   TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
1220b57cec5SDimitry Andric   unsigned getMinVectorRegisterBitWidth() const;
123e8d8bef9SDimitry Andric   unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
1240b57cec5SDimitry Andric   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1250b57cec5SDimitry Andric                                unsigned ChainSizeInBytes,
1260b57cec5SDimitry Andric                                VectorType *VecTy) const;
1270b57cec5SDimitry Andric   unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1280b57cec5SDimitry Andric                                 unsigned ChainSizeInBytes,
1290b57cec5SDimitry Andric                                 VectorType *VecTy) const;
1300b57cec5SDimitry Andric   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1310b57cec5SDimitry Andric 
1325ffd83dbSDimitry Andric   bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
1330b57cec5SDimitry Andric                                   unsigned AddrSpace) const;
1345ffd83dbSDimitry Andric   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1350b57cec5SDimitry Andric                                    unsigned AddrSpace) const;
1365ffd83dbSDimitry Andric   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1370b57cec5SDimitry Andric                                     unsigned AddrSpace) const;
13806c3fb27SDimitry Andric 
13906c3fb27SDimitry Andric   int64_t getMaxMemIntrinsicInlineSizeThreshold() const;
140bdd1243dSDimitry Andric   Type *getMemcpyLoopLoweringType(
141bdd1243dSDimitry Andric       LLVMContext & Context, Value * Length, unsigned SrcAddrSpace,
142bdd1243dSDimitry Andric       unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
143bdd1243dSDimitry Andric       std::optional<uint32_t> AtomicElementSize) const;
1440b57cec5SDimitry Andric 
14581ad6265SDimitry Andric   void getMemcpyLoopResidualLoweringType(
14681ad6265SDimitry Andric       SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
14781ad6265SDimitry Andric       unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
14881ad6265SDimitry Andric       unsigned SrcAlign, unsigned DestAlign,
149bdd1243dSDimitry Andric       std::optional<uint32_t> AtomicCpySize) const;
15006c3fb27SDimitry Andric   unsigned getMaxInterleaveFactor(ElementCount VF);
1510b57cec5SDimitry Andric 
1520b57cec5SDimitry Andric   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
1530b57cec5SDimitry Andric 
154fe6060f1SDimitry Andric   InstructionCost getArithmeticInstrCost(
155349cc55cSDimitry Andric       unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
156bdd1243dSDimitry Andric       TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
157bdd1243dSDimitry Andric       TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
158*0fca6ea1SDimitry Andric       ArrayRef<const Value *> Args = std::nullopt,
159480093f4SDimitry Andric       const Instruction *CxtI = nullptr);
1600b57cec5SDimitry Andric 
161fe6060f1SDimitry Andric   InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
162fe6060f1SDimitry Andric                                  const Instruction *I = nullptr);
1635ffd83dbSDimitry Andric 
1645ffd83dbSDimitry Andric   bool isInlineAsmSourceOfDivergence(const CallInst *CI,
1655ffd83dbSDimitry Andric                                      ArrayRef<unsigned> Indices = {}) const;
1660b57cec5SDimitry Andric 
167bdd1243dSDimitry Andric   using BaseT::getVectorInstrCost;
168fe6060f1SDimitry Andric   InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
169bdd1243dSDimitry Andric                                      TTI::TargetCostKind CostKind,
170bdd1243dSDimitry Andric                                      unsigned Index, Value *Op0, Value *Op1);
171bdd1243dSDimitry Andric 
172bdd1243dSDimitry Andric   bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const;
1730b57cec5SDimitry Andric   bool isSourceOfDivergence(const Value *V) const;
1740b57cec5SDimitry Andric   bool isAlwaysUniform(const Value *V) const;
1750b57cec5SDimitry Andric 
17606c3fb27SDimitry Andric   bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
17706c3fb27SDimitry Andric     if (ToAS == AMDGPUAS::FLAT_ADDRESS) {
17806c3fb27SDimitry Andric       switch (FromAS) {
17906c3fb27SDimitry Andric       case AMDGPUAS::GLOBAL_ADDRESS:
18006c3fb27SDimitry Andric       case AMDGPUAS::CONSTANT_ADDRESS:
18106c3fb27SDimitry Andric       case AMDGPUAS::CONSTANT_ADDRESS_32BIT:
18206c3fb27SDimitry Andric       case AMDGPUAS::LOCAL_ADDRESS:
18306c3fb27SDimitry Andric       case AMDGPUAS::PRIVATE_ADDRESS:
18406c3fb27SDimitry Andric         return true;
18506c3fb27SDimitry Andric       default:
18606c3fb27SDimitry Andric         break;
18706c3fb27SDimitry Andric       }
18806c3fb27SDimitry Andric       return false;
18906c3fb27SDimitry Andric     }
19006c3fb27SDimitry Andric     if ((FromAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
19106c3fb27SDimitry Andric          ToAS == AMDGPUAS::CONSTANT_ADDRESS) ||
19206c3fb27SDimitry Andric         (FromAS == AMDGPUAS::CONSTANT_ADDRESS &&
19306c3fb27SDimitry Andric          ToAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT))
19406c3fb27SDimitry Andric       return true;
19506c3fb27SDimitry Andric     return false;
19606c3fb27SDimitry Andric   }
19706c3fb27SDimitry Andric 
19806c3fb27SDimitry Andric   bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
19906c3fb27SDimitry Andric     return AMDGPU::addrspacesMayAlias(AS0, AS1);
20006c3fb27SDimitry Andric   }
20106c3fb27SDimitry Andric 
2020b57cec5SDimitry Andric   unsigned getFlatAddressSpace() const {
2030b57cec5SDimitry Andric     // Don't bother running InferAddressSpaces pass on graphics shaders which
2040b57cec5SDimitry Andric     // don't use flat addressing.
205e8d8bef9SDimitry Andric     if (IsGraphics)
2060b57cec5SDimitry Andric       return -1;
2070b57cec5SDimitry Andric     return AMDGPUAS::FLAT_ADDRESS;
2080b57cec5SDimitry Andric   }
2090b57cec5SDimitry Andric 
2108bcb0991SDimitry Andric   bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
2118bcb0991SDimitry Andric                                   Intrinsic::ID IID) const;
212349cc55cSDimitry Andric 
213349cc55cSDimitry Andric   bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
214349cc55cSDimitry Andric     return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
215349cc55cSDimitry Andric            AS != AMDGPUAS::PRIVATE_ADDRESS;
216349cc55cSDimitry Andric   }
217349cc55cSDimitry Andric 
2185ffd83dbSDimitry Andric   Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
2195ffd83dbSDimitry Andric                                           Value *NewV) const;
2208bcb0991SDimitry Andric 
22106c3fb27SDimitry Andric   bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0,
22206c3fb27SDimitry Andric                                  const Value *Op1, InstCombiner &IC) const;
223bdd1243dSDimitry Andric   std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
224e8d8bef9SDimitry Andric                                                     IntrinsicInst &II) const;
225bdd1243dSDimitry Andric   std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
226e8d8bef9SDimitry Andric       InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
227e8d8bef9SDimitry Andric       APInt &UndefElts2, APInt &UndefElts3,
228e8d8bef9SDimitry Andric       std::function<void(Instruction *, unsigned, APInt, APInt &)>
229e8d8bef9SDimitry Andric           SimplifyAndSetOp) const;
230e8d8bef9SDimitry Andric 
231fe6060f1SDimitry Andric   InstructionCost getVectorSplitCost() { return 0; }
2320b57cec5SDimitry Andric 
233fe6060f1SDimitry Andric   InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
234bdd1243dSDimitry Andric                                  ArrayRef<int> Mask,
235bdd1243dSDimitry Andric                                  TTI::TargetCostKind CostKind, int Index,
23681ad6265SDimitry Andric                                  VectorType *SubTp,
237*0fca6ea1SDimitry Andric                                  ArrayRef<const Value *> Args = std::nullopt,
238*0fca6ea1SDimitry Andric                                  const Instruction *CxtI = nullptr);
2390b57cec5SDimitry Andric 
2400b57cec5SDimitry Andric   bool areInlineCompatible(const Function *Caller,
2410b57cec5SDimitry Andric                            const Function *Callee) const;
2420b57cec5SDimitry Andric 
24306c3fb27SDimitry Andric   unsigned getInliningThresholdMultiplier() const { return 11; }
244e8d8bef9SDimitry Andric   unsigned adjustInliningThreshold(const CallBase *CB) const;
24506c3fb27SDimitry Andric   unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;
2460b57cec5SDimitry Andric 
24706c3fb27SDimitry Andric   int getInlinerVectorBonusPercent() const { return InlinerVectorBonusPercent; }
2480b57cec5SDimitry Andric 
249fe6060f1SDimitry Andric   InstructionCost getArithmeticReductionCost(
250bdd1243dSDimitry Andric       unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
251349cc55cSDimitry Andric       TTI::TargetCostKind CostKind);
2525ffd83dbSDimitry Andric 
253fe6060f1SDimitry Andric   InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2545ffd83dbSDimitry Andric                                         TTI::TargetCostKind CostKind);
25506c3fb27SDimitry Andric   InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
25606c3fb27SDimitry Andric                                          FastMathFlags FMF,
257349cc55cSDimitry Andric                                          TTI::TargetCostKind CostKind);
258cb14a3feSDimitry Andric 
259cb14a3feSDimitry Andric   /// Data cache line size for LoopDataPrefetch pass. Has no use before GFX12.
260cb14a3feSDimitry Andric   unsigned getCacheLineSize() const override { return 128; }
261cb14a3feSDimitry Andric 
262cb14a3feSDimitry Andric   /// How much before a load we should place the prefetch instruction.
263cb14a3feSDimitry Andric   /// This is currently measured in number of IR instructions.
264cb14a3feSDimitry Andric   unsigned getPrefetchDistance() const override;
265cb14a3feSDimitry Andric 
266cb14a3feSDimitry Andric   /// \return if target want to issue a prefetch in address space \p AS.
267cb14a3feSDimitry Andric   bool shouldPrefetchAddressSpace(unsigned AS) const override;
2680b57cec5SDimitry Andric };
2690b57cec5SDimitry Andric 
2700b57cec5SDimitry Andric } // end namespace llvm
2710b57cec5SDimitry Andric 
2720b57cec5SDimitry Andric #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
273