xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric /// \file
90b57cec5SDimitry Andric /// This file defines a TargetTransformInfo::Concept conforming object specific to the
100b57cec5SDimitry Andric /// NVPTX target machine. It uses the target's detailed information to
110b57cec5SDimitry Andric /// provide more precise answers to certain TTI queries, while letting the
120b57cec5SDimitry Andric /// target independent and default TTI implementations handle the rest.
130b57cec5SDimitry Andric ///
140b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
170b57cec5SDimitry Andric #define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
180b57cec5SDimitry Andric 
190b57cec5SDimitry Andric #include "NVPTXTargetMachine.h"
200b57cec5SDimitry Andric #include "MCTargetDesc/NVPTXBaseInfo.h"
210b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
220b57cec5SDimitry Andric #include "llvm/CodeGen/BasicTTIImpl.h"
230b57cec5SDimitry Andric #include "llvm/CodeGen/TargetLowering.h"
24bdd1243dSDimitry Andric #include <optional>
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric namespace llvm {
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
290b57cec5SDimitry Andric   typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT;
300b57cec5SDimitry Andric   typedef TargetTransformInfo TTI;
310b57cec5SDimitry Andric   friend BaseT;
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric   const NVPTXSubtarget *ST;
340b57cec5SDimitry Andric   const NVPTXTargetLowering *TLI;
350b57cec5SDimitry Andric 
360b57cec5SDimitry Andric   const NVPTXSubtarget *getST() const { return ST; };
370b57cec5SDimitry Andric   const NVPTXTargetLowering *getTLI() const { return TLI; };
380b57cec5SDimitry Andric 
390b57cec5SDimitry Andric public:
400b57cec5SDimitry Andric   explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
41*0fca6ea1SDimitry Andric       : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl()),
420b57cec5SDimitry Andric         TLI(ST->getTargetLowering()) {}
430b57cec5SDimitry Andric 
4406c3fb27SDimitry Andric   bool hasBranchDivergence(const Function *F = nullptr) { return true; }
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric   bool isSourceOfDivergence(const Value *V);
470b57cec5SDimitry Andric 
480b57cec5SDimitry Andric   unsigned getFlatAddressSpace() const {
490b57cec5SDimitry Andric     return AddressSpace::ADDRESS_SPACE_GENERIC;
500b57cec5SDimitry Andric   }
510b57cec5SDimitry Andric 
52349cc55cSDimitry Andric   bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
53349cc55cSDimitry Andric     return AS != AddressSpace::ADDRESS_SPACE_SHARED &&
54349cc55cSDimitry Andric            AS != AddressSpace::ADDRESS_SPACE_LOCAL && AS != ADDRESS_SPACE_PARAM;
55349cc55cSDimitry Andric   }
56349cc55cSDimitry Andric 
57bdd1243dSDimitry Andric   std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
58e8d8bef9SDimitry Andric                                                     IntrinsicInst &II) const;
59e8d8bef9SDimitry Andric 
600b57cec5SDimitry Andric   // Loads and stores can be vectorized if the alignment is at least as big as
610b57cec5SDimitry Andric   // the load/store we want to vectorize.
625ffd83dbSDimitry Andric   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
630b57cec5SDimitry Andric                                    unsigned AddrSpace) const {
640b57cec5SDimitry Andric     return Alignment >= ChainSizeInBytes;
650b57cec5SDimitry Andric   }
665ffd83dbSDimitry Andric   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
670b57cec5SDimitry Andric                                     unsigned AddrSpace) const {
680b57cec5SDimitry Andric     return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
690b57cec5SDimitry Andric   }
700b57cec5SDimitry Andric 
710b57cec5SDimitry Andric   // NVPTX has infinite registers of all kinds, but the actual machine doesn't.
720b57cec5SDimitry Andric   // We conservatively return 1 here which is just enough to enable the
730b57cec5SDimitry Andric   // vectorizers but disables heuristics based on the number of registers.
740b57cec5SDimitry Andric   // FIXME: Return a more reasonable number, while keeping an eye on
750b57cec5SDimitry Andric   // LoopVectorizer's unrolling heuristics.
760b57cec5SDimitry Andric   unsigned getNumberOfRegisters(bool Vector) const { return 1; }
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric   // Only <2 x half> should be vectorized, so always return 32 for the vector
790b57cec5SDimitry Andric   // register size.
80fe6060f1SDimitry Andric   TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
81fe6060f1SDimitry Andric     return TypeSize::getFixed(32);
82fe6060f1SDimitry Andric   }
830b57cec5SDimitry Andric   unsigned getMinVectorRegisterBitWidth() const { return 32; }
840b57cec5SDimitry Andric 
850b57cec5SDimitry Andric   // We don't want to prevent inlining because of target-cpu and -features
860b57cec5SDimitry Andric   // attributes that were added to newer versions of LLVM/Clang: There are
870b57cec5SDimitry Andric   // no incompatible functions in PTX, ptxas will throw errors in such cases.
880b57cec5SDimitry Andric   bool areInlineCompatible(const Function *Caller,
890b57cec5SDimitry Andric                            const Function *Callee) const {
900b57cec5SDimitry Andric     return true;
910b57cec5SDimitry Andric   }
920b57cec5SDimitry Andric 
9306c3fb27SDimitry Andric   // Increase the inlining cost threshold by a factor of 11, reflecting that
940b57cec5SDimitry Andric   // calls are particularly expensive in NVPTX.
9506c3fb27SDimitry Andric   unsigned getInliningThresholdMultiplier() const { return 11; }
960b57cec5SDimitry Andric 
97fe6060f1SDimitry Andric   InstructionCost getArithmeticInstrCost(
98349cc55cSDimitry Andric       unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
99bdd1243dSDimitry Andric       TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
100bdd1243dSDimitry Andric       TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
101*0fca6ea1SDimitry Andric       ArrayRef<const Value *> Args = std::nullopt,
102480093f4SDimitry Andric       const Instruction *CxtI = nullptr);
1030b57cec5SDimitry Andric 
1040b57cec5SDimitry Andric   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
105349cc55cSDimitry Andric                                TTI::UnrollingPreferences &UP,
106349cc55cSDimitry Andric                                OptimizationRemarkEmitter *ORE);
1075ffd83dbSDimitry Andric 
1085ffd83dbSDimitry Andric   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1095ffd83dbSDimitry Andric                              TTI::PeelingPreferences &PP);
1105ffd83dbSDimitry Andric 
1110b57cec5SDimitry Andric   bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) {
1120b57cec5SDimitry Andric     // Volatile loads/stores are only supported for shared and global address
1130b57cec5SDimitry Andric     // spaces, or for generic AS that maps to them.
1140b57cec5SDimitry Andric     if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
1150b57cec5SDimitry Andric           AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
1160b57cec5SDimitry Andric           AddrSpace == llvm::ADDRESS_SPACE_SHARED))
1170b57cec5SDimitry Andric       return false;
1180b57cec5SDimitry Andric 
1190b57cec5SDimitry Andric     switch(I->getOpcode()){
1200b57cec5SDimitry Andric     default:
1210b57cec5SDimitry Andric       return false;
1220b57cec5SDimitry Andric     case Instruction::Load:
1230b57cec5SDimitry Andric     case Instruction::Store:
1240b57cec5SDimitry Andric       return true;
1250b57cec5SDimitry Andric     }
1260b57cec5SDimitry Andric   }
1270b57cec5SDimitry Andric };
1280b57cec5SDimitry Andric 
1290b57cec5SDimitry Andric } // end namespace llvm
1300b57cec5SDimitry Andric 
1310b57cec5SDimitry Andric #endif
132