//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file describes a TargetTransformInfo::Concept conforming object
/// specific to the NVPTX target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target-independent and default TTI implementations handle the
/// rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H

#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTXTargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"
#include <optional>

namespace llvm {

class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
  typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;

  const NVPTXSubtarget *ST;
  const NVPTXTargetLowering *TLI;

  const NVPTXSubtarget *getST() const { return ST; }
  const NVPTXTargetLowering *getTLI() const { return TLI; }

public:
  explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl()),
        TLI(ST->getTargetLowering()) {}

  // NVPTX code executes in SIMT fashion, so branches can diverge.
  bool hasBranchDivergence(const Function *F = nullptr) { return true; }

  bool isSourceOfDivergence(const Value *V);

  unsigned getFlatAddressSpace() const {
    return AddressSpace::ADDRESS_SPACE_GENERIC;
  }

  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS != AddressSpace::ADDRESS_SPACE_SHARED &&
           AS != AddressSpace::ADDRESS_SPACE_LOCAL &&
           AS != AddressSpace::ADDRESS_SPACE_PARAM;
  }

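  // Target InstCombine hook: given a call to an NVPTX intrinsic, return a
  // simplified replacement instruction, or std::nullopt to defer to the
  // generic InstCombine handling.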
  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;

  // Loads and stores can be vectorized if the alignment is at least as big as
  // the load/store we want to vectorize.
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return Alignment >= ChainSizeInBytes;
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
  }

  // NVPTX has an unbounded supply of virtual registers of all kinds, but the
  // actual machine does not. We conservatively return 1 here, which is just
  // enough to enable the vectorizers but disables heuristics based on the
  // number of registers.
  // FIXME: Return a more reasonable number, while keeping an eye on
  // LoopVectorizer's unrolling heuristics.
  unsigned getNumberOfRegisters(bool Vector) const { return 1; }

  // Only <2 x half> should be vectorized, so always return 32 for the vector
  // register size.
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }
  unsigned getMinVectorRegisterBitWidth() const { return 32; }

  // We don't want to prevent inlining because of target-cpu and -features
  // attributes that were added to newer versions of LLVM/Clang: there are no
  // incompatible functions in PTX; ptxas will throw errors in such cases.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return true;
  }

  // Increase the inlining cost threshold by a factor of 11, reflecting that
  // calls are particularly expensive in NVPTX.
  unsigned getInliningThresholdMultiplier() const { return 11; }

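  // The overrides below are implemented in NVPTXTargetTransformInfo.cpp; they
  // refine the generic cost model and the loop unrolling/peeling heuristics
  // with NVPTX-specific knowledge.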
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = std::nullopt,
      const Instruction *CxtI = nullptr);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) {
    // Volatile loads/stores are only supported for shared and global address
    // spaces, or for a generic AS that maps to them.
    if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
          AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
          AddrSpace == llvm::ADDRESS_SPACE_SHARED))
      return false;

    switch (I->getOpcode()) {
    default:
      return false;
    case Instruction::Load:
    case Instruction::Store:
      return true;
    }
  }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H