1349cc55cSDimitry Andric //===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===// 2349cc55cSDimitry Andric // 3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6349cc55cSDimitry Andric // 7349cc55cSDimitry Andric //===----------------------------------------------------------------------===// 8349cc55cSDimitry Andric // 9349cc55cSDimitry Andric // \file 10349cc55cSDimitry Andric // This file implements a TargetTransformInfo analysis pass specific to the 11349cc55cSDimitry Andric // R600 target machine. It uses the target's detailed information to provide 12349cc55cSDimitry Andric // more precise answers to certain TTI queries, while letting the target 13349cc55cSDimitry Andric // independent and default TTI implementations handle the rest. 14349cc55cSDimitry Andric // 15349cc55cSDimitry Andric //===----------------------------------------------------------------------===// 16349cc55cSDimitry Andric 17349cc55cSDimitry Andric #include "R600TargetTransformInfo.h" 18349cc55cSDimitry Andric #include "AMDGPU.h" 19349cc55cSDimitry Andric #include "AMDGPUTargetMachine.h" 20349cc55cSDimitry Andric #include "R600Subtarget.h" 21349cc55cSDimitry Andric 22349cc55cSDimitry Andric using namespace llvm; 23349cc55cSDimitry Andric 24349cc55cSDimitry Andric #define DEBUG_TYPE "R600tti" 25349cc55cSDimitry Andric 26349cc55cSDimitry Andric R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F) 27*0fca6ea1SDimitry Andric : BaseT(TM, F.getDataLayout()), 28349cc55cSDimitry Andric ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))), 29349cc55cSDimitry Andric TLI(ST->getTargetLowering()), CommonTTI(TM, F) {} 30349cc55cSDimitry Andric 31349cc55cSDimitry Andric unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const { 32349cc55cSDimitry Andric return 4 * 128; // XXX - 4 channels. Should these count as vector instead? 33349cc55cSDimitry Andric } 34349cc55cSDimitry Andric 35349cc55cSDimitry Andric unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const { 36349cc55cSDimitry Andric return getHardwareNumberOfRegisters(Vec); 37349cc55cSDimitry Andric } 38349cc55cSDimitry Andric 39349cc55cSDimitry Andric TypeSize 40349cc55cSDimitry Andric R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { 41349cc55cSDimitry Andric return TypeSize::getFixed(32); 42349cc55cSDimitry Andric } 43349cc55cSDimitry Andric 44349cc55cSDimitry Andric unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; } 45349cc55cSDimitry Andric 46349cc55cSDimitry Andric unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { 47349cc55cSDimitry Andric if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS || 48349cc55cSDimitry Andric AddrSpace == AMDGPUAS::CONSTANT_ADDRESS) 49349cc55cSDimitry Andric return 128; 50349cc55cSDimitry Andric if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || 51349cc55cSDimitry Andric AddrSpace == AMDGPUAS::REGION_ADDRESS) 52349cc55cSDimitry Andric return 64; 53349cc55cSDimitry Andric if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) 54349cc55cSDimitry Andric return 32; 55349cc55cSDimitry Andric 56349cc55cSDimitry Andric if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS || 57349cc55cSDimitry Andric AddrSpace == AMDGPUAS::PARAM_I_ADDRESS || 58349cc55cSDimitry Andric (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 && 59349cc55cSDimitry Andric AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15))) 60349cc55cSDimitry Andric return 128; 61349cc55cSDimitry Andric llvm_unreachable("unhandled address space"); 62349cc55cSDimitry Andric } 63349cc55cSDimitry Andric 64349cc55cSDimitry Andric bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, 65349cc55cSDimitry Andric Align Alignment, 66349cc55cSDimitry Andric unsigned AddrSpace) const { 67349cc55cSDimitry Andric // We allow vectorization of flat stores, even though we may need to decompose 68349cc55cSDimitry Andric // them later if they may access private memory. We don't have enough context 69349cc55cSDimitry Andric // here, and legalization can handle it. 70349cc55cSDimitry Andric return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS); 71349cc55cSDimitry Andric } 72349cc55cSDimitry Andric 73349cc55cSDimitry Andric bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, 74349cc55cSDimitry Andric Align Alignment, 75349cc55cSDimitry Andric unsigned AddrSpace) const { 76349cc55cSDimitry Andric return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace); 77349cc55cSDimitry Andric } 78349cc55cSDimitry Andric 79349cc55cSDimitry Andric bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, 80349cc55cSDimitry Andric Align Alignment, 81349cc55cSDimitry Andric unsigned AddrSpace) const { 82349cc55cSDimitry Andric return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace); 83349cc55cSDimitry Andric } 84349cc55cSDimitry Andric 8506c3fb27SDimitry Andric unsigned R600TTIImpl::getMaxInterleaveFactor(ElementCount VF) { 86349cc55cSDimitry Andric // Disable unrolling if the loop is not vectorized. 87349cc55cSDimitry Andric // TODO: Enable this again. 8806c3fb27SDimitry Andric if (VF.isScalar()) 89349cc55cSDimitry Andric return 1; 90349cc55cSDimitry Andric 91349cc55cSDimitry Andric return 8; 92349cc55cSDimitry Andric } 93349cc55cSDimitry Andric 94349cc55cSDimitry Andric InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode, 95349cc55cSDimitry Andric TTI::TargetCostKind CostKind, 96349cc55cSDimitry Andric const Instruction *I) { 97349cc55cSDimitry Andric if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) 98349cc55cSDimitry Andric return Opcode == Instruction::PHI ? 0 : 1; 99349cc55cSDimitry Andric 100349cc55cSDimitry Andric // XXX - For some reason this isn't called for switch. 101349cc55cSDimitry Andric switch (Opcode) { 102349cc55cSDimitry Andric case Instruction::Br: 103349cc55cSDimitry Andric case Instruction::Ret: 104349cc55cSDimitry Andric return 10; 105349cc55cSDimitry Andric default: 106349cc55cSDimitry Andric return BaseT::getCFInstrCost(Opcode, CostKind, I); 107349cc55cSDimitry Andric } 108349cc55cSDimitry Andric } 109349cc55cSDimitry Andric 110349cc55cSDimitry Andric InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, 111bdd1243dSDimitry Andric TTI::TargetCostKind CostKind, 112bdd1243dSDimitry Andric unsigned Index, Value *Op0, 113bdd1243dSDimitry Andric Value *Op1) { 114349cc55cSDimitry Andric switch (Opcode) { 115349cc55cSDimitry Andric case Instruction::ExtractElement: 116349cc55cSDimitry Andric case Instruction::InsertElement: { 117349cc55cSDimitry Andric unsigned EltSize = 118349cc55cSDimitry Andric DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType()); 119349cc55cSDimitry Andric if (EltSize < 32) { 120bdd1243dSDimitry Andric return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, 121bdd1243dSDimitry Andric Op1); 122349cc55cSDimitry Andric } 123349cc55cSDimitry Andric 124349cc55cSDimitry Andric // Extracts are just reads of a subregister, so are free. Inserts are 125349cc55cSDimitry Andric // considered free because we don't want to have any cost for scalarizing 126349cc55cSDimitry Andric // operations, and we don't have to copy into a different register class. 127349cc55cSDimitry Andric 128349cc55cSDimitry Andric // Dynamic indexing isn't free and is best avoided. 129349cc55cSDimitry Andric return Index == ~0u ? 2 : 0; 130349cc55cSDimitry Andric } 131349cc55cSDimitry Andric default: 132bdd1243dSDimitry Andric return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1); 133349cc55cSDimitry Andric } 134349cc55cSDimitry Andric } 135349cc55cSDimitry Andric 136349cc55cSDimitry Andric void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, 137349cc55cSDimitry Andric TTI::UnrollingPreferences &UP, 138349cc55cSDimitry Andric OptimizationRemarkEmitter *ORE) { 139349cc55cSDimitry Andric CommonTTI.getUnrollingPreferences(L, SE, UP, ORE); 140349cc55cSDimitry Andric } 141349cc55cSDimitry Andric 142349cc55cSDimitry Andric void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, 143349cc55cSDimitry Andric TTI::PeelingPreferences &PP) { 144349cc55cSDimitry Andric CommonTTI.getPeelingPreferences(L, SE, PP); 145349cc55cSDimitry Andric } 146