xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1349cc55cSDimitry Andric //===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
2349cc55cSDimitry Andric //
3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6349cc55cSDimitry Andric //
7349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
8349cc55cSDimitry Andric //
9349cc55cSDimitry Andric // \file
10349cc55cSDimitry Andric // This file implements a TargetTransformInfo analysis pass specific to the
11349cc55cSDimitry Andric // R600 target machine. It uses the target's detailed information to provide
12349cc55cSDimitry Andric // more precise answers to certain TTI queries, while letting the target
13349cc55cSDimitry Andric // independent and default TTI implementations handle the rest.
14349cc55cSDimitry Andric //
15349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
16349cc55cSDimitry Andric 
17349cc55cSDimitry Andric #include "R600TargetTransformInfo.h"
18349cc55cSDimitry Andric #include "AMDGPU.h"
19349cc55cSDimitry Andric #include "AMDGPUTargetMachine.h"
20349cc55cSDimitry Andric #include "R600Subtarget.h"
21349cc55cSDimitry Andric 
22349cc55cSDimitry Andric using namespace llvm;
23349cc55cSDimitry Andric 
24349cc55cSDimitry Andric #define DEBUG_TYPE "R600tti"
25349cc55cSDimitry Andric 
26349cc55cSDimitry Andric R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
27*0fca6ea1SDimitry Andric     : BaseT(TM, F.getDataLayout()),
28349cc55cSDimitry Andric       ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
29349cc55cSDimitry Andric       TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}
30349cc55cSDimitry Andric 
31349cc55cSDimitry Andric unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
32349cc55cSDimitry Andric   return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
33349cc55cSDimitry Andric }
34349cc55cSDimitry Andric 
35349cc55cSDimitry Andric unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
36349cc55cSDimitry Andric   return getHardwareNumberOfRegisters(Vec);
37349cc55cSDimitry Andric }
38349cc55cSDimitry Andric 
39349cc55cSDimitry Andric TypeSize
40349cc55cSDimitry Andric R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
41349cc55cSDimitry Andric   return TypeSize::getFixed(32);
42349cc55cSDimitry Andric }
43349cc55cSDimitry Andric 
44349cc55cSDimitry Andric unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; }
45349cc55cSDimitry Andric 
46349cc55cSDimitry Andric unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
47349cc55cSDimitry Andric   if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
48349cc55cSDimitry Andric       AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
49349cc55cSDimitry Andric     return 128;
50349cc55cSDimitry Andric   if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
51349cc55cSDimitry Andric       AddrSpace == AMDGPUAS::REGION_ADDRESS)
52349cc55cSDimitry Andric     return 64;
53349cc55cSDimitry Andric   if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
54349cc55cSDimitry Andric     return 32;
55349cc55cSDimitry Andric 
56349cc55cSDimitry Andric   if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
57349cc55cSDimitry Andric        AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
58349cc55cSDimitry Andric        (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
59349cc55cSDimitry Andric         AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
60349cc55cSDimitry Andric     return 128;
61349cc55cSDimitry Andric   llvm_unreachable("unhandled address space");
62349cc55cSDimitry Andric }
63349cc55cSDimitry Andric 
64349cc55cSDimitry Andric bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
65349cc55cSDimitry Andric                                              Align Alignment,
66349cc55cSDimitry Andric                                              unsigned AddrSpace) const {
67349cc55cSDimitry Andric   // We allow vectorization of flat stores, even though we may need to decompose
68349cc55cSDimitry Andric   // them later if they may access private memory. We don't have enough context
69349cc55cSDimitry Andric   // here, and legalization can handle it.
70349cc55cSDimitry Andric   return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
71349cc55cSDimitry Andric }
72349cc55cSDimitry Andric 
73349cc55cSDimitry Andric bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
74349cc55cSDimitry Andric                                               Align Alignment,
75349cc55cSDimitry Andric                                               unsigned AddrSpace) const {
76349cc55cSDimitry Andric   return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
77349cc55cSDimitry Andric }
78349cc55cSDimitry Andric 
79349cc55cSDimitry Andric bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
80349cc55cSDimitry Andric                                                Align Alignment,
81349cc55cSDimitry Andric                                                unsigned AddrSpace) const {
82349cc55cSDimitry Andric   return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
83349cc55cSDimitry Andric }
84349cc55cSDimitry Andric 
8506c3fb27SDimitry Andric unsigned R600TTIImpl::getMaxInterleaveFactor(ElementCount VF) {
86349cc55cSDimitry Andric   // Disable unrolling if the loop is not vectorized.
87349cc55cSDimitry Andric   // TODO: Enable this again.
8806c3fb27SDimitry Andric   if (VF.isScalar())
89349cc55cSDimitry Andric     return 1;
90349cc55cSDimitry Andric 
91349cc55cSDimitry Andric   return 8;
92349cc55cSDimitry Andric }
93349cc55cSDimitry Andric 
94349cc55cSDimitry Andric InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
95349cc55cSDimitry Andric                                             TTI::TargetCostKind CostKind,
96349cc55cSDimitry Andric                                             const Instruction *I) {
97349cc55cSDimitry Andric   if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
98349cc55cSDimitry Andric     return Opcode == Instruction::PHI ? 0 : 1;
99349cc55cSDimitry Andric 
100349cc55cSDimitry Andric   // XXX - For some reason this isn't called for switch.
101349cc55cSDimitry Andric   switch (Opcode) {
102349cc55cSDimitry Andric   case Instruction::Br:
103349cc55cSDimitry Andric   case Instruction::Ret:
104349cc55cSDimitry Andric     return 10;
105349cc55cSDimitry Andric   default:
106349cc55cSDimitry Andric     return BaseT::getCFInstrCost(Opcode, CostKind, I);
107349cc55cSDimitry Andric   }
108349cc55cSDimitry Andric }
109349cc55cSDimitry Andric 
110349cc55cSDimitry Andric InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
111bdd1243dSDimitry Andric                                                 TTI::TargetCostKind CostKind,
112bdd1243dSDimitry Andric                                                 unsigned Index, Value *Op0,
113bdd1243dSDimitry Andric                                                 Value *Op1) {
114349cc55cSDimitry Andric   switch (Opcode) {
115349cc55cSDimitry Andric   case Instruction::ExtractElement:
116349cc55cSDimitry Andric   case Instruction::InsertElement: {
117349cc55cSDimitry Andric     unsigned EltSize =
118349cc55cSDimitry Andric         DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
119349cc55cSDimitry Andric     if (EltSize < 32) {
120bdd1243dSDimitry Andric       return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0,
121bdd1243dSDimitry Andric                                        Op1);
122349cc55cSDimitry Andric     }
123349cc55cSDimitry Andric 
124349cc55cSDimitry Andric     // Extracts are just reads of a subregister, so are free. Inserts are
125349cc55cSDimitry Andric     // considered free because we don't want to have any cost for scalarizing
126349cc55cSDimitry Andric     // operations, and we don't have to copy into a different register class.
127349cc55cSDimitry Andric 
128349cc55cSDimitry Andric     // Dynamic indexing isn't free and is best avoided.
129349cc55cSDimitry Andric     return Index == ~0u ? 2 : 0;
130349cc55cSDimitry Andric   }
131349cc55cSDimitry Andric   default:
132bdd1243dSDimitry Andric     return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1);
133349cc55cSDimitry Andric   }
134349cc55cSDimitry Andric }
135349cc55cSDimitry Andric 
136349cc55cSDimitry Andric void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
137349cc55cSDimitry Andric                                           TTI::UnrollingPreferences &UP,
138349cc55cSDimitry Andric                                           OptimizationRemarkEmitter *ORE) {
139349cc55cSDimitry Andric   CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
140349cc55cSDimitry Andric }
141349cc55cSDimitry Andric 
142349cc55cSDimitry Andric void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
143349cc55cSDimitry Andric                                         TTI::PeelingPreferences &PP) {
144349cc55cSDimitry Andric   CommonTTI.getPeelingPreferences(L, SE, PP);
145349cc55cSDimitry Andric }
146