xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (revision 0eae32dcef82f6f06de6419a0d623d7def0cc8f6)
1 //===-- RISCVTargetTransformInfo.cpp - RISC-V specific TTI ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "RISCVTargetTransformInfo.h"
10 #include "MCTargetDesc/RISCVMatInt.h"
11 #include "llvm/Analysis/TargetTransformInfo.h"
12 #include "llvm/CodeGen/BasicTTIImpl.h"
13 #include "llvm/CodeGen/TargetLowering.h"
14 using namespace llvm;
15 
16 #define DEBUG_TYPE "riscvtti"
17 
18 InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
19                                             TTI::TargetCostKind CostKind) {
20   assert(Ty->isIntegerTy() &&
21          "getIntImmCost can only estimate cost of materialising integers");
22 
23   // We have a Zero register, so 0 is always free.
24   if (Imm == 0)
25     return TTI::TCC_Free;
26 
27   // Otherwise, we check how many instructions it will take to materialise.
28   const DataLayout &DL = getDataLayout();
29   return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty),
30                                     getST()->getFeatureBits());
31 }
32 
33 InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
34                                                 const APInt &Imm, Type *Ty,
35                                                 TTI::TargetCostKind CostKind,
36                                                 Instruction *Inst) {
37   assert(Ty->isIntegerTy() &&
38          "getIntImmCost can only estimate cost of materialising integers");
39 
40   // We have a Zero register, so 0 is always free.
41   if (Imm == 0)
42     return TTI::TCC_Free;
43 
44   // Some instructions in RISC-V can take a 12-bit immediate. Some of these are
45   // commutative, in others the immediate comes from a specific argument index.
46   bool Takes12BitImm = false;
47   unsigned ImmArgIdx = ~0U;
48 
49   switch (Opcode) {
50   case Instruction::GetElementPtr:
51     // Never hoist any arguments to a GetElementPtr. CodeGenPrepare will
52     // split up large offsets in GEP into better parts than ConstantHoisting
53     // can.
54     return TTI::TCC_Free;
55   case Instruction::And:
56     // zext.h
57     if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
58       return TTI::TCC_Free;
59     // zext.w
60     if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZbb())
61       return TTI::TCC_Free;
62     LLVM_FALLTHROUGH;
63   case Instruction::Add:
64   case Instruction::Or:
65   case Instruction::Xor:
66   case Instruction::Mul:
67     Takes12BitImm = true;
68     break;
69   case Instruction::Sub:
70   case Instruction::Shl:
71   case Instruction::LShr:
72   case Instruction::AShr:
73     Takes12BitImm = true;
74     ImmArgIdx = 1;
75     break;
76   default:
77     break;
78   }
79 
80   if (Takes12BitImm) {
81     // Check immediate is the correct argument...
82     if (Instruction::isCommutative(Opcode) || Idx == ImmArgIdx) {
83       // ... and fits into the 12-bit immediate.
84       if (Imm.getMinSignedBits() <= 64 &&
85           getTLI()->isLegalAddImmediate(Imm.getSExtValue())) {
86         return TTI::TCC_Free;
87       }
88     }
89 
90     // Otherwise, use the full materialisation cost.
91     return getIntImmCost(Imm, Ty, CostKind);
92   }
93 
94   // By default, prevent hoisting.
95   return TTI::TCC_Free;
96 }
97 
// Cost of an immediate operand of an intrinsic call. No intrinsic is known
// to benefit from hoisting its immediates, so report them all as free,
// which prevents ConstantHoisting from touching them.
InstructionCost
RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                  const APInt &Imm, Type *Ty,
                                  TTI::TargetCostKind CostKind) {
  // Prevent hoisting in unknown cases.
  return TTI::TCC_Free;
}
105 
106 TargetTransformInfo::PopcntSupportKind
107 RISCVTTIImpl::getPopcntSupport(unsigned TyWidth) {
108   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
109   return ST->hasStdExtZbb() ? TTI::PSK_FastHardware : TTI::PSK_Software;
110 }
111 
112 bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
113   // Currently, the ExpandReductions pass can't expand scalable-vector
114   // reductions, but we still request expansion as RVV doesn't support certain
115   // reductions and the SelectionDAG can't legalize them either.
116   switch (II->getIntrinsicID()) {
117   default:
118     return false;
119   // These reductions have no equivalent in RVV
120   case Intrinsic::vector_reduce_mul:
121   case Intrinsic::vector_reduce_fmul:
122     return true;
123   }
124 }
125 
126 Optional<unsigned> RISCVTTIImpl::getMaxVScale() const {
127   // There is no assumption of the maximum vector length in V specification.
128   // We use the value specified by users as the maximum vector length.
129   // This function will use the assumed maximum vector length to get the
130   // maximum vscale for LoopVectorizer.
131   // If users do not specify the maximum vector length, we have no way to
132   // know whether the LoopVectorizer is safe to do or not.
133   // We only consider to use single vector register (LMUL = 1) to vectorize.
134   unsigned MaxVectorSizeInBits = ST->getMaxRVVVectorSizeInBits();
135   if (ST->hasVInstructions() && MaxVectorSizeInBits != 0)
136     return MaxVectorSizeInBits / RISCV::RVVBitsPerBlock;
137   return BaseT::getMaxVScale();
138 }
139 
140 InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
141     unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
142     Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
143   if (CostKind != TTI::TCK_RecipThroughput)
144     return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
145                                          Alignment, CostKind, I);
146 
147   if ((Opcode == Instruction::Load &&
148        !isLegalMaskedGather(DataTy, Align(Alignment))) ||
149       (Opcode == Instruction::Store &&
150        !isLegalMaskedScatter(DataTy, Align(Alignment))))
151     return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
152                                          Alignment, CostKind, I);
153 
154   // FIXME: Only supporting fixed vectors for now.
155   if (!isa<FixedVectorType>(DataTy))
156     return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
157                                          Alignment, CostKind, I);
158 
159   auto *VTy = cast<FixedVectorType>(DataTy);
160   unsigned NumLoads = VTy->getNumElements();
161   InstructionCost MemOpCost =
162       getMemoryOpCost(Opcode, VTy->getElementType(), Alignment, 0, CostKind, I);
163   return NumLoads * MemOpCost;
164 }
165 
// Tune loop-unrolling behaviour. Hand-tuned preferences are installed only
// for the SiFive 7-series tune targets listed below; every other CPU keeps
// the generic BasicTTI defaults.
void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                           TTI::UnrollingPreferences &UP,
                                           OptimizationRemarkEmitter *ORE) {
  // TODO: More tuning on benchmarks and metrics with changes as needed
  //       would apply to all settings below to enable performance.

  // Support explicit targets enabled for SiFive with the unrolling
  // preferences below.
  bool UseDefaultPreferences = true;
  if (ST->getTuneCPU().contains("sifive-e76") ||
      ST->getTuneCPU().contains("sifive-s76") ||
      ST->getTuneCPU().contains("sifive-u74") ||
      ST->getTuneCPU().contains("sifive-7"))
    UseDefaultPreferences = false;

  if (UseDefaultPreferences)
    return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP, ORE);

  // Enable upper-bound unrolling universally, not dependent upon the
  // conditions below.
  UP.UpperBound = true;

  // Disable loop unrolling for Oz and Os.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;
  if (L->getHeader()->getParent()->hasOptSize())
    return;

  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  LLVM_DEBUG(dbgs() << "Loop has:\n"
                    << "Blocks: " << L->getNumBlocks() << "\n"
                    << "Exit blocks: " << ExitingBlocks.size() << "\n");

  // Only allow another exit other than the latch. This acts as an early exit
  // as it mirrors the profitability calculation of the runtime unroller.
  if (ExitingBlocks.size() > 2)
    return;

  // Limit the CFG of the loop body for targets with a branch predictor.
  // Allowing 4 blocks permits if-then-else diamonds in the body.
  if (L->getNumBlocks() > 4)
    return;

  // Don't unroll vectorized loops, including the remainder loop.
  if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
    return;

  // Scan the loop: don't unroll loops with calls as this could prevent
  // inlining. Accumulate a size/latency cost for the whole body so small
  // loops can be force-unrolled below.
  InstructionCost Cost = 0;
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      // Initial setting - Don't unroll loops containing vectorized
      // instructions.
      if (I.getType()->isVectorTy())
        return;

      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
        if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
          // Intrinsics and always-inlined callees don't become real calls;
          // they don't block unrolling.
          if (!isLoweredToCall(F))
            continue;
        }
        return;
      }

      SmallVector<const Value *> Operands(I.operand_values());
      Cost +=
          getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency);
    }
  }

  LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");

  UP.Partial = true;
  UP.Runtime = true;
  UP.UnrollRemainder = true;
  UP.UnrollAndJam = true;
  UP.UnrollAndJamInnerLoopThreshold = 60;

  // Force unrolling small loops can be very useful because of the branch
  // taken cost of the backedge.
  if (Cost < 12)
    UP.Force = true;
}
251 
// No RISC-V specific peeling tuning; simply use the generic BasicTTI
// peeling preferences.
void RISCVTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::PeelingPreferences &PP) {
  BaseT::getPeelingPreferences(L, SE, PP);
}
256