//===-- RISCVTargetTransformInfo.cpp - RISC-V specific TTI ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "RISCVTargetTransformInfo.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

#define DEBUG_TYPE "riscvtti"

InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
                                            TTI::TargetCostKind CostKind) {
  assert(Ty->isIntegerTy() &&
         "getIntImmCost can only estimate cost of materialising integers");

  // We have a Zero register, so 0 is always free.
  if (Imm == 0)
    return TTI::TCC_Free;

  // Otherwise, check how many instructions it will take to materialise.
  const DataLayout &DL = getDataLayout();
  return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty),
                                    getST()->getFeatureBits());
}

InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                                const APInt &Imm, Type *Ty,
                                                TTI::TargetCostKind CostKind,
                                                Instruction *Inst) {
  assert(Ty->isIntegerTy() &&
         "getIntImmCost can only estimate cost of materialising integers");

  // We have a Zero register, so 0 is always free.
  if (Imm == 0)
    return TTI::TCC_Free;

  // Some instructions in RISC-V can take a 12-bit immediate. Some of these
  // are commutative; in others the immediate comes from a specific argument
  // index.
  bool Takes12BitImm = false;
  unsigned ImmArgIdx = ~0U;

  switch (Opcode) {
  case Instruction::GetElementPtr:
    // Never hoist any arguments to a GetElementPtr. CodeGenPrepare will
    // split up large offsets in GEP into better parts than ConstantHoisting
    // can.
    return TTI::TCC_Free;
  case Instruction::And:
    // zext.h
    if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
      return TTI::TCC_Free;
    // zext.w (an alias of add.uw, so this requires Zba rather than Zbb)
    if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba())
      return TTI::TCC_Free;
    LLVM_FALLTHROUGH;
  case Instruction::Add:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Mul:
    Takes12BitImm = true;
    break;
  case Instruction::Sub:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    Takes12BitImm = true;
    ImmArgIdx = 1;
    break;
  default:
    break;
  }

  if (Takes12BitImm) {
    // Check immediate is the correct argument...
    if (Instruction::isCommutative(Opcode) || Idx == ImmArgIdx) {
      // ... and fits into the 12-bit immediate.
      if (Imm.getMinSignedBits() <= 64 &&
          getTLI()->isLegalAddImmediate(Imm.getSExtValue())) {
        return TTI::TCC_Free;
      }
    }

    // Otherwise, use the full materialisation cost.
    return getIntImmCost(Imm, Ty, CostKind);
  }

  // By default, prevent hoisting.
  return TTI::TCC_Free;
}

InstructionCost
RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                  const APInt &Imm, Type *Ty,
                                  TTI::TargetCostKind CostKind) {
  // Prevent hoisting in unknown cases.
  return TTI::TCC_Free;
}
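// Worked example of the immediate costing above (approximate: the exact
// numbers come from RISCVMatInt::getIntMatCost and the enabled extensions).
// On RV64:
//
//   li    a0, 2047          ; 2047 fits a signed 12-bit immediate -> cost 1
//   lui   a0, 0x12345       ; 0x12345678 needs an LUI/ADDIW pair
//   addiw a0, a0, 0x678     ;                                     -> cost 2
//
// In getIntImmCostInst, `and %x, 65535` is reported as TCC_Free when Zbb is
// available because it selects to zext.h, so ConstantHoisting leaves the
// mask in place.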
TargetTransformInfo::PopcntSupportKind
RISCVTTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  return ST->hasStdExtZbb() ? TTI::PSK_FastHardware : TTI::PSK_Software;
}

bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
  // Currently, the ExpandReductions pass can't expand scalable-vector
  // reductions, but we still request expansion as RVV doesn't support certain
  // reductions and SelectionDAG can't legalize them either.
  switch (II->getIntrinsicID()) {
  default:
    return false;
  // These reductions have no equivalent in RVV.
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_fmul:
    return true;
  }
}

Optional<unsigned> RISCVTTIImpl::getMaxVScale() const {
  // The V specification puts no upper bound on the hardware vector length, so
  // we rely on the maximum the user asserts via command-line options. This
  // function converts that assumed maximum vector length into the maximum
  // vscale reported to the LoopVectorizer. If the user does not specify a
  // maximum vector length, we cannot tell whether vectorization is safe, so
  // fall back to the base implementation. Only a single vector register
  // (LMUL = 1) is considered, e.g. an asserted maximum of 256 bits yields a
  // maximum vscale of 256 / 64 = 4 (RVVBitsPerBlock is 64).
  unsigned MaxVectorSizeInBits = ST->getMaxRVVVectorSizeInBits();
  if (ST->hasVInstructions() && MaxVectorSizeInBits != 0)
    return MaxVectorSizeInBits / RISCV::RVVBitsPerBlock;
  return BaseT::getMaxVScale();
}

InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
  if (CostKind != TTI::TCK_RecipThroughput)
    return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                         Alignment, CostKind, I);

  if ((Opcode == Instruction::Load &&
       !isLegalMaskedGather(DataTy, Align(Alignment))) ||
      (Opcode == Instruction::Store &&
       !isLegalMaskedScatter(DataTy, Align(Alignment))))
    return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                         Alignment, CostKind, I);

  // FIXME: Only supporting fixed vectors for now.
  if (!isa<FixedVectorType>(DataTy))
    return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                         Alignment, CostKind, I);

  // Cost the operation as one scalar memory access per vector element.
  auto *VTy = cast<FixedVectorType>(DataTy);
  unsigned NumLoads = VTy->getNumElements();
  InstructionCost MemOpCost =
      getMemoryOpCost(Opcode, VTy->getElementType(), Alignment, 0, CostKind, I);
  return NumLoads * MemOpCost;
}
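// Worked example of the scalarised costing above (approximate: the
// per-element number comes from getMemoryOpCost): a gather of <8 x i32>
// whose scalar i32 load costs 1 is costed as 8 * 1 = 8. In other words, the
// model currently assumes the operation expands into one load or store per
// element rather than a single indexed vector access.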
void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                           TTI::UnrollingPreferences &UP,
                                           OptimizationRemarkEmitter *ORE) {
  // TODO: More tuning on benchmarks and metrics with changes as needed
  //       would apply to all settings below to enable performance.

  // Only the explicitly supported SiFive targets below opt in to the
  // unrolling preferences that follow; all other targets keep the defaults.
  bool UseDefaultPreferences = true;
  if (ST->getTuneCPU().contains("sifive-e76") ||
      ST->getTuneCPU().contains("sifive-s76") ||
      ST->getTuneCPU().contains("sifive-u74") ||
      ST->getTuneCPU().contains("sifive-7"))
    UseDefaultPreferences = false;

  if (UseDefaultPreferences)
    return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP, ORE);

  // Enable upper-bound unrolling universally; it is not dependent on the
  // conditions below.
  UP.UpperBound = true;

  // Disable loop unrolling for Oz and Os.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;
  if (L->getHeader()->getParent()->hasOptSize())
    return;

  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  LLVM_DEBUG(dbgs() << "Loop has:\n"
                    << "Blocks: " << L->getNumBlocks() << "\n"
                    << "Exit blocks: " << ExitingBlocks.size() << "\n");

  // Allow at most one exit other than the latch. This acts as an early exit
  // and mirrors the profitability calculation of the runtime unroller.
  if (ExitingBlocks.size() > 2)
    return;

  // Limit the CFG of the loop body for targets with a branch predictor.
  // Allowing 4 blocks permits if-then-else diamonds in the body.
  if (L->getNumBlocks() > 4)
    return;

  // Don't unroll vectorized loops, including the remainder loop.
  if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
    return;

  // Scan the loop: don't unroll loops with calls as this could prevent
  // inlining.
  InstructionCost Cost = 0;
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      // Initial setting - Don't unroll loops containing vectorized
      // instructions.
      if (I.getType()->isVectorTy())
        return;

      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
        if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
          if (!isLoweredToCall(F))
            continue;
        }
        return;
      }

      SmallVector<const Value *> Operands(I.operand_values());
      Cost +=
          getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency);
    }
  }

  LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");

  UP.Partial = true;
  UP.Runtime = true;
  UP.UnrollRemainder = true;
  UP.UnrollAndJam = true;
  UP.UnrollAndJamInnerLoopThreshold = 60;

  // Forcing unrolling of small loops can be very useful because of the
  // branch-taken cost of the backedge (see the sizing sketch at the end of
  // this file).
  if (Cost < 12)
    UP.Force = true;
}

void RISCVTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::PeelingPreferences &PP) {
  BaseT::getPeelingPreferences(L, SE, PP);
}
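// Sizing sketch for the force-unroll cut-off in getUnrollingPreferences above
// (approximate: real numbers come from getUserCost with TCK_SizeAndLatency).
// A body such as
//
//   %v = load i32, i32* %p
//   %a = add i32 %v, %s
//   store i32 %a, i32* %p
//
// costs roughly 1 per instruction, totalling about 3, well under the cut-off
// of 12, so UP.Force is set. A straight-line body of a dozen or more
// instructions is left to the normal profitability checks.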