//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"

using namespace llvm::PatternMatch;

namespace llvm {

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
      : DL(Arg.DL) {}
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  InstructionCost
  getGEPCost(Type *PointeeType, const Value *Ptr,
             ArrayRef<const Value *> Operands,
             TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
    for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
      if (!isa<Constant>(Operands[Idx]))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const {
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  unsigned getInliningThresholdMultiplier() const { return 1; }
  unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }

  int getInlinerVectorBonusPercent() const { return 150; }

  InstructionCost getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }

  // Although this default value is arbitrary, it is not random. It is assumed
  // that a condition that evaluates the same way by a higher percentage than
  // this is best represented as control flow. Therefore, the default value N
  // should be set such that the win from N% correct executions is greater than
  // the loss from (100 - N)% mispredicted executions for the majority of
  // intended targets.
  BranchProbability getPredictableBranchThreshold() const {
    return BranchProbability(99, 100);
  }

  bool hasBranchDivergence() const { return false; }

  bool useGPUDivergenceAnalysis() const { return false; }

  bool isSourceOfDivergence(const Value *V) const { return false; }

  bool isAlwaysUniform(const Value *V) const { return false; }

  unsigned getFlatAddressSpace() const { return -1; }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }

  unsigned getAssumedAddrSpace(const Value *V) const { return -1; }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const {
    return false;
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) const {
    return false;
  }

  bool emitGetActiveLaneMask() const {
    return false;
  }

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const {
    return None;
  }

  Optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const {
    return None;
  }

  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const {
    return None;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &) const {}

  void getPeelingPreferences(Loop *, ScalarEvolution &,
                             TTI::PeelingPreferences &) const {}

  bool isLegalAddImmediate(int64_t Imm) const { return false; }

  bool isLegalICmpImmediate(int64_t Imm) const { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I = nullptr) const {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) const {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool isNumRegsMajorCostOfLSR() const { return true; }

  bool isProfitableLSRChainElement(Instruction *I) const { return false; }

  bool canMacroFuseCmp() const { return false; }

  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const {
    return false;
  }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
    return TTI::AMK_None;
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedCompressStore(Type *DataType) const { return false; }

  bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
    return false;
  }

  bool prefersVectorizedAddressing() const { return true; }

  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                              AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() const { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }

  bool isProfitableToHoist(Instruction *I) const { return true; }

  bool useAA() const { return false; }

  bool isTypeLegal(Type *Ty) const { return false; }

  InstructionCost getRegUsageForType(Type *Ty) const { return 1; }

  bool shouldBuildLookupTables() const { return true; }

  bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }

  bool shouldBuildRelLookupTables() const { return false; }

  bool useColdCCForColdCall(Function &F) const { return false; }

  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract) const {
    return 0;
  }

  InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                                   ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool supportsEfficientVectorElementLoadStore() const { return false; }

  bool enableAggressiveInterleaving(bool LoopHasReductions) const {
    return false;
  }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }

  bool enableInterleavedAccessVectorization() const { return false; }

  bool enableMaskedInterleavedAccessVectorization() const { return false; }

  bool isFPVectorizationPotentiallyUnsafe() const { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      bool *Fast) const {
    return false;
  }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) const { return false; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }

  InstructionCost getFPOpCost(Type *Ty) const {
    return TargetTransformInfo::TCC_Basic;
  }

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty) const {
    return 0;
  }

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Basic;
  }

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const {
    return TTI::TCC_Free;
  }

  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }

  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  }

  const char *getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
    }
  }

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  unsigned getMinVectorRegisterBitWidth() const { return 128; }

  Optional<unsigned> getMaxVScale() const { return None; }

  bool shouldMaximizeVectorBandwidth() const { return false; }

  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
    return ElementCount::get(0, IsScalable);
  }

  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() const { return 0; }

  llvm::Optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  llvm::Optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() const { return 0; }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    return 1;
  }
  unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
  bool enableWritePrefetching() const { return false; }

  unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
      TTI::OperandValueProperties Opd1PropInfo,
      TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) const {
    // FIXME: A number of transformation tests seem to require these values
    // which seems a little odd for how arbitrary they are.
    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
      return TTI::TCC_Expensive;
    }
    return 1;
  }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp) const {
    return 1;
  }

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I) const {
    switch (Opcode) {
    default:
      break;
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0;
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0;
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return 0;
      break;
    case Instruction::Trunc: {
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedSize()))
        return 0;
      break;
    }
    }
    return 1;
  }

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) const {
    return 1;
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const {
    // A phi would be free, unless we're costing the throughput because it
    // will require a register.
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
      return 0;
    return 1;
  }

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index) const {
    return 1;
  }

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I) const {
    return 1;
  }

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    return 1;
  }

  unsigned getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_param:
    case Intrinsic::coro_subfn_addr:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }

  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 1;
  }

  unsigned getNumberOfParts(Type *Tp) const { return 0; }

  InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                            const SCEV *) const {
    return 0;
  }

  InstructionCost getArithmeticReductionCost(unsigned, VectorType *, bool,
                                             TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getMinMaxReductionCost(VectorType *, VectorType *, bool, bool,
                                         TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getExtendedAddReductionCost(
      bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const {
    return 1;
  }

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const {
    return nullptr;
  }

  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign, unsigned DestAlign) const {
    return Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign) const {
    for (unsigned i = 0; i != RemainingBytes; ++i)
      OpsOut.push_back(Type::getInt8Ty(Context));
  }

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool areFunctionArgsABICompatible(const Function *Caller,
                                    const Function *Callee,
                                    SmallPtrSetImpl<Argument *> &Args) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeReduction(RecurrenceDescriptor RdxDesc,
                                   ElementCount VF) const {
    return true;
  }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return false;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }

  unsigned getGISelRematGlobalCost() const { return 1; }

  bool supportsScalableVectors() const { return false; }

  bool hasActiveVectorLength() const { return false; }

  TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
    return TargetTransformInfo::VPLegalization(
        /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
        /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
  }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector it returns the min required size for one element.
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // In case of a vector we need to pick the max of the min required
      // sizes of the elements.
      auto *VT = cast<FixedVectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The max required size is the size of the vector element type.
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedSize();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getMinSignedBits() - 1;
          // In case one element is signed then all the vector is signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an int constant element.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getMinSignedBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }

  bool isStridedAccess(const SCEV *Ptr) const {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) const {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getGEPCost;

  InstructionCost
  getGEPCost(Type *PointeeType, const Value *Ptr,
             ArrayRef<const Value *> Operands,
             TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    // TODO: will remove this when pointers have an opaque type.
    assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
               PointeeType &&
           "explicit pointee type doesn't match operand's pointee type");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has a single operand,
    // the basis, therefore TargetType is a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of Scalar GEP with constant index and the
      // cost of Vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // If this operand is a scalable type, bail out early.
        // TODO: handle scalable vectors
        if (isa<ScalableVectorType>(TargetType))
          return TTI::TCC_Basic;
        int64_t ElementSize =
            DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    if (static_cast<T *>(this)->isLegalAddressingMode(
            TargetType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }

  InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
                              TTI::TargetCostKind CostKind) {
    auto *TargetTTI = static_cast<T *>(this);
    // Handle non-intrinsic calls, invokes, and callbr.
    // FIXME: Unlikely to be true for anything but CodeSize.
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Give a basic cost if it will be lowered.

        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
      // For indirect or other calls, scale cost by number of arguments.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }

    Type *Ty = U->getType();
    Type *OpTy =
        U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr;
    unsigned Opcode = Operator::getOpcode(U);
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const GEPOperator *GEP = cast<GEPOperator>(U);
      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   GEP->getPointerOperand(),
                                   Operands.drop_front());
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      TTI::OperandValueProperties Op1VP = TTI::OP_None;
      TTI::OperandValueProperties Op2VP = TTI::OP_None;
      TTI::OperandValueKind Op1VK =
          TTI::getOperandInfo(U->getOperand(0), Op1VP);
      TTI::OperandValueKind Op2VK = Opcode != Instruction::FNeg ?
          TTI::getOperandInfo(U->getOperand(1), Op2VP) : TTI::OK_AnyValue;
      SmallVector<const Value *, 2> Operands(U->operand_values());
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind,
                                               Op1VK, Op2VK,
                                               Op1VP, Op2VP, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast:
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = U->getOperand(0)->getType();
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(),
                                        CostKind, I);
    }
    case Instruction::Load: {
      auto *LI = cast<LoadInst>(U);
      return TargetTTI->getMemoryOpCost(Opcode, U->getType(), LI->getAlign(),
                                        LI->getPointerAddressSpace(),
                                        CostKind, I);
    }
    case Instruction::Select: {
      const Value *Op0, *Op1;
      if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
          match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
        // select x, y, false --> x & y
        // select x, true, y --> x | y
        TTI::OperandValueProperties Op1VP = TTI::OP_None;
        TTI::OperandValueProperties Op2VP = TTI::OP_None;
        TTI::OperandValueKind Op1VK = TTI::getOperandInfo(Op0, Op1VP);
        TTI::OperandValueKind Op2VK = TTI::getOperandInfo(Op1, Op2VP);
        assert(Op0->getType()->getScalarSizeInBits() == 1 &&
               Op1->getType()->getScalarSizeInBits() == 1);

        SmallVector<const Value *, 2> Operands{Op0, Op1};
        return TargetTTI->getArithmeticInstrCost(
            match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
            CostKind, Op1VK, Op2VK, Op1VP, Op2VP, Operands, I);
      }
      Type *CondTy = U->getOperand(0)->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      Type *ValTy = U->getOperand(0)->getType();
      // TODO: Also handle ICmp/FCmp constant expressions.
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
      unsigned Idx = CI ? CI->getZExtValue() : -1;
      return TargetTTI->getVectorInstrCost(Opcode, Ty, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME
      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());

      // TODO: Identify and add costs for insert subvector, etc.
      int SubIndex;
      if (Shuffle->isExtractSubvectorMask(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
                                         Shuffle->getShuffleMask(), SubIndex,
                                         VecTy);
      else if (Shuffle->changesLength())
        return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
      else if (Shuffle->isIdentity())
        return 0;
      else if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr);
      else if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr);
      else if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr);
      else if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr);
      else if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr);

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
                                       Shuffle->getShuffleMask(), 0, nullptr);
    }
    case Instruction::ExtractElement: {
      unsigned Idx = -1;
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME

      auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1));
      if (CI)
        Idx = CI->getZExtValue();

      // Try to match a reduction (a series of shufflevector and vector ops
      // followed by an extractelement).
      unsigned RdxOpcode;
      VectorType *RdxType;
      bool IsPairwise;
      switch (TTI::matchVectorReduction(EEI, RdxOpcode, RdxType, IsPairwise)) {
      case TTI::RK_Arithmetic:
        return TargetTTI->getArithmeticReductionCost(RdxOpcode, RdxType,
                                                     IsPairwise, CostKind);
      case TTI::RK_MinMax:
        return TargetTTI->getMinMaxReductionCost(
            RdxType, cast<VectorType>(CmpInst::makeCmpResultType(RdxType)),
            IsPairwise, /*IsUnsigned=*/false, CostKind);
      case TTI::RK_UnsignedMinMax:
        return TargetTTI->getMinMaxReductionCost(
            RdxType, cast<VectorType>(CmpInst::makeCmpResultType(RdxType)),
            IsPairwise, /*IsUnsigned=*/true, CostKind);
      case TTI::RK_None:
        break;
      }
      return TargetTTI->getVectorInstrCost(Opcode, U->getOperand(0)->getType(),
                                           Idx);
    }
    }
    // By default, just classify everything as 'basic'.
    return TTI::TCC_Basic;
  }

  InstructionCost getInstructionLatency(const Instruction *I) {
    SmallVector<const Value *, 4> Operands(I->operand_values());
    if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free)
      return 0;

    if (isa<LoadInst>(I))
      return 4;

    Type *DstTy = I->getType();

    // Usually an intrinsic is a simple instruction.
    // A real function call is much slower.
    if (auto *CI = dyn_cast<CallInst>(I)) {
      const Function *F = CI->getCalledFunction();
      if (!F || static_cast<T *>(this)->isLoweredToCall(F))
        return 40;
      // Some intrinsics return a value and a flag, we use the value type
      // to decide its latency.
      if (StructType *StructTy = dyn_cast<StructType>(DstTy))
        DstTy = StructTy->getElementType(0);
      // Fall through to simple instructions.
    }

    if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
      DstTy = VectorTy->getElementType();
    if (DstTy->isFloatingPointTy())
      return 3;

    return 1;
  }
};
} // namespace llvm

#endif
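
// Editor's illustrative sketch (not part of the upstream header): the classes
// above are intended to be used as CRTP mix-ins. A hypothetical target TTI
// implementation -- in-tree targets normally go through BasicTTIImplBase,
// which itself builds on TargetTransformInfoImplCRTPBase -- would look roughly
// like this, overriding only the hooks whose conservative defaults are wrong
// for the target; names such as MyTargetTTIImpl and the chosen return values
// are assumptions, not LLVM code:
//
//   class MyTargetTTIImpl final
//       : public TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
//     using BaseT = TargetTransformInfoImplCRTPBase<MyTargetTTIImpl>;
//
//   public:
//     explicit MyTargetTTIImpl(const DataLayout &DL) : BaseT(DL) {}
//
//     // Everything not overridden keeps the defaults defined above.
//     unsigned getNumberOfRegisters(unsigned ClassID) const { return 32; }
//     bool enableInterleavedAccessVectorization() const { return true; }
//   };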