1 //===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file provides helpers for the implementation of 10 /// a TargetTransformInfo-conforming class. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H 15 #define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H 16 17 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 18 #include "llvm/Analysis/TargetTransformInfo.h" 19 #include "llvm/Analysis/VectorUtils.h" 20 #include "llvm/IR/DataLayout.h" 21 #include "llvm/IR/GetElementPtrTypeIterator.h" 22 #include "llvm/IR/IntrinsicInst.h" 23 #include "llvm/IR/Operator.h" 24 #include "llvm/IR/PatternMatch.h" 25 #include <optional> 26 #include <utility> 27 28 namespace llvm { 29 30 class Function; 31 32 /// Base class for use as a mix-in that aids implementing 33 /// a TargetTransformInfo-compatible class. 34 class TargetTransformInfoImplBase { 35 36 protected: 37 typedef TargetTransformInfo TTI; 38 39 const DataLayout &DL; 40 41 explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {} 42 43 public: 44 // Provide value semantics. MSVC requires that we spell all of these out. 45 TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default; 46 TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {} 47 48 const DataLayout &getDataLayout() const { return DL; } 49 50 InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, 51 ArrayRef<const Value *> Operands, Type *AccessType, 52 TTI::TargetCostKind CostKind) const { 53 // In the basic model, we just assume that all-constant GEPs will be folded 54 // into their uses via addressing modes. 55 for (const Value *Operand : Operands) 56 if (!isa<Constant>(Operand)) 57 return TTI::TCC_Basic; 58 59 return TTI::TCC_Free; 60 } 61 62 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, 63 unsigned &JTSize, 64 ProfileSummaryInfo *PSI, 65 BlockFrequencyInfo *BFI) const { 66 (void)PSI; 67 (void)BFI; 68 JTSize = 0; 69 return SI.getNumCases(); 70 } 71 72 unsigned getInliningThresholdMultiplier() const { return 1; } 73 unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const { return 8; } 74 unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const { 75 return 8; 76 } 77 int getInliningLastCallToStaticBonus() const { 78 // This is the value of InlineConstants::LastCallToStaticBonus before it was 79 // removed along with the introduction of this function. 80 return 15000; 81 } 82 unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; } 83 unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const { 84 return 0; 85 }; 86 87 int getInlinerVectorBonusPercent() const { return 150; } 88 89 InstructionCost getMemcpyCost(const Instruction *I) const { 90 return TTI::TCC_Expensive; 91 } 92 93 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const { 94 return 64; 95 } 96 97 // Although this default value is arbitrary, it is not random. It is assumed 98 // that a condition that evaluates the same way by a higher percentage than 99 // this is best represented as control flow. 
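  // (Illustrative reading of the default below, 99/100: a branch is treated as
  // predictable only when roughly 99 of every 100 executions are expected to
  // go the same way, i.e. at most about one mispredict is paid per hundred
  // executions.)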
Therefore, the default value N 100 // should be set such that the win from N% correct executions is greater than 101 // the loss from (100 - N)% mispredicted executions for the majority of 102 // intended targets. 103 BranchProbability getPredictableBranchThreshold() const { 104 return BranchProbability(99, 100); 105 } 106 107 InstructionCost getBranchMispredictPenalty() const { return 0; } 108 109 bool hasBranchDivergence(const Function *F = nullptr) const { return false; } 110 111 bool isSourceOfDivergence(const Value *V) const { return false; } 112 113 bool isAlwaysUniform(const Value *V) const { return false; } 114 115 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const { 116 return false; 117 } 118 119 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const { 120 return true; 121 } 122 123 unsigned getFlatAddressSpace() const { return -1; } 124 125 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, 126 Intrinsic::ID IID) const { 127 return false; 128 } 129 130 bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; } 131 bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const { 132 return AS == 0; 133 }; 134 135 unsigned getAssumedAddrSpace(const Value *V) const { return -1; } 136 137 bool isSingleThreaded() const { return false; } 138 139 std::pair<const Value *, unsigned> 140 getPredicatedAddrSpace(const Value *V) const { 141 return std::make_pair(nullptr, -1); 142 } 143 144 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, 145 Value *NewV) const { 146 return nullptr; 147 } 148 149 bool isLoweredToCall(const Function *F) const { 150 assert(F && "A concrete function must be provided to this routine."); 151 152 // FIXME: These should almost certainly not be handled here, and instead 153 // handled with the help of TLI or the target itself. This was largely 154 // ported from existing analysis heuristics here so that such refactorings 155 // can take place in the future. 156 157 if (F->isIntrinsic()) 158 return false; 159 160 if (F->hasLocalLinkage() || !F->hasName()) 161 return true; 162 163 StringRef Name = F->getName(); 164 165 // These will all likely lower to a single selection DAG node. 166 // clang-format off 167 if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || 168 Name == "fabs" || Name == "fabsf" || Name == "fabsl" || 169 Name == "fmin" || Name == "fminf" || Name == "fminl" || 170 Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" || 171 Name == "sin" || Name == "sinf" || Name == "sinl" || 172 Name == "cos" || Name == "cosf" || Name == "cosl" || 173 Name == "tan" || Name == "tanf" || Name == "tanl" || 174 Name == "asin" || Name == "asinf" || Name == "asinl" || 175 Name == "acos" || Name == "acosf" || Name == "acosl" || 176 Name == "atan" || Name == "atanf" || Name == "atanl" || 177 Name == "atan2" || Name == "atan2f" || Name == "atan2l"|| 178 Name == "sinh" || Name == "sinhf" || Name == "sinhl" || 179 Name == "cosh" || Name == "coshf" || Name == "coshl" || 180 Name == "tanh" || Name == "tanhf" || Name == "tanhl" || 181 Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" || 182 Name == "exp10" || Name == "exp10l" || Name == "exp10f") 183 return false; 184 // clang-format on 185 // These are all likely to be optimized into something smaller. 
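  // (For example, 'abs' and 'labs' are typically folded to the llvm.abs
  // intrinsic and 'ffs' to a short cttz-based sequence, so they rarely remain
  // real calls; this is a heuristic, not a guarantee.)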
186 if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || 187 Name == "exp2l" || Name == "exp2f" || Name == "floor" || 188 Name == "floorf" || Name == "ceil" || Name == "round" || 189 Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" || 190 Name == "llabs") 191 return false; 192 193 return true; 194 } 195 196 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, 197 AssumptionCache &AC, TargetLibraryInfo *LibInfo, 198 HardwareLoopInfo &HWLoopInfo) const { 199 return false; 200 } 201 202 unsigned getEpilogueVectorizationMinVF() const { return 16; } 203 204 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const { return false; } 205 206 TailFoldingStyle 207 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const { 208 return TailFoldingStyle::DataWithoutLaneMask; 209 } 210 211 std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, 212 IntrinsicInst &II) const { 213 return std::nullopt; 214 } 215 216 std::optional<Value *> 217 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, 218 APInt DemandedMask, KnownBits &Known, 219 bool &KnownBitsComputed) const { 220 return std::nullopt; 221 } 222 223 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( 224 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, 225 APInt &UndefElts2, APInt &UndefElts3, 226 std::function<void(Instruction *, unsigned, APInt, APInt &)> 227 SimplifyAndSetOp) const { 228 return std::nullopt; 229 } 230 231 void getUnrollingPreferences(Loop *, ScalarEvolution &, 232 TTI::UnrollingPreferences &, 233 OptimizationRemarkEmitter *) const {} 234 235 void getPeelingPreferences(Loop *, ScalarEvolution &, 236 TTI::PeelingPreferences &) const {} 237 238 bool isLegalAddImmediate(int64_t Imm) const { return false; } 239 240 bool isLegalAddScalableImmediate(int64_t Imm) const { return false; } 241 242 bool isLegalICmpImmediate(int64_t Imm) const { return false; } 243 244 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, 245 bool HasBaseReg, int64_t Scale, unsigned AddrSpace, 246 Instruction *I = nullptr, 247 int64_t ScalableOffset = 0) const { 248 // Guess that only reg and reg+reg addressing is allowed. This heuristic is 249 // taken from the implementation of LSR. 
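    // Illustratively, with this default an access of the form [reg]
    // (Scale == 0) or [reg + reg] (Scale == 1) is accepted, while anything
    // needing an immediate offset, a global base, or a scaled index such as
    // [reg + 4*reg] is rejected.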
250 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); 251 } 252 253 bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const { 254 return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, 255 C1.ScaleCost, C1.ImmCost, C1.SetupCost) < 256 std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds, 257 C2.ScaleCost, C2.ImmCost, C2.SetupCost); 258 } 259 260 bool isNumRegsMajorCostOfLSR() const { return true; } 261 262 bool shouldDropLSRSolutionIfLessProfitable() const { return false; } 263 264 bool isProfitableLSRChainElement(Instruction *I) const { return false; } 265 266 bool canMacroFuseCmp() const { return false; } 267 268 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, 269 DominatorTree *DT, AssumptionCache *AC, 270 TargetLibraryInfo *LibInfo) const { 271 return false; 272 } 273 274 TTI::AddressingModeKind 275 getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const { 276 return TTI::AMK_None; 277 } 278 279 bool isLegalMaskedStore(Type *DataType, Align Alignment) const { 280 return false; 281 } 282 283 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const { 284 return false; 285 } 286 287 bool isLegalNTStore(Type *DataType, Align Alignment) const { 288 // By default, assume nontemporal memory stores are available for stores 289 // that are aligned and have a size that is a power of 2. 290 unsigned DataSize = DL.getTypeStoreSize(DataType); 291 return Alignment >= DataSize && isPowerOf2_32(DataSize); 292 } 293 294 bool isLegalNTLoad(Type *DataType, Align Alignment) const { 295 // By default, assume nontemporal memory loads are available for loads that 296 // are aligned and have a size that is a power of 2. 297 unsigned DataSize = DL.getTypeStoreSize(DataType); 298 return Alignment >= DataSize && isPowerOf2_32(DataSize); 299 } 300 301 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const { 302 return false; 303 } 304 305 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const { 306 return false; 307 } 308 309 bool isLegalMaskedGather(Type *DataType, Align Alignment) const { 310 return false; 311 } 312 313 bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const { 314 return false; 315 } 316 317 bool forceScalarizeMaskedScatter(VectorType *DataType, 318 Align Alignment) const { 319 return false; 320 } 321 322 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const { 323 return false; 324 } 325 326 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, 327 const SmallBitVector &OpcodeMask) const { 328 return false; 329 } 330 331 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const { 332 return false; 333 } 334 335 bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const { 336 return false; 337 } 338 339 bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, 340 Align Alignment, unsigned AddrSpace) { 341 return false; 342 } 343 344 bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const { 345 return false; 346 } 347 348 bool enableOrderedReductions() const { return false; } 349 350 bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; } 351 352 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const { 353 return false; 354 } 355 356 bool prefersVectorizedAddressing() const { return true; } 357 358 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, 359 StackOffset BaseOffset, bool HasBaseReg, 360 int64_t Scale, 
361 unsigned AddrSpace) const { 362 // Guess that all legal addressing mode are free. 363 if (isLegalAddressingMode(Ty, BaseGV, BaseOffset.getFixed(), HasBaseReg, 364 Scale, AddrSpace, /*I=*/nullptr, 365 BaseOffset.getScalable())) 366 return 0; 367 return -1; 368 } 369 370 bool LSRWithInstrQueries() const { return false; } 371 372 bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; } 373 374 bool isProfitableToHoist(Instruction *I) const { return true; } 375 376 bool useAA() const { return false; } 377 378 bool isTypeLegal(Type *Ty) const { return false; } 379 380 unsigned getRegUsageForType(Type *Ty) const { return 1; } 381 382 bool shouldBuildLookupTables() const { return true; } 383 384 bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; } 385 386 bool shouldBuildRelLookupTables() const { return false; } 387 388 bool useColdCCForColdCall(Function &F) const { return false; } 389 390 bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const { 391 return false; 392 } 393 394 bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, 395 unsigned ScalarOpdIdx) const { 396 return false; 397 } 398 399 bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, 400 int OpdIdx) const { 401 return OpdIdx == -1; 402 } 403 404 bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, 405 int RetIdx) const { 406 return RetIdx == 0; 407 } 408 409 InstructionCost getScalarizationOverhead(VectorType *Ty, 410 const APInt &DemandedElts, 411 bool Insert, bool Extract, 412 TTI::TargetCostKind CostKind, 413 ArrayRef<Value *> VL = {}) const { 414 return 0; 415 } 416 417 InstructionCost 418 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, 419 ArrayRef<Type *> Tys, 420 TTI::TargetCostKind CostKind) const { 421 return 0; 422 } 423 424 bool supportsEfficientVectorElementLoadStore() const { return false; } 425 426 bool supportsTailCalls() const { return true; } 427 428 bool enableAggressiveInterleaving(bool LoopHasReductions) const { 429 return false; 430 } 431 432 TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, 433 bool IsZeroCmp) const { 434 return {}; 435 } 436 437 bool enableSelectOptimize() const { return true; } 438 439 bool shouldTreatInstructionLikeSelect(const Instruction *I) { 440 // A select with two constant operands will usually be better left as a 441 // select. 442 using namespace llvm::PatternMatch; 443 if (match(I, m_Select(m_Value(), m_Constant(), m_Constant()))) 444 return false; 445 // If the select is a logical-and/logical-or then it is better treated as a 446 // and/or by the backend. 
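    // For illustration (generic IR, names are made up):
    //   select i1 %a, i1 %b, i1 false   ; logical-and, costed as an 'and'
    //   select i1 %a, i1 true, i1 %b    ; logical-or,  costed as an 'or'
    // so neither form is treated like a real select here.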
447 return isa<SelectInst>(I) && 448 !match(I, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()), 449 m_LogicalOr(m_Value(), m_Value()))); 450 } 451 452 bool enableInterleavedAccessVectorization() const { return false; } 453 454 bool enableMaskedInterleavedAccessVectorization() const { return false; } 455 456 bool isFPVectorizationPotentiallyUnsafe() const { return false; } 457 458 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, 459 unsigned AddressSpace, Align Alignment, 460 unsigned *Fast) const { 461 return false; 462 } 463 464 TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const { 465 return TTI::PSK_Software; 466 } 467 468 bool haveFastSqrt(Type *Ty) const { return false; } 469 470 bool isExpensiveToSpeculativelyExecute(const Instruction *I) { return true; } 471 472 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; } 473 474 InstructionCost getFPOpCost(Type *Ty) const { 475 return TargetTransformInfo::TCC_Basic; 476 } 477 478 InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, 479 const APInt &Imm, Type *Ty) const { 480 return 0; 481 } 482 483 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, 484 TTI::TargetCostKind CostKind) const { 485 return TTI::TCC_Basic; 486 } 487 488 InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, 489 const APInt &Imm, Type *Ty, 490 TTI::TargetCostKind CostKind, 491 Instruction *Inst = nullptr) const { 492 return TTI::TCC_Free; 493 } 494 495 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, 496 const APInt &Imm, Type *Ty, 497 TTI::TargetCostKind CostKind) const { 498 return TTI::TCC_Free; 499 } 500 501 bool preferToKeepConstantsAttached(const Instruction &Inst, 502 const Function &Fn) const { 503 return false; 504 } 505 506 unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; } 507 bool hasConditionalLoadStoreForType(Type *Ty) const { return false; } 508 509 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const { 510 return Vector ? 
1 : 0; 511 }; 512 513 const char *getRegisterClassName(unsigned ClassID) const { 514 switch (ClassID) { 515 default: 516 return "Generic::Unknown Register Class"; 517 case 0: 518 return "Generic::ScalarRC"; 519 case 1: 520 return "Generic::VectorRC"; 521 } 522 } 523 524 TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { 525 return TypeSize::getFixed(32); 526 } 527 528 unsigned getMinVectorRegisterBitWidth() const { return 128; } 529 530 std::optional<unsigned> getMaxVScale() const { return std::nullopt; } 531 std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; } 532 bool isVScaleKnownToBeAPowerOfTwo() const { return false; } 533 534 bool 535 shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const { 536 return false; 537 } 538 539 ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const { 540 return ElementCount::get(0, IsScalable); 541 } 542 543 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; } 544 unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; } 545 546 bool shouldConsiderAddressTypePromotion( 547 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const { 548 AllowPromotionWithoutCommonHeader = false; 549 return false; 550 } 551 552 unsigned getCacheLineSize() const { return 0; } 553 std::optional<unsigned> 554 getCacheSize(TargetTransformInfo::CacheLevel Level) const { 555 switch (Level) { 556 case TargetTransformInfo::CacheLevel::L1D: 557 [[fallthrough]]; 558 case TargetTransformInfo::CacheLevel::L2D: 559 return std::nullopt; 560 } 561 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); 562 } 563 564 std::optional<unsigned> 565 getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const { 566 switch (Level) { 567 case TargetTransformInfo::CacheLevel::L1D: 568 [[fallthrough]]; 569 case TargetTransformInfo::CacheLevel::L2D: 570 return std::nullopt; 571 } 572 573 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); 574 } 575 576 std::optional<unsigned> getMinPageSize() const { return {}; } 577 578 unsigned getPrefetchDistance() const { return 0; } 579 unsigned getMinPrefetchStride(unsigned NumMemAccesses, 580 unsigned NumStridedMemAccesses, 581 unsigned NumPrefetches, bool HasCall) const { 582 return 1; 583 } 584 unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; } 585 bool enableWritePrefetching() const { return false; } 586 bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; } 587 588 InstructionCost 589 getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, 590 Type *AccumType, ElementCount VF, 591 TTI::PartialReductionExtendKind OpAExtend, 592 TTI::PartialReductionExtendKind OpBExtend, 593 std::optional<unsigned> BinOp = std::nullopt) const { 594 return InstructionCost::getInvalid(); 595 } 596 597 unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; } 598 599 InstructionCost getArithmeticInstrCost( 600 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, 601 TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info, 602 ArrayRef<const Value *> Args, 603 const Instruction *CxtI = nullptr) const { 604 // Widenable conditions will eventually lower into constants, so some 605 // operations with them will be trivially optimized away. 
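    // For example (illustrative IR):
    //   %wc = call i1 @llvm.experimental.widenable.condition()
    //   %c  = and i1 %cond, %wc
    // becomes just %cond once %wc is lowered to 'true', which is why such
    // and/or operations are treated as free below.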
606 auto IsWidenableCondition = [](const Value *V) { 607 if (auto *II = dyn_cast<IntrinsicInst>(V)) 608 if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition) 609 return true; 610 return false; 611 }; 612 // FIXME: A number of transformation tests seem to require these values 613 // which seems a little odd for how arbitary there are. 614 switch (Opcode) { 615 default: 616 break; 617 case Instruction::FDiv: 618 case Instruction::FRem: 619 case Instruction::SDiv: 620 case Instruction::SRem: 621 case Instruction::UDiv: 622 case Instruction::URem: 623 // FIXME: Unlikely to be true for CodeSize. 624 return TTI::TCC_Expensive; 625 case Instruction::And: 626 case Instruction::Or: 627 if (any_of(Args, IsWidenableCondition)) 628 return TTI::TCC_Free; 629 break; 630 } 631 632 // Assume a 3cy latency for fp arithmetic ops. 633 if (CostKind == TTI::TCK_Latency) 634 if (Ty->getScalarType()->isFloatingPointTy()) 635 return 3; 636 637 return 1; 638 } 639 640 InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, 641 unsigned Opcode1, 642 const SmallBitVector &OpcodeMask, 643 TTI::TargetCostKind CostKind) const { 644 return InstructionCost::getInvalid(); 645 } 646 647 InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, 648 ArrayRef<int> Mask, 649 TTI::TargetCostKind CostKind, int Index, 650 VectorType *SubTp, 651 ArrayRef<const Value *> Args = {}, 652 const Instruction *CxtI = nullptr) const { 653 return 1; 654 } 655 656 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, 657 TTI::CastContextHint CCH, 658 TTI::TargetCostKind CostKind, 659 const Instruction *I) const { 660 switch (Opcode) { 661 default: 662 break; 663 case Instruction::IntToPtr: { 664 unsigned SrcSize = Src->getScalarSizeInBits(); 665 if (DL.isLegalInteger(SrcSize) && 666 SrcSize <= DL.getPointerTypeSizeInBits(Dst)) 667 return 0; 668 break; 669 } 670 case Instruction::PtrToInt: { 671 unsigned DstSize = Dst->getScalarSizeInBits(); 672 if (DL.isLegalInteger(DstSize) && 673 DstSize >= DL.getPointerTypeSizeInBits(Src)) 674 return 0; 675 break; 676 } 677 case Instruction::BitCast: 678 if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy())) 679 // Identity and pointer-to-pointer casts are free. 680 return 0; 681 break; 682 case Instruction::Trunc: { 683 // trunc to a native type is free (assuming the target has compare and 684 // shift-right of the same width). 685 TypeSize DstSize = DL.getTypeSizeInBits(Dst); 686 if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue())) 687 return 0; 688 break; 689 } 690 } 691 return 1; 692 } 693 694 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, 695 VectorType *VecTy, 696 unsigned Index) const { 697 return 1; 698 } 699 700 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, 701 const Instruction *I = nullptr) const { 702 // A phi would be free, unless we're costing the throughput because it 703 // will require a register. 
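    // (PHIs normally disappear during register allocation via copy
    // coalescing, but for reciprocal throughput they still occupy a register,
    // so they fall through to the default cost of 1 below.)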
704 if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput) 705 return 0; 706 return 1; 707 } 708 709 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, 710 CmpInst::Predicate VecPred, 711 TTI::TargetCostKind CostKind, 712 TTI::OperandValueInfo Op1Info, 713 TTI::OperandValueInfo Op2Info, 714 const Instruction *I) const { 715 return 1; 716 } 717 718 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, 719 TTI::TargetCostKind CostKind, 720 unsigned Index, Value *Op0, 721 Value *Op1) const { 722 return 1; 723 } 724 725 /// \param ScalarUserAndIdx encodes the information about extracts from a 726 /// vector with 'Scalar' being the value being extracted,'User' being the user 727 /// of the extract(nullptr if user is not known before vectorization) and 728 /// 'Idx' being the extract lane. 729 InstructionCost getVectorInstrCost( 730 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, 731 Value *Scalar, 732 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const { 733 return 1; 734 } 735 736 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, 737 TTI::TargetCostKind CostKind, 738 unsigned Index) const { 739 return 1; 740 } 741 742 unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, 743 const APInt &DemandedDstElts, 744 TTI::TargetCostKind CostKind) { 745 return 1; 746 } 747 748 InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, 749 unsigned AddressSpace, 750 TTI::TargetCostKind CostKind, 751 TTI::OperandValueInfo OpInfo, 752 const Instruction *I) const { 753 return 1; 754 } 755 756 InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, 757 unsigned AddressSpace, 758 TTI::TargetCostKind CostKind, 759 const Instruction *I) const { 760 return 1; 761 } 762 763 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, 764 Align Alignment, unsigned AddressSpace, 765 TTI::TargetCostKind CostKind) const { 766 return 1; 767 } 768 769 InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, 770 const Value *Ptr, bool VariableMask, 771 Align Alignment, 772 TTI::TargetCostKind CostKind, 773 const Instruction *I = nullptr) const { 774 return 1; 775 } 776 777 InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, 778 const Value *Ptr, bool VariableMask, 779 Align Alignment, 780 TTI::TargetCostKind CostKind, 781 const Instruction *I = nullptr) const { 782 return InstructionCost::getInvalid(); 783 } 784 785 unsigned getInterleavedMemoryOpCost( 786 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, 787 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, 788 bool UseMaskForCond, bool UseMaskForGaps) const { 789 return 1; 790 } 791 792 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, 793 TTI::TargetCostKind CostKind) const { 794 switch (ICA.getID()) { 795 default: 796 break; 797 case Intrinsic::experimental_vector_histogram_add: 798 // For now, we want explicit support from the target for histograms. 
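      // Returning an invalid cost tells cost-model clients (e.g. the loop
      // vectorizer) that the operation is unsupported unless a target
      // overrides this hook.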
799 return InstructionCost::getInvalid(); 800 case Intrinsic::allow_runtime_check: 801 case Intrinsic::allow_ubsan_check: 802 case Intrinsic::annotation: 803 case Intrinsic::assume: 804 case Intrinsic::sideeffect: 805 case Intrinsic::pseudoprobe: 806 case Intrinsic::arithmetic_fence: 807 case Intrinsic::dbg_assign: 808 case Intrinsic::dbg_declare: 809 case Intrinsic::dbg_value: 810 case Intrinsic::dbg_label: 811 case Intrinsic::invariant_start: 812 case Intrinsic::invariant_end: 813 case Intrinsic::launder_invariant_group: 814 case Intrinsic::strip_invariant_group: 815 case Intrinsic::is_constant: 816 case Intrinsic::lifetime_start: 817 case Intrinsic::lifetime_end: 818 case Intrinsic::experimental_noalias_scope_decl: 819 case Intrinsic::objectsize: 820 case Intrinsic::ptr_annotation: 821 case Intrinsic::var_annotation: 822 case Intrinsic::experimental_gc_result: 823 case Intrinsic::experimental_gc_relocate: 824 case Intrinsic::coro_alloc: 825 case Intrinsic::coro_begin: 826 case Intrinsic::coro_begin_custom_abi: 827 case Intrinsic::coro_free: 828 case Intrinsic::coro_end: 829 case Intrinsic::coro_frame: 830 case Intrinsic::coro_size: 831 case Intrinsic::coro_align: 832 case Intrinsic::coro_suspend: 833 case Intrinsic::coro_subfn_addr: 834 case Intrinsic::threadlocal_address: 835 case Intrinsic::experimental_widenable_condition: 836 case Intrinsic::ssa_copy: 837 // These intrinsics don't actually represent code after lowering. 838 return 0; 839 } 840 return 1; 841 } 842 843 InstructionCost getCallInstrCost(Function *F, Type *RetTy, 844 ArrayRef<Type *> Tys, 845 TTI::TargetCostKind CostKind) const { 846 return 1; 847 } 848 849 // Assume that we have a register of the right size for the type. 850 unsigned getNumberOfParts(Type *Tp) const { return 1; } 851 852 InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *, 853 const SCEV *) const { 854 return 0; 855 } 856 857 InstructionCost getArithmeticReductionCost(unsigned, VectorType *, 858 std::optional<FastMathFlags> FMF, 859 TTI::TargetCostKind) const { 860 return 1; 861 } 862 863 InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *, 864 FastMathFlags, 865 TTI::TargetCostKind) const { 866 return 1; 867 } 868 869 InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, 870 Type *ResTy, VectorType *Ty, 871 FastMathFlags FMF, 872 TTI::TargetCostKind CostKind) const { 873 return 1; 874 } 875 876 InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, 877 VectorType *Ty, 878 TTI::TargetCostKind CostKind) const { 879 return 1; 880 } 881 882 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const { 883 return 0; 884 } 885 886 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const { 887 return false; 888 } 889 890 unsigned getAtomicMemIntrinsicMaxElementSize() const { 891 // Note for overrides: You must ensure for all element unordered-atomic 892 // memory intrinsics that all power-of-2 element sizes up to, and 893 // including, the return value of this method have a corresponding 894 // runtime lib call. 
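    // (For instance, a target returning 4 from this hook is promising that
    // the 1-, 2- and 4-byte element variants of those calls are available at
    // run time.)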
These runtime lib call definitions can be found 895 // in RuntimeLibcalls.h 896 return 0; 897 } 898 899 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, 900 Type *ExpectedType) const { 901 return nullptr; 902 } 903 904 Type * 905 getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, 906 unsigned SrcAddrSpace, unsigned DestAddrSpace, 907 Align SrcAlign, Align DestAlign, 908 std::optional<uint32_t> AtomicElementSize) const { 909 return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8) 910 : Type::getInt8Ty(Context); 911 } 912 913 void getMemcpyLoopResidualLoweringType( 914 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, 915 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, 916 Align SrcAlign, Align DestAlign, 917 std::optional<uint32_t> AtomicCpySize) const { 918 unsigned OpSizeInBytes = AtomicCpySize.value_or(1); 919 Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8); 920 for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes) 921 OpsOut.push_back(OpType); 922 } 923 924 bool areInlineCompatible(const Function *Caller, 925 const Function *Callee) const { 926 return (Caller->getFnAttribute("target-cpu") == 927 Callee->getFnAttribute("target-cpu")) && 928 (Caller->getFnAttribute("target-features") == 929 Callee->getFnAttribute("target-features")); 930 } 931 932 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, 933 unsigned DefaultCallPenalty) const { 934 return DefaultCallPenalty; 935 } 936 937 bool areTypesABICompatible(const Function *Caller, const Function *Callee, 938 const ArrayRef<Type *> &Types) const { 939 return (Caller->getFnAttribute("target-cpu") == 940 Callee->getFnAttribute("target-cpu")) && 941 (Caller->getFnAttribute("target-features") == 942 Callee->getFnAttribute("target-features")); 943 } 944 945 bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty, 946 const DataLayout &DL) const { 947 return false; 948 } 949 950 bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty, 951 const DataLayout &DL) const { 952 return false; 953 } 954 955 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; } 956 957 bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; } 958 959 bool isLegalToVectorizeStore(StoreInst *SI) const { return true; } 960 961 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, 962 unsigned AddrSpace) const { 963 return true; 964 } 965 966 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, 967 unsigned AddrSpace) const { 968 return true; 969 } 970 971 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, 972 ElementCount VF) const { 973 return true; 974 } 975 976 bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; } 977 978 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, 979 unsigned ChainSizeInBytes, 980 VectorType *VecTy) const { 981 return VF; 982 } 983 984 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, 985 unsigned ChainSizeInBytes, 986 VectorType *VecTy) const { 987 return VF; 988 } 989 990 bool preferFixedOverScalableIfEqualCost() const { return false; } 991 992 bool preferInLoopReduction(unsigned Opcode, Type *Ty, 993 TTI::ReductionFlags Flags) const { 994 return false; 995 } 996 997 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, 998 TTI::ReductionFlags Flags) const { 999 return false; 1000 } 1001 1002 bool preferEpilogueVectorization() const { 1003 return true; 1004 } 1005 1006 bool 
shouldExpandReduction(const IntrinsicInst *II) const { return true; } 1007 1008 TTI::ReductionShuffle 1009 getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const { 1010 return TTI::ReductionShuffle::SplitHalf; 1011 } 1012 1013 unsigned getGISelRematGlobalCost() const { return 1; } 1014 1015 unsigned getMinTripCountTailFoldingThreshold() const { return 0; } 1016 1017 bool supportsScalableVectors() const { return false; } 1018 1019 bool enableScalableVectorization() const { return false; } 1020 1021 bool hasActiveVectorLength(unsigned Opcode, Type *DataType, 1022 Align Alignment) const { 1023 return false; 1024 } 1025 1026 bool isProfitableToSinkOperands(Instruction *I, 1027 SmallVectorImpl<Use *> &Ops) const { 1028 return false; 1029 } 1030 1031 bool isVectorShiftByScalarCheap(Type *Ty) const { return false; } 1032 1033 TargetTransformInfo::VPLegalization 1034 getVPLegalizationStrategy(const VPIntrinsic &PI) const { 1035 return TargetTransformInfo::VPLegalization( 1036 /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard, 1037 /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert); 1038 } 1039 1040 bool hasArmWideBranch(bool) const { return false; } 1041 1042 uint64_t getFeatureMask(const Function &F) const { return 0; } 1043 1044 bool isMultiversionedFunction(const Function &F) const { return false; } 1045 1046 unsigned getMaxNumArgs() const { return UINT_MAX; } 1047 1048 unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const { 1049 return 0; 1050 } 1051 1052 void collectKernelLaunchBounds( 1053 const Function &F, 1054 SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {} 1055 1056 protected: 1057 // Obtain the minimum required size to hold the value (without the sign) 1058 // In case of a vector it returns the min required size for one element. 1059 unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const { 1060 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) { 1061 const auto *VectorValue = cast<Constant>(Val); 1062 1063 // In case of a vector need to pick the max between the min 1064 // required size for each element 1065 auto *VT = cast<FixedVectorType>(Val->getType()); 1066 1067 // Assume unsigned elements 1068 isSigned = false; 1069 1070 // The max required size is the size of the vector element type 1071 unsigned MaxRequiredSize = 1072 VT->getElementType()->getPrimitiveSizeInBits().getFixedValue(); 1073 1074 unsigned MinRequiredSize = 0; 1075 for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) { 1076 if (auto *IntElement = 1077 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) { 1078 bool signedElement = IntElement->getValue().isNegative(); 1079 // Get the element min required size. 1080 unsigned ElementMinRequiredSize = 1081 IntElement->getValue().getSignificantBits() - 1; 1082 // In case one element is signed then all the vector is signed. 1083 isSigned |= signedElement; 1084 // Save the max required bit size between all the elements. 
1085 MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize); 1086 } else { 1087 // not an int constant element 1088 return MaxRequiredSize; 1089 } 1090 } 1091 return MinRequiredSize; 1092 } 1093 1094 if (const auto *CI = dyn_cast<ConstantInt>(Val)) { 1095 isSigned = CI->getValue().isNegative(); 1096 return CI->getValue().getSignificantBits() - 1; 1097 } 1098 1099 if (const auto *Cast = dyn_cast<SExtInst>(Val)) { 1100 isSigned = true; 1101 return Cast->getSrcTy()->getScalarSizeInBits() - 1; 1102 } 1103 1104 if (const auto *Cast = dyn_cast<ZExtInst>(Val)) { 1105 isSigned = false; 1106 return Cast->getSrcTy()->getScalarSizeInBits(); 1107 } 1108 1109 isSigned = false; 1110 return Val->getType()->getScalarSizeInBits(); 1111 } 1112 1113 bool isStridedAccess(const SCEV *Ptr) const { 1114 return Ptr && isa<SCEVAddRecExpr>(Ptr); 1115 } 1116 1117 const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE, 1118 const SCEV *Ptr) const { 1119 if (!isStridedAccess(Ptr)) 1120 return nullptr; 1121 const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr); 1122 return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE)); 1123 } 1124 1125 bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, 1126 int64_t MergeDistance) const { 1127 const SCEVConstant *Step = getConstantStrideStep(SE, Ptr); 1128 if (!Step) 1129 return false; 1130 APInt StrideVal = Step->getAPInt(); 1131 if (StrideVal.getBitWidth() > 64) 1132 return false; 1133 // FIXME: Need to take absolute value for negative stride case. 1134 return StrideVal.getSExtValue() < MergeDistance; 1135 } 1136 }; 1137 1138 /// CRTP base class for use as a mix-in that aids implementing 1139 /// a TargetTransformInfo-compatible class. 1140 template <typename T> 1141 class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { 1142 private: 1143 typedef TargetTransformInfoImplBase BaseT; 1144 1145 protected: 1146 explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {} 1147 1148 public: 1149 using BaseT::getGEPCost; 1150 1151 InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, 1152 ArrayRef<const Value *> Operands, Type *AccessType, 1153 TTI::TargetCostKind CostKind) { 1154 assert(PointeeType && Ptr && "can't get GEPCost of nullptr"); 1155 auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts()); 1156 bool HasBaseReg = (BaseGV == nullptr); 1157 1158 auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType()); 1159 APInt BaseOffset(PtrSizeBits, 0); 1160 int64_t Scale = 0; 1161 1162 auto GTI = gep_type_begin(PointeeType, Operands); 1163 Type *TargetType = nullptr; 1164 1165 // Handle the case where the GEP instruction has a single operand, 1166 // the basis, therefore TargetType is a nullptr. 1167 if (Operands.empty()) 1168 return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic; 1169 1170 for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) { 1171 TargetType = GTI.getIndexedType(); 1172 // We assume that the cost of Scalar GEP with constant index and the 1173 // cost of Vector GEP with splat constant index are the same. 
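      // For example (illustrative IR), a vector GEP such as
      //   getelementptr i32, <4 x ptr> %ps, <4 x i64> <i64 7, i64 7, i64 7, i64 7>
      // is costed as if it were 'getelementptr i32, ptr %p, i64 7'.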
1174 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I); 1175 if (!ConstIdx) 1176 if (auto Splat = getSplatValue(*I)) 1177 ConstIdx = dyn_cast<ConstantInt>(Splat); 1178 if (StructType *STy = GTI.getStructTypeOrNull()) { 1179 // For structures the index is always splat or scalar constant 1180 assert(ConstIdx && "Unexpected GEP index"); 1181 uint64_t Field = ConstIdx->getZExtValue(); 1182 BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field); 1183 } else { 1184 // If this operand is a scalable type, bail out early. 1185 // TODO: Make isLegalAddressingMode TypeSize aware. 1186 if (TargetType->isScalableTy()) 1187 return TTI::TCC_Basic; 1188 int64_t ElementSize = 1189 GTI.getSequentialElementStride(DL).getFixedValue(); 1190 if (ConstIdx) { 1191 BaseOffset += 1192 ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize; 1193 } else { 1194 // Needs scale register. 1195 if (Scale != 0) 1196 // No addressing mode takes two scale registers. 1197 return TTI::TCC_Basic; 1198 Scale = ElementSize; 1199 } 1200 } 1201 } 1202 1203 // If we haven't been provided a hint, use the target type for now. 1204 // 1205 // TODO: Take a look at potentially removing this: This is *slightly* wrong 1206 // as it's possible to have a GEP with a foldable target type but a memory 1207 // access that isn't foldable. For example, this load isn't foldable on 1208 // RISC-V: 1209 // 1210 // %p = getelementptr i32, ptr %base, i32 42 1211 // %x = load <2 x i32>, ptr %p 1212 if (!AccessType) 1213 AccessType = TargetType; 1214 1215 // If the final address of the GEP is a legal addressing mode for the given 1216 // access type, then we can fold it into its users. 1217 if (static_cast<T *>(this)->isLegalAddressingMode( 1218 AccessType, const_cast<GlobalValue *>(BaseGV), 1219 BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, 1220 Ptr->getType()->getPointerAddressSpace())) 1221 return TTI::TCC_Free; 1222 1223 // TODO: Instead of returning TCC_Basic here, we should use 1224 // getArithmeticInstrCost. Or better yet, provide a hook to let the target 1225 // model it. 1226 return TTI::TCC_Basic; 1227 } 1228 1229 InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs, 1230 const Value *Base, 1231 const TTI::PointersChainInfo &Info, 1232 Type *AccessTy, 1233 TTI::TargetCostKind CostKind) { 1234 InstructionCost Cost = TTI::TCC_Free; 1235 // In the basic model we take into account GEP instructions only 1236 // (although the pointer here may also be an alloca, a plain value, a 1237 // constant or constant expression, a PHI, a bitcast, ... anything that is 1238 // allowed to be used as a pointer). Typically, if Base is not a GEP 1239 // instruction and all the pointers are relative to the same base address, 1240 // the rest are either GEP instructions, PHIs, bitcasts or constants. When 1241 // they share the same base, we simply cost each non-Base GEP as an ADD 1242 // operation if any of its indices is non-constant. 1243 // If there are no known dependencies between the pointers, the cost is 1244 // calculated as the sum of the costs of the GEP instructions.
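    // For instance, with Info.isSameBase(), a non-Base GEP whose indices are
    // all constant adds nothing here, while one with a non-constant index is
    // charged as a single Add.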
1245 for (const Value *V : Ptrs) { 1246 const auto *GEP = dyn_cast<GetElementPtrInst>(V); 1247 if (!GEP) 1248 continue; 1249 if (Info.isSameBase() && V != Base) { 1250 if (GEP->hasAllConstantIndices()) 1251 continue; 1252 Cost += static_cast<T *>(this)->getArithmeticInstrCost( 1253 Instruction::Add, GEP->getType(), CostKind, 1254 {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, 1255 {}); 1256 } else { 1257 SmallVector<const Value *> Indices(GEP->indices()); 1258 Cost += static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(), 1259 GEP->getPointerOperand(), 1260 Indices, AccessTy, CostKind); 1261 } 1262 } 1263 return Cost; 1264 } 1265 1266 InstructionCost getInstructionCost(const User *U, 1267 ArrayRef<const Value *> Operands, 1268 TTI::TargetCostKind CostKind) { 1269 using namespace llvm::PatternMatch; 1270 1271 auto *TargetTTI = static_cast<T *>(this); 1272 // Handle non-intrinsic calls, invokes, and callbr. 1273 // FIXME: Unlikely to be true for anything but CodeSize. 1274 auto *CB = dyn_cast<CallBase>(U); 1275 if (CB && !isa<IntrinsicInst>(U)) { 1276 if (const Function *F = CB->getCalledFunction()) { 1277 if (!TargetTTI->isLoweredToCall(F)) 1278 return TTI::TCC_Basic; // Give a basic cost if it will be lowered 1279 1280 return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1); 1281 } 1282 // For indirect or other calls, scale cost by number of arguments. 1283 return TTI::TCC_Basic * (CB->arg_size() + 1); 1284 } 1285 1286 Type *Ty = U->getType(); 1287 unsigned Opcode = Operator::getOpcode(U); 1288 auto *I = dyn_cast<Instruction>(U); 1289 switch (Opcode) { 1290 default: 1291 break; 1292 case Instruction::Call: { 1293 assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call"); 1294 auto *Intrinsic = cast<IntrinsicInst>(U); 1295 IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB); 1296 return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind); 1297 } 1298 case Instruction::Br: 1299 case Instruction::Ret: 1300 case Instruction::PHI: 1301 case Instruction::Switch: 1302 return TargetTTI->getCFInstrCost(Opcode, CostKind, I); 1303 case Instruction::ExtractValue: 1304 case Instruction::Freeze: 1305 return TTI::TCC_Free; 1306 case Instruction::Alloca: 1307 if (cast<AllocaInst>(U)->isStaticAlloca()) 1308 return TTI::TCC_Free; 1309 break; 1310 case Instruction::GetElementPtr: { 1311 const auto *GEP = cast<GEPOperator>(U); 1312 Type *AccessType = nullptr; 1313 // For now, only provide the AccessType in the simple case where the GEP 1314 // only has one user. 
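      // E.g. if the GEP's single user is 'load i64, ptr %p', the access type
      // forwarded to getGEPCost is i64 rather than the GEP's source element
      // type.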
1315 if (GEP->hasOneUser() && I) 1316 AccessType = I->user_back()->getAccessType(); 1317 1318 return TargetTTI->getGEPCost(GEP->getSourceElementType(), 1319 Operands.front(), Operands.drop_front(), 1320 AccessType, CostKind); 1321 } 1322 case Instruction::Add: 1323 case Instruction::FAdd: 1324 case Instruction::Sub: 1325 case Instruction::FSub: 1326 case Instruction::Mul: 1327 case Instruction::FMul: 1328 case Instruction::UDiv: 1329 case Instruction::SDiv: 1330 case Instruction::FDiv: 1331 case Instruction::URem: 1332 case Instruction::SRem: 1333 case Instruction::FRem: 1334 case Instruction::Shl: 1335 case Instruction::LShr: 1336 case Instruction::AShr: 1337 case Instruction::And: 1338 case Instruction::Or: 1339 case Instruction::Xor: 1340 case Instruction::FNeg: { 1341 const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Operands[0]); 1342 TTI::OperandValueInfo Op2Info; 1343 if (Opcode != Instruction::FNeg) 1344 Op2Info = TTI::getOperandInfo(Operands[1]); 1345 return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, 1346 Op2Info, Operands, I); 1347 } 1348 case Instruction::IntToPtr: 1349 case Instruction::PtrToInt: 1350 case Instruction::SIToFP: 1351 case Instruction::UIToFP: 1352 case Instruction::FPToUI: 1353 case Instruction::FPToSI: 1354 case Instruction::Trunc: 1355 case Instruction::FPTrunc: 1356 case Instruction::BitCast: 1357 case Instruction::FPExt: 1358 case Instruction::SExt: 1359 case Instruction::ZExt: 1360 case Instruction::AddrSpaceCast: { 1361 Type *OpTy = Operands[0]->getType(); 1362 return TargetTTI->getCastInstrCost( 1363 Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I); 1364 } 1365 case Instruction::Store: { 1366 auto *SI = cast<StoreInst>(U); 1367 Type *ValTy = Operands[0]->getType(); 1368 TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(Operands[0]); 1369 return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(), 1370 SI->getPointerAddressSpace(), CostKind, 1371 OpInfo, I); 1372 } 1373 case Instruction::Load: { 1374 // FIXME: Arbitary cost which could come from the backend. 1375 if (CostKind == TTI::TCK_Latency) 1376 return 4; 1377 auto *LI = cast<LoadInst>(U); 1378 Type *LoadType = U->getType(); 1379 // If there is a non-register sized type, the cost estimation may expand 1380 // it to be several instructions to load into multiple registers on the 1381 // target. But, if the only use of the load is a trunc instruction to a 1382 // register sized type, the instruction selector can combine these 1383 // instructions to be a single load. So, in this case, we use the 1384 // destination type of the trunc instruction rather than the load to 1385 // accurately estimate the cost of this load instruction. 
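      // E.g. '%w = load i128' whose only use is '%t = trunc i128 %w to i64'
      // is costed as a single i64 load when optimizing for code size.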
1386 if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() && 1387 !LoadType->isVectorTy()) { 1388 if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin())) 1389 LoadType = TI->getDestTy(); 1390 } 1391 return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(), 1392 LI->getPointerAddressSpace(), CostKind, 1393 {TTI::OK_AnyValue, TTI::OP_None}, I); 1394 } 1395 case Instruction::Select: { 1396 const Value *Op0, *Op1; 1397 if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) || 1398 match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) { 1399 // select x, y, false --> x & y 1400 // select x, true, y --> x | y 1401 const auto Op1Info = TTI::getOperandInfo(Op0); 1402 const auto Op2Info = TTI::getOperandInfo(Op1); 1403 assert(Op0->getType()->getScalarSizeInBits() == 1 && 1404 Op1->getType()->getScalarSizeInBits() == 1); 1405 1406 SmallVector<const Value *, 2> Operands{Op0, Op1}; 1407 return TargetTTI->getArithmeticInstrCost( 1408 match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty, 1409 CostKind, Op1Info, Op2Info, Operands, I); 1410 } 1411 const auto Op1Info = TTI::getOperandInfo(Operands[1]); 1412 const auto Op2Info = TTI::getOperandInfo(Operands[2]); 1413 Type *CondTy = Operands[0]->getType(); 1414 return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy, 1415 CmpInst::BAD_ICMP_PREDICATE, 1416 CostKind, Op1Info, Op2Info, I); 1417 } 1418 case Instruction::ICmp: 1419 case Instruction::FCmp: { 1420 const auto Op1Info = TTI::getOperandInfo(Operands[0]); 1421 const auto Op2Info = TTI::getOperandInfo(Operands[1]); 1422 Type *ValTy = Operands[0]->getType(); 1423 // TODO: Also handle ICmp/FCmp constant expressions. 1424 return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(), 1425 I ? cast<CmpInst>(I)->getPredicate() 1426 : CmpInst::BAD_ICMP_PREDICATE, 1427 CostKind, Op1Info, Op2Info, I); 1428 } 1429 case Instruction::InsertElement: { 1430 auto *IE = dyn_cast<InsertElementInst>(U); 1431 if (!IE) 1432 return TTI::TCC_Basic; // FIXME 1433 unsigned Idx = -1; 1434 if (auto *CI = dyn_cast<ConstantInt>(Operands[2])) 1435 if (CI->getValue().getActiveBits() <= 32) 1436 Idx = CI->getZExtValue(); 1437 return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx); 1438 } 1439 case Instruction::ShuffleVector: { 1440 auto *Shuffle = dyn_cast<ShuffleVectorInst>(U); 1441 if (!Shuffle) 1442 return TTI::TCC_Basic; // FIXME 1443 1444 auto *VecTy = cast<VectorType>(U->getType()); 1445 auto *VecSrcTy = cast<VectorType>(Operands[0]->getType()); 1446 ArrayRef<int> Mask = Shuffle->getShuffleMask(); 1447 int NumSubElts, SubIndex; 1448 1449 // TODO: move more of this inside improveShuffleKindFromMask. 1450 if (Shuffle->changesLength()) { 1451 // Treat a 'subvector widening' as a free shuffle. 
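        // E.g. (illustrative IR)
        //   shufflevector <2 x i32> %v, <2 x i32> poison,
        //                 <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
        // only pads %v with undefined lanes and is given a cost of 0.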
1452 if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding()) 1453 return 0; 1454 1455 if (Shuffle->isExtractSubvectorMask(SubIndex)) 1456 return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy, 1457 Mask, CostKind, SubIndex, VecTy, 1458 Operands, Shuffle); 1459 1460 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) 1461 return TargetTTI->getShuffleCost( 1462 TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex, 1463 FixedVectorType::get(VecTy->getScalarType(), NumSubElts), 1464 Operands, Shuffle); 1465 1466 int ReplicationFactor, VF; 1467 if (Shuffle->isReplicationMask(ReplicationFactor, VF)) { 1468 APInt DemandedDstElts = APInt::getZero(Mask.size()); 1469 for (auto I : enumerate(Mask)) { 1470 if (I.value() != PoisonMaskElem) 1471 DemandedDstElts.setBit(I.index()); 1472 } 1473 return TargetTTI->getReplicationShuffleCost( 1474 VecSrcTy->getElementType(), ReplicationFactor, VF, 1475 DemandedDstElts, CostKind); 1476 } 1477 1478 bool IsUnary = isa<UndefValue>(Operands[1]); 1479 NumSubElts = VecSrcTy->getElementCount().getKnownMinValue(); 1480 SmallVector<int, 16> AdjustMask(Mask); 1481 1482 // Widening shuffle - widening the source(s) to the new length 1483 // (treated as free - see above), and then perform the adjusted 1484 // shuffle at that width. 1485 if (Shuffle->increasesLength()) { 1486 for (int &M : AdjustMask) 1487 M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M; 1488 1489 return TargetTTI->getShuffleCost( 1490 IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy, 1491 AdjustMask, CostKind, 0, nullptr, Operands, Shuffle); 1492 } 1493 1494 // Narrowing shuffle - perform shuffle at original wider width and 1495 // then extract the lower elements. 1496 AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem); 1497 1498 InstructionCost ShuffleCost = TargetTTI->getShuffleCost( 1499 IsUnary ? 
TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, 1500 VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle); 1501 1502 SmallVector<int, 16> ExtractMask(Mask.size()); 1503 std::iota(ExtractMask.begin(), ExtractMask.end(), 0); 1504 return ShuffleCost + TargetTTI->getShuffleCost( 1505 TTI::SK_ExtractSubvector, VecSrcTy, 1506 ExtractMask, CostKind, 0, VecTy, {}, Shuffle); 1507 } 1508 1509 if (Shuffle->isIdentity()) 1510 return 0; 1511 1512 if (Shuffle->isReverse()) 1513 return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Mask, CostKind, 1514 0, nullptr, Operands, Shuffle); 1515 1516 if (Shuffle->isSelect()) 1517 return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Mask, CostKind, 1518 0, nullptr, Operands, Shuffle); 1519 1520 if (Shuffle->isTranspose()) 1521 return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Mask, 1522 CostKind, 0, nullptr, Operands, 1523 Shuffle); 1524 1525 if (Shuffle->isZeroEltSplat()) 1526 return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Mask, 1527 CostKind, 0, nullptr, Operands, 1528 Shuffle); 1529 1530 if (Shuffle->isSingleSource()) 1531 return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Mask, 1532 CostKind, 0, nullptr, Operands, 1533 Shuffle); 1534 1535 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) 1536 return TargetTTI->getShuffleCost( 1537 TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex, 1538 FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands, 1539 Shuffle); 1540 1541 if (Shuffle->isSplice(SubIndex)) 1542 return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind, 1543 SubIndex, nullptr, Operands, Shuffle); 1544 1545 return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Mask, 1546 CostKind, 0, nullptr, Operands, Shuffle); 1547 } 1548 case Instruction::ExtractElement: { 1549 auto *EEI = dyn_cast<ExtractElementInst>(U); 1550 if (!EEI) 1551 return TTI::TCC_Basic; // FIXME 1552 unsigned Idx = -1; 1553 if (auto *CI = dyn_cast<ConstantInt>(Operands[1])) 1554 if (CI->getValue().getActiveBits() <= 32) 1555 Idx = CI->getZExtValue(); 1556 Type *DstTy = Operands[0]->getType(); 1557 return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx); 1558 } 1559 } 1560 1561 // By default, just classify everything as 'basic' or -1 to represent that 1562 // don't know the throughput cost. 1563 return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic; 1564 } 1565 1566 bool isExpensiveToSpeculativelyExecute(const Instruction *I) { 1567 auto *TargetTTI = static_cast<T *>(this); 1568 SmallVector<const Value *, 4> Ops(I->operand_values()); 1569 InstructionCost Cost = TargetTTI->getInstructionCost( 1570 I, Ops, TargetTransformInfo::TCK_SizeAndLatency); 1571 return Cost >= TargetTransformInfo::TCC_Expensive; 1572 } 1573 1574 bool supportsTailCallFor(const CallBase *CB) const { 1575 return static_cast<const T *>(this)->supportsTailCalls(); 1576 } 1577 }; 1578 } // namespace llvm 1579 1580 #endif 1581