//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/InstructionCost.h"
#include <cstdint>
#include <optional>

namespace llvm {

class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;

class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  const AArch64Subtarget *getST() const { return ST; }
  const AArch64TargetLowering *getTLI() const { return TLI; }

  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };

  bool isWideningInstruction(Type *DstTy, unsigned Opcode,
                             ArrayRef<const Value *> Args,
                             Type *SrcOverrideTy = nullptr);

  // A helper function called by 'getVectorInstrCost'.
  //
  // 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse'
  // indicates whether the vector instruction is available in the input IR or
  // just imaginary in vectorizer passes.
  /// \param ScalarUserAndIdx encodes the information about extracts from a
  /// vector, with 'Scalar' being the value being extracted, 'User' being the
  /// user of the extract (nullptr if the user is not known before
  /// vectorization) and 'Idx' being the extract lane.
  InstructionCost getVectorInstrCostHelper(
      unsigned Opcode, Type *Val, unsigned Index, bool HasRealUse,
      const Instruction *I = nullptr, Value *Scalar = nullptr,
      ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx = {});
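
  // A hypothetical illustration of the ScalarUserAndIdx encoding described
  // above (the names %x and %fadd are made up): a tuple {Scalar, User, Idx}
  // such as {%x, %fadd, 2} records that scalar %x, used by %fadd, corresponds
  // to lane 2 of the vector; the User slot is nullptr when the user is not
  // known before vectorization.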

public:
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const;

  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                unsigned DefaultCallPenalty) const;

  uint64_t getFeatureMask(const Function &F) const;

  bool isMultiversionedFunction(const Function &F) const;

  /// \name Scalar TTI Implementations
  /// @{

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(int64_t Val);
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);
  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind);
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  bool enableInterleavedAccessVectorization() { return true; }

  bool enableMaskedInterleavedAccessVectorization() { return ST->hasSVE(); }

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      // 32 vector registers (V0-V31) when NEON is available, otherwise none.
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    // 31 general-purpose registers (X0-X30).
    return 31;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;

  unsigned getMinVectorRegisterBitWidth() const {
    return ST->getMinVectorRegisterBitWidth();
  }

  std::optional<unsigned> getVScaleForTuning() const {
    return ST->getVScaleForTuning();
  }

  bool isVScaleKnownToBeAPowerOfTwo() const { return true; }

  bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;

  /// Try to return an estimate cost factor that can be used as a multiplier
  /// when scalarizing an operation for a vector with ElementCount \p VF.
  /// For scalable vectors this currently takes the most pessimistic view based
  /// upon the maximum possible value for vscale.
  unsigned getMaxNumElements(ElementCount VF) const {
    if (!VF.isScalable())
      return VF.getFixedValue();

    return VF.getKnownMinValue() * ST->getVScaleForTuning();
  }
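
  // For example, assuming the subtarget's vscale-for-tuning is 2: a fixed VF
  // of 8 returns 8, while a scalable VF of vscale x 4 returns 4 * 2 = 8 as
  // the scalarization factor.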

  unsigned getMaxInterleaveFactor(ElementCount VF);

  bool prefersVectorizedAddressing() const;

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy, unsigned Index);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1);

  /// \param ScalarUserAndIdx encodes the information about extracts from a
  /// vector, with 'Scalar' being the value being extracted, 'User' being the
  /// user of the extract (nullptr if the user is not known before
  /// vectorization) and 'Idx' being the extract lane.
  InstructionCost getVectorInstrCost(
      unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
      Value *Scalar,
      ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx);

  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index);

  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
                                         FastMathFlags FMF,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
                                                VectorType *ValTy,
                                                TTI::TargetCostKind CostKind);

  InstructionCost getSpliceCost(VectorType *Tp, int Index);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {}, const Instruction *CxtI = nullptr);

  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  InstructionCost getCmpSelInstrCost(
      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr);

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;
  bool useNeonVector(const Type *Ty) const;

  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
                  const Instruction *I = nullptr);

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);

  bool isElementTypeLegalForScalableVector(Type *Ty) const {
    if (Ty->isPointerTy())
      return true;

    if (Ty->isBFloatTy() && ST->hasBF16())
      return true;

    if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
      return true;

    if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
      return true;

    return false;
  }

  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
    if (!ST->hasSVE())
      return false;

    // For fixed vectors, avoid scalarization if using SVE for them.
    if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors() &&
        DataType->getPrimitiveSizeInBits() != 128)
      return false; // Fall back to scalarization of masked operations.

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }
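
  // A worked example of the fixed-vector check above, assuming SVE is
  // available but fixed-length SVE lowering is disabled: a 128-bit <4 x float>
  // is still reported as a legal masked load/store, while a 256-bit
  // <8 x float> returns false and is scalarized instead.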

  bool isLegalMaskedGatherScatter(Type *DataType) const {
    if (!ST->isSVEAvailable())
      return false;

    // For fixed vectors, scalarize if not using SVE for them.
    auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
    if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
                         DataTypeFVTy->getNumElements() < 2))
      return false;

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
    // Return true if we can generate a `ld1r` splat load instruction.
    if (!ST->hasNEON() || NumElements.isScalable())
      return false;
    switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) {
    case 8:
    case 16:
    case 32:
    case 64: {
      // We accept bit-widths >= 64 bits and elements of {8,16,32,64} bits.
      unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
      return VectorBits >= 64;
    }
    }
    return false;
  }
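
  // For instance, broadcasting an i8 element into 8 fixed lanes forms a
  // 64-bit vector and can use `ld1r` (legal), whereas i8 into 2 lanes is only
  // 16 bits wide and is rejected.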

  bool isLegalNTStoreLoad(Type *DataType, Align Alignment) {
    // NOTE: The logic below is mostly geared towards the loop vectorizer (LV),
    // which calls it with vectors with 2 elements. We might want to improve
    // that, if other users show up.
    // Nontemporal vector loads/stores can be directly lowered to LDNP/STNP, if
    // the vector can be halved so that each half fits into a register. That's
    // the case if the element type fits into a register and the number of
    // elements is a power of 2 > 1.
    if (auto *DataTypeTy = dyn_cast<FixedVectorType>(DataType)) {
      unsigned NumElements = DataTypeTy->getNumElements();
      unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    return BaseT::isLegalNTStore(DataType, Alignment);
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) {
    return isLegalNTStoreLoad(DataType, Alignment);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) {
    // Only supports little-endian targets.
    if (ST->isLittleEndian())
      return isLegalNTStoreLoad(DataType, Alignment);
    return BaseT::isLegalNTLoad(DataType, Alignment);
  }
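
  // Example of the LDNP/STNP rule above: a nontemporal <4 x i32> or <2 x i64>
  // access is legal (it splits into two 64-bit halves), while <3 x i32>
  // (non-power-of-2 element count) and <1 x i64> (a single element) are not.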

  InstructionCost
  getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB,
                          Type *AccumType, ElementCount VF,
                          TTI::PartialReductionExtendKind OpAExtend,
                          TTI::PartialReductionExtendKind OpBExtend,
                          std::optional<unsigned> BinOp) const;

  bool enableOrderedReductions() const { return true; }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader);

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

  unsigned getGISelRematGlobalCost() const {
    return 2;
  }

  unsigned getMinTripCountTailFoldingThreshold() const {
    return ST->hasSVE() ? 5 : 0;
  }

  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const {
    if (ST->hasSVE())
      return IVUpdateMayOverflow
                 ? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck
                 : TailFoldingStyle::DataAndControlFlow;

    return TailFoldingStyle::DataWithoutLaneMask;
  }

  bool preferFixedOverScalableIfEqualCost() const;

  unsigned getEpilogueVectorizationMinVF() const;

  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI);

  bool supportsScalableVectors() const {
    return ST->isSVEorStreamingSVEAvailable();
  }

  bool enableScalableVectorization() const;

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const;

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return ST->hasSVE();
  }

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = {},
                                 const Instruction *CxtI = nullptr);

  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TTI::TargetCostKind CostKind,
                                           ArrayRef<Value *> VL = {});

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) const;

  bool enableSelectOptimize() { return ST->enableSelectOptimize(); }

  bool shouldTreatInstructionLikeSelect(const Instruction *I);

  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const {
    // We can vectorize store v4i8.
    if (ScalarMemTy->isIntegerTy(8) && isPowerOf2_32(VF) && VF >= 4)
      return 4;

    return BaseT::getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
  }

  std::optional<unsigned> getMinPageSize() const { return 4096; }

  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2);

  bool isProfitableToSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const;
  /// @}
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H