//===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file describes how to lower LLVM code to machine code. This has three
/// main components:
///
///  1. Which ValueTypes are natively supported by the target.
///  2. Which operations are supported for supported ValueTypes.
///  3. Cost thresholds for alternative implementations of certain operations.
///
/// In addition it has a few other components, like information about FP
/// immediates.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_TARGETLOWERING_H
#define LLVM_CODEGEN_TARGETLOWERING_H

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/RuntimeLibcalls.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstdint>
#include <iterator>
#include <map>
#include <string>
#include <utility>
#include <vector>

namespace llvm {

class AssumptionCache;
class CCState;
class CCValAssign;
enum class ComplexDeinterleavingOperation;
enum class ComplexDeinterleavingRotation;
class Constant;
class FastISel;
class FunctionLoweringInfo;
class GlobalValue;
class Loop;
class GISelKnownBits;
class IntrinsicInst;
class IRBuilderBase;
struct KnownBits;
class LLVMContext;
class MachineBasicBlock;
class MachineFunction;
class MachineInstr;
class MachineJumpTableInfo;
class MachineLoop;
class MachineRegisterInfo;
class MCContext;
class MCExpr;
class Module;
class ProfileSummaryInfo;
class TargetLibraryInfo;
class TargetMachine;
class TargetRegisterClass;
class TargetRegisterInfo;
class TargetTransformInfo;
class Value;

namespace Sched {

enum Preference : uint8_t {
  None,        // No preference
  Source,      // Follow source order.
  RegPressure, // Scheduling for lowest register pressure.
  Hybrid,      // Scheduling for both latency and register pressure.
  ILP,         // Scheduling for ILP in low register pressure mode.
  VLIW,        // Scheduling for VLIW targets.
  Fast,        // Fast suboptimal list scheduling
  Linearize,   // Linearize DAG, no scheduling
  Last = Linearize // Marker for the last Sched::Preference
};

} // end namespace Sched

// MemOp models a memory operation, either memset or memcpy/memmove.
struct MemOp {
private:
  // Shared
  uint64_t Size;
  bool DstAlignCanChange; // true if destination alignment can satisfy any
                          // constraint.
  Align DstAlign;         // Specified alignment of the memory operation.

  bool AllowOverlap;
  // memset only
  bool IsMemset;   // If set, this memory operation is a memset.
  bool ZeroMemset; // If set, clears out memory with zeros.
  // memcpy only
  bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register
                     // constant so it does not need to be loaded.
  Align SrcAlign;    // Inferred alignment of the source or default value if
                     // the memory operation does not need to load the value.
public:
  static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
                    Align SrcAlign, bool IsVolatile,
                    bool MemcpyStrSrc = false) {
    MemOp Op;
    Op.Size = Size;
    Op.DstAlignCanChange = DstAlignCanChange;
    Op.DstAlign = DstAlign;
    Op.AllowOverlap = !IsVolatile;
    Op.IsMemset = false;
    Op.ZeroMemset = false;
    Op.MemcpyStrSrc = MemcpyStrSrc;
    Op.SrcAlign = SrcAlign;
    return Op;
  }

  static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
                   bool IsZeroMemset, bool IsVolatile) {
    MemOp Op;
    Op.Size = Size;
    Op.DstAlignCanChange = DstAlignCanChange;
    Op.DstAlign = DstAlign;
    Op.AllowOverlap = !IsVolatile;
    Op.IsMemset = true;
    Op.ZeroMemset = IsZeroMemset;
    Op.MemcpyStrSrc = false;
    return Op;
  }

  uint64_t size() const { return Size; }
  Align getDstAlign() const {
    assert(!DstAlignCanChange);
    return DstAlign;
  }
  bool isFixedDstAlign() const { return !DstAlignCanChange; }
  bool allowOverlap() const { return AllowOverlap; }
  bool isMemset() const { return IsMemset; }
  bool isMemcpy() const { return !IsMemset; }
  bool isMemcpyWithFixedDstAlign() const {
    return isMemcpy() && !DstAlignCanChange;
  }
  bool isZeroMemset() const { return isMemset() && ZeroMemset; }
  bool isMemcpyStrSrc() const {
    assert(isMemcpy() && "Must be a memcpy");
    return MemcpyStrSrc;
  }
  Align getSrcAlign() const {
    assert(isMemcpy() && "Must be a memcpy");
    return SrcAlign;
  }
  bool isSrcAligned(Align AlignCheck) const {
    return isMemset() || llvm::isAligned(AlignCheck, SrcAlign.value());
  }
  bool isDstAligned(Align AlignCheck) const {
    return DstAlignCanChange || llvm::isAligned(AlignCheck, DstAlign.value());
  }
  bool isAligned(Align AlignCheck) const {
    return isSrcAligned(AlignCheck) && isDstAligned(AlignCheck);
  }
};

/// This base class for TargetLowering contains the SelectionDAG-independent
/// parts that can be used from the rest of CodeGen.
class TargetLoweringBase {
public:
  /// This enum indicates whether operations are valid for a target, and if
  /// not, what action should be used to make them valid.
  enum LegalizeAction : uint8_t {
    Legal,   // The target natively supports this operation.
    Promote, // This operation should be executed in a larger type.
    Expand,  // Try to expand this to other ops, otherwise use a libcall.
    LibCall, // Don't try to expand this to other ops, always use a libcall.
    Custom   // Use the LowerOperation hook to implement custom lowering.
  };

  /// This enum indicates whether types are legal for a target, and if not,
  /// what action should be used to make them legal.
  enum LegalizeTypeAction : uint8_t {
    TypeLegal,           // The target natively supports this type.
    TypePromoteInteger,  // Replace this integer with a larger one.
    TypeExpandInteger,   // Split this integer into two of half the size.
    TypeSoftenFloat,     // Convert this float to a same size integer type.
    TypeExpandFloat,     // Split this float into two of half the size.
    TypeScalarizeVector, // Replace this one-element vector with its element.
    TypeSplitVector,     // Split this vector into two of half the size.
    TypeWidenVector,     // This vector should be widened into a larger vector.
    TypePromoteFloat,    // Replace this float with a larger one.
    TypeSoftPromoteHalf, // Soften half to i16 and use float to do arithmetic.
    TypeScalarizeScalableVector, // This action is explicitly left
                         // unimplemented. While it is theoretically possible
                         // to legalize operations on scalable types with a
                         // loop that handles the vscale * #lanes of the
                         // vector, this is non-trivial at SelectionDAG
                         // level and these types are better to be
                         // widened or promoted.
  };

  /// LegalizeKind holds the legalization kind that needs to happen to EVT
  /// in order to type-legalize it.
  using LegalizeKind = std::pair<LegalizeTypeAction, EVT>;

  /// Enum that describes how the target represents true/false values.
  enum BooleanContent {
    UndefinedBooleanContent,        // Only bit 0 counts, the rest can hold garbage.
    ZeroOrOneBooleanContent,        // All bits zero except for bit 0.
    ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
  };

  /// Enum that describes what type of support for selects the target has.
  enum SelectSupportKind {
    ScalarValSelect,     // The target supports scalar selects (ex: cmov).
    ScalarCondVectorVal, // The target supports selects with a scalar condition
                         // and vector values (ex: cmov).
    VectorMaskSelect     // The target supports vector selects with a vector
                         // mask (ex: x86 blends).
  };

  /// Enum that specifies what an atomic load/AtomicRMWInst is expanded
  /// to, if at all. Exists because different targets have different levels of
  /// support for these atomic instructions, and also have different options
  /// w.r.t. what they should expand to.
  enum class AtomicExpansionKind {
    None,          // Don't expand the instruction.
    CastToInteger, // Cast the atomic instruction to another type, e.g. from
                   // floating-point to integer type.
    LLSC,          // Expand the instruction into load-linked/store-conditional;
                   // used by ARM/AArch64.
    LLOnly,        // Expand the (load) instruction into just a load-linked,
                   // which has greater atomic guarantees than a normal load.
    CmpXChg,       // Expand the instruction into cmpxchg; used by at least X86.
    MaskedIntrinsic,  // Use a target-specific intrinsic for the LL/SC loop.
    BitTestIntrinsic, // Use a target-specific intrinsic for special bit
                      // operations; used by X86.
    CmpArithIntrinsic, // Use a target-specific intrinsic for special compare
                       // operations; used by X86.
    Expand,        // Generic expansion in terms of other atomic operations.

    // Rewrite to a non-atomic form for use in a known non-preemptible
    // environment.
    NotAtomic
  };

  /// Enum that specifies when a multiplication should be expanded.
  enum class MulExpansionKind {
    Always,            // Always expand the instruction.
    OnlyLegalOrCustom, // Only expand when the resulting instructions are legal
                       // or custom.
  };

  /// Enum that specifies when a float negation is beneficial.
  enum class NegatibleCost {
    Cheaper = 0,  // Negated expression is cheaper.
    Neutral = 1,  // Negated expression has the same cost.
    Expensive = 2 // Negated expression is more expensive.
  };

  /// Enum of different potentially desirable ways to fold (and/or (setcc ...),
  /// (setcc ...)).
  enum AndOrSETCCFoldKind : uint8_t {
    None = 0,   // No fold is preferable.
    AddAnd = 1, // Fold with `Add` op and `And` op is preferable.
    NotAnd = 2, // Fold with `Not` op and `And` op is preferable.
    ABS = 4,    // Fold with `llvm.abs` op is preferable.
  };

  class ArgListEntry {
  public:
    Value *Val = nullptr;
    SDValue Node = SDValue();
    Type *Ty = nullptr;
    bool IsSExt : 1;
    bool IsZExt : 1;
    bool IsNoExt : 1;
    bool IsInReg : 1;
    bool IsSRet : 1;
    bool IsNest : 1;
    bool IsByVal : 1;
    bool IsByRef : 1;
    bool IsInAlloca : 1;
    bool IsPreallocated : 1;
    bool IsReturned : 1;
    bool IsSwiftSelf : 1;
    bool IsSwiftAsync : 1;
    bool IsSwiftError : 1;
    bool IsCFGuardTarget : 1;
    MaybeAlign Alignment = std::nullopt;
    Type *IndirectType = nullptr;

    ArgListEntry()
        : IsSExt(false), IsZExt(false), IsNoExt(false), IsInReg(false),
          IsSRet(false), IsNest(false), IsByVal(false), IsByRef(false),
          IsInAlloca(false), IsPreallocated(false), IsReturned(false),
          IsSwiftSelf(false), IsSwiftAsync(false), IsSwiftError(false),
          IsCFGuardTarget(false) {}

    void setAttributes(const CallBase *Call, unsigned ArgIdx);
  };
  using ArgListTy = std::vector<ArgListEntry>;

  virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                                     ArgListTy &Args) const {}

  static ISD::NodeType getExtendForContent(BooleanContent Content) {
    switch (Content) {
    case UndefinedBooleanContent:
      // Extend by adding rubbish bits.
      return ISD::ANY_EXTEND;
    case ZeroOrOneBooleanContent:
      // Extend by adding zero bits.
      return ISD::ZERO_EXTEND;
    case ZeroOrNegativeOneBooleanContent:
      // Extend by copying the sign bit.
      return ISD::SIGN_EXTEND;
    }
    llvm_unreachable("Invalid content kind");
  }

  explicit TargetLoweringBase(const TargetMachine &TM);
  TargetLoweringBase(const TargetLoweringBase &) = delete;
  TargetLoweringBase &operator=(const TargetLoweringBase &) = delete;
  virtual ~TargetLoweringBase() = default;

  /// Return true if the target supports strict float operations.
  bool isStrictFPEnabled() const {
    return IsStrictFPEnabled;
  }

protected:
  /// Initialize all of the actions to default values.
  void initActions();

public:
  const TargetMachine &getTargetMachine() const { return TM; }

  virtual bool useSoftFloat() const { return false; }

  /// Return the pointer type for the given address space, defaults to
  /// the pointer type from the data layout.
  /// FIXME: The default needs to be removed once all the code is updated.
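  ///
  /// For example, with a data layout where address space 0 pointers are 64
  /// bits wide (e.g. a "p:64:64" entry in the layout string), this default
  /// returns MVT::i64. A typical use at lowering time is a sketch like the
  /// following, where TLI and DAG stand for the usual local TargetLowering
  /// and SelectionDAG handles:
  /// \code
  ///   MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
  /// \endcode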
371 virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const { 372 return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); 373 } 374 375 /// Return the in-memory pointer type for the given address space, defaults to 376 /// the pointer type from the data layout. 377 /// FIXME: The default needs to be removed once all the code is updated. 378 virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const { 379 return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); 380 } 381 382 /// Return the type for frame index, which is determined by 383 /// the alloca address space specified through the data layout. 384 MVT getFrameIndexTy(const DataLayout &DL) const { 385 return getPointerTy(DL, DL.getAllocaAddrSpace()); 386 } 387 388 /// Return the type for code pointers, which is determined by the program 389 /// address space specified through the data layout. 390 MVT getProgramPointerTy(const DataLayout &DL) const { 391 return getPointerTy(DL, DL.getProgramAddressSpace()); 392 } 393 394 /// Return the type for operands of fence. 395 /// TODO: Let fence operands be of i32 type and remove this. 396 virtual MVT getFenceOperandTy(const DataLayout &DL) const { 397 return getPointerTy(DL); 398 } 399 400 /// Return the type to use for a scalar shift opcode, given the shifted amount 401 /// type. Targets should return a legal type if the input type is legal. 402 /// Targets can return a type that is too small if the input type is illegal. 403 virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; 404 405 /// Returns the type for the shift amount of a shift opcode. For vectors, 406 /// returns the input type. For scalars, calls getScalarShiftAmountTy. 407 /// If getScalarShiftAmountTy type cannot represent all possible shift 408 /// amounts, returns MVT::i32. 409 EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const; 410 411 /// Return the preferred type to use for a shift opcode, given the shifted 412 /// amount type is \p ShiftValueTy. 413 LLVM_READONLY 414 virtual LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const { 415 return ShiftValueTy; 416 } 417 418 /// Returns the type to be used for the index operand of: 419 /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, 420 /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR 421 virtual MVT getVectorIdxTy(const DataLayout &DL) const { 422 return getPointerTy(DL); 423 } 424 425 /// Returns the type to be used for the EVL/AVL operand of VP nodes: 426 /// ISD::VP_ADD, ISD::VP_SUB, etc. It must be a legal scalar integer type, 427 /// and must be at least as large as i32. The EVL is implicitly zero-extended 428 /// to any larger type. 429 virtual MVT getVPExplicitVectorLengthTy() const { return MVT::i32; } 430 431 /// This callback is used to inspect load/store instructions and add 432 /// target-specific MachineMemOperand flags to them. The default 433 /// implementation does nothing. 434 virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const { 435 return MachineMemOperand::MONone; 436 } 437 438 /// This callback is used to inspect load/store SDNode. 439 /// The default implementation does nothing. 
440 virtual MachineMemOperand::Flags 441 getTargetMMOFlags(const MemSDNode &Node) const { 442 return MachineMemOperand::MONone; 443 } 444 445 MachineMemOperand::Flags 446 getLoadMemOperandFlags(const LoadInst &LI, const DataLayout &DL, 447 AssumptionCache *AC = nullptr, 448 const TargetLibraryInfo *LibInfo = nullptr) const; 449 MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI, 450 const DataLayout &DL) const; 451 MachineMemOperand::Flags getAtomicMemOperandFlags(const Instruction &AI, 452 const DataLayout &DL) const; 453 454 virtual bool isSelectSupported(SelectSupportKind /*kind*/) const { 455 return true; 456 } 457 458 /// Return true if the @llvm.experimental.vector.partial.reduce.* intrinsic 459 /// should be expanded using generic code in SelectionDAGBuilder. 460 virtual bool 461 shouldExpandPartialReductionIntrinsic(const IntrinsicInst *I) const { 462 return true; 463 } 464 465 /// Return true if the @llvm.get.active.lane.mask intrinsic should be expanded 466 /// using generic code in SelectionDAGBuilder. 467 virtual bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const { 468 return true; 469 } 470 471 virtual bool shouldExpandGetVectorLength(EVT CountVT, unsigned VF, 472 bool IsScalable) const { 473 return true; 474 } 475 476 /// Return true if the @llvm.experimental.cttz.elts intrinsic should be 477 /// expanded using generic code in SelectionDAGBuilder. 478 virtual bool shouldExpandCttzElements(EVT VT) const { return true; } 479 480 /// Return the minimum number of bits required to hold the maximum possible 481 /// number of trailing zero vector elements. 482 unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, 483 bool ZeroIsPoison, 484 const ConstantRange *VScaleRange) const; 485 486 /// Return true if the @llvm.experimental.vector.match intrinsic should be 487 /// expanded for vector type `VT' and search size `SearchSize' using generic 488 /// code in SelectionDAGBuilder. 489 virtual bool shouldExpandVectorMatch(EVT VT, unsigned SearchSize) const { 490 return true; 491 } 492 493 // Return true if op(vecreduce(x), vecreduce(y)) should be reassociated to 494 // vecreduce(op(x, y)) for the reduction opcode RedOpc. 495 virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const { 496 return true; 497 } 498 499 /// Return true if it is profitable to convert a select of FP constants into 500 /// a constant pool load whose address depends on the select condition. The 501 /// parameter may be used to differentiate a select with FP compare from 502 /// integer compare. 503 virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const { 504 return true; 505 } 506 507 /// Return true if multiple condition registers are available. 508 bool hasMultipleConditionRegisters() const { 509 return HasMultipleConditionRegisters; 510 } 511 512 /// Return true if the target has BitExtract instructions. 513 bool hasExtractBitsInsn() const { return HasExtractBitsInsn; } 514 515 /// Return the preferred vector type legalization action. 516 virtual TargetLoweringBase::LegalizeTypeAction 517 getPreferredVectorAction(MVT VT) const { 518 // The default action for one element vectors is to scalarize 519 if (VT.getVectorElementCount().isScalar()) 520 return TypeScalarizeVector; 521 // The default action for an odd-width vector is to widen. 
    if (!VT.isPow2VectorType())
      return TypeWidenVector;
    // The default action for other vectors is to promote
    return TypePromoteInteger;
  }

  // Return true if the half type should be promoted using soft promotion rules
  // where each operation is promoted to f32 individually, then converted to
  // fp16. The default behavior is to promote chains of operations, keeping
  // intermediate results in f32 precision and range.
  virtual bool softPromoteHalfType() const { return false; }

  // Return true if, for soft-promoted half, the half type should be passed
  // to and returned from functions as f32. The default behavior is to
  // pass as i16. If soft-promoted half is not used, this function is ignored
  // and values are always passed and returned as f32.
  virtual bool useFPRegsForHalfType() const { return false; }

  // There are two general methods for expanding a BUILD_VECTOR node:
  //  1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
  //     them together.
  //  2. Build the vector on the stack and then load it.
  // If this function returns true, then method (1) will be used, subject to
  // the constraint that all of the necessary shuffles are legal (as determined
  // by isShuffleMaskLegal). If this function returns false, then method (2) is
  // always used. The vector type, and the number of defined values, are
  // provided.
  virtual bool
  shouldExpandBuildVectorWithShuffles(EVT /* VT */,
                                      unsigned DefinedValues) const {
    return DefinedValues < 3;
  }

  /// Return true if integer divide is usually cheaper than a sequence of
  /// several shifts, adds, and multiplies for this target.
  /// The definition of "cheaper" may depend on whether we're optimizing
  /// for speed or for size.
  virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; }

  /// Return true if the target can handle a standalone remainder operation.
  virtual bool hasStandaloneRem(EVT VT) const {
    return true;
  }

  /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
  virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const {
    // Default behavior is to replace SQRT(X) with X*RSQRT(X).
    return false;
  }

  /// Reciprocal estimate status values used by the functions below.
  enum ReciprocalEstimate : int {
    Unspecified = -1,
    Disabled = 0,
    Enabled = 1
  };

  /// Return a ReciprocalEstimate enum value for a square root of the given
  /// type based on the function's attributes. If the operation is not
  /// overridden by the function's attributes, "Unspecified" is returned and
  /// target defaults are expected to be used for instruction selection.
  int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const;

  /// Return a ReciprocalEstimate enum value for a division of the given type
  /// based on the function's attributes. If the operation is not overridden by
  /// the function's attributes, "Unspecified" is returned and target defaults
  /// are expected to be used for instruction selection.
  int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const;

  /// Return the refinement step count for a square root of the given type
  /// based on the function's attributes. If the operation is not overridden by
  /// the function's attributes, "Unspecified" is returned and target defaults
  /// are expected to be used for instruction selection.
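  ///
  /// For example, a function whose "reciprocal-estimates" attribute requests
  /// two refinement steps for the square-root estimate of the matching type
  /// would cause this to return 2 (an illustration of the attribute-driven
  /// override, not a statement about any particular attribute spelling).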
  int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const;

  /// Return the refinement step count for a division of the given type based
  /// on the function's attributes. If the operation is not overridden by
  /// the function's attributes, "Unspecified" is returned and target defaults
  /// are expected to be used for instruction selection.
  int getDivRefinementSteps(EVT VT, MachineFunction &MF) const;

  /// Returns true if the target has indicated at least one type should be
  /// bypassed.
  bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }

  /// Returns map of slow types for division or remainder with corresponding
  /// fast types.
  const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const {
    return BypassSlowDivWidths;
  }

  /// Return true only if vscale must be a power of two.
  virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; }

  /// Return true if Flow Control is an expensive operation that should be
  /// avoided.
  bool isJumpExpensive() const { return JumpIsExpensive; }

  // Cost parameters used by
  // SelectionDAGBuilder::shouldKeepJumpConditionsTogether.
  // shouldKeepJumpConditionsTogether will use these parameter values to
  // determine if two conditions in the form `br (and/or cond1, cond2)` should
  // be split into two branches or left as one.
  //
  // BaseCost is the cost threshold (in latency). If the estimated latency of
  // computing both `cond1` and `cond2` is below the cost of just computing
  // `cond1` + BaseCost, the two conditions will be kept together. Otherwise
  // they will be split.
  //
  // LikelyBias increases BaseCost if branch probability info indicates that it
  // is likely that both `cond1` and `cond2` will be computed.
  //
  // UnlikelyBias decreases BaseCost if branch probability info indicates that
  // it is unlikely that both `cond1` and `cond2` will be computed.
  //
  // Set any field to -1 to make it ignored (setting BaseCost to -1 results in
  // `shouldKeepJumpConditionsTogether` always returning false).
  struct CondMergingParams {
    int BaseCost;
    int LikelyBias;
    int UnlikelyBias;
  };
  // Return params for deciding if we should keep two branch conditions merged
  // or split them into two separate branches.
  // Arg0: The binary op joining the two conditions (and/or).
  // Arg1: The first condition (cond1)
  // Arg2: The second condition (cond2)
  virtual CondMergingParams
  getJumpConditionMergingParams(Instruction::BinaryOps, const Value *,
                                const Value *) const {
    // -1 will always result in splitting.
    return {-1, -1, -1};
  }

  /// Return true if selects are only cheaper than branches if the branch is
  /// unlikely to be predicted right.
  bool isPredictableSelectExpensive() const {
    return PredictableSelectIsExpensive;
  }

  virtual bool fallBackToDAGISel(const Instruction &Inst) const {
    return false;
  }

  /// Return true if the following transform is beneficial:
  /// fold (conv (load x)) -> (load (conv*)x)
  /// On architectures that don't natively support some vector loads
  /// efficiently, casting the load to a smaller vector of larger types and
  /// loading is more efficient; however, this can be undone by optimizations
  /// in the DAG combiner.
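  ///
  /// For example (an illustration, not a statement about any particular
  /// target): folding (f32 (bitcast (i32 load x))) into (f32 load x) is
  /// usually fine, whereas rewriting a wide vector load this way may be a
  /// pessimization if the target loads the new element type less efficiently.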
671 virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, 672 const SelectionDAG &DAG, 673 const MachineMemOperand &MMO) const; 674 675 /// Return true if the following transform is beneficial: 676 /// (store (y (conv x)), y*)) -> (store x, (x*)) 677 virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, 678 const SelectionDAG &DAG, 679 const MachineMemOperand &MMO) const { 680 // Default to the same logic as loads. 681 return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO); 682 } 683 684 /// Return true if it is expected to be cheaper to do a store of vector 685 /// constant with the given size and type for the address space than to 686 /// store the individual scalar element constants. 687 virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, 688 unsigned NumElem, 689 unsigned AddrSpace) const { 690 return IsZero; 691 } 692 693 /// Allow store merging for the specified type after legalization in addition 694 /// to before legalization. This may transform stores that do not exist 695 /// earlier (for example, stores created from intrinsics). 696 virtual bool mergeStoresAfterLegalization(EVT MemVT) const { 697 return true; 698 } 699 700 /// Returns if it's reasonable to merge stores to MemVT size. 701 virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, 702 const MachineFunction &MF) const { 703 return true; 704 } 705 706 /// Return true if it is cheap to speculate a call to intrinsic cttz. 707 virtual bool isCheapToSpeculateCttz(Type *Ty) const { 708 return false; 709 } 710 711 /// Return true if it is cheap to speculate a call to intrinsic ctlz. 712 virtual bool isCheapToSpeculateCtlz(Type *Ty) const { 713 return false; 714 } 715 716 /// Return true if ctlz instruction is fast. 717 virtual bool isCtlzFast() const { 718 return false; 719 } 720 721 /// Return true if ctpop instruction is fast. 722 virtual bool isCtpopFast(EVT VT) const { 723 return isOperationLegal(ISD::CTPOP, VT); 724 } 725 726 /// Return the maximum number of "x & (x - 1)" operations that can be done 727 /// instead of deferring to a custom CTPOP. 728 virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const { 729 return 1; 730 } 731 732 /// Return true if instruction generated for equality comparison is folded 733 /// with instruction generated for signed comparison. 734 virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; } 735 736 /// Return true if the heuristic to prefer icmp eq zero should be used in code 737 /// gen prepare. 738 virtual bool preferZeroCompareBranch() const { return false; } 739 740 /// Return true if it is cheaper to split the store of a merged int val 741 /// from a pair of smaller values into multiple stores. 742 virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const { 743 return false; 744 } 745 746 /// Return if the target supports combining a 747 /// chain like: 748 /// \code 749 /// %andResult = and %val1, #mask 750 /// %icmpResult = icmp %andResult, 0 751 /// \endcode 752 /// into a single machine instruction of a form like: 753 /// \code 754 /// cc = test %register, #mask 755 /// \endcode 756 virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { 757 return false; 758 } 759 760 /// Return true if it is valid to merge the TargetMMOFlags in two SDNodes. 761 virtual bool 762 areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, 763 const MemSDNode &NodeY) const { 764 return true; 765 } 766 767 /// Use bitwise logic to make pairs of compares more efficient. 
  /// For example:
  /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
  /// This should be true when it takes more than one instruction to lower
  /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on
  /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win.
  virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const {
    return false;
  }

  /// Return the preferred operand type if the target has a quick way to
  /// compare integer values of the given size. Assume that any legal integer
  /// type can be compared efficiently. Targets may override this to allow
  /// illegal wide types to return a vector type if there is support to
  /// compare that type.
  virtual MVT hasFastEqualityCompare(unsigned NumBits) const {
    MVT VT = MVT::getIntegerVT(NumBits);
    return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE;
  }

  /// Return true if the target should transform:
  /// (X & Y) == Y ---> (~X & Y) == 0
  /// (X & Y) != Y ---> (~X & Y) != 0
  ///
  /// This may be profitable if the target has a bitwise and-not operation that
  /// sets comparison flags. A target may want to limit the transformation
  /// based on the type of Y or if Y is a constant.
  ///
  /// Note that the transform will not occur if Y is known to be a power-of-2
  /// because a mask and compare of a single bit can be handled by inverting
  /// the predicate, for example:
  /// (X & 8) == 8 ---> (X & 8) != 0
  virtual bool hasAndNotCompare(SDValue Y) const {
    return false;
  }

  /// Return true if the target has a bitwise and-not operation:
  /// X = ~A & B
  /// This can be used to simplify select or other instructions.
  virtual bool hasAndNot(SDValue X) const {
    // If the target has the more complex version of this operation, assume
    // that it has this operation too.
    return hasAndNotCompare(X);
  }

  /// Return true if the target has a bit-test instruction:
  /// (X & (1 << Y)) ==/!= 0
  /// This knowledge can be used to prevent breaking the pattern,
  /// or creating it if it could be recognized.
  virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; }

  /// There are two ways to clear extreme bits (either low or high):
  /// Mask:   x & (-1 << y)  (the instcombine canonical form)
  /// Shifts: x >> y << y
  /// Return true if the variant with 2 variable shifts is preferred.
  /// Return false if there is no preference.
  virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const {
    // By default, let's assume that no one prefers shifts.
    return false;
  }

  /// Return true if it is profitable to fold a pair of shifts into a mask.
  /// This is usually true on most targets. But some targets, like Thumb1,
  /// have immediate shift instructions, but no immediate "and" instruction;
  /// this makes the fold unprofitable.
  virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                                 CombineLevel Level) const {
    return true;
  }

  /// Should we transform the IR-optimal check for whether given truncation
  /// down into KeptBits would be truncating or not:
  ///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
  /// Into its more traditional form:
  ///   ((%x << C) a>> C) dstcond %x
  /// Return true if we should transform.
  /// Return false if there is no preference.
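  ///
  /// As an illustration (one concrete instantiation of the two forms above):
  /// with %x : i32 and KeptBits == 8, so C == 32 - 8 == 24, the check that
  /// truncation of %x to i8 is lossless can be written either as
  /// \code
  ///   %t = add i32 %x, 128        ; 1 << (KeptBits - 1)
  ///   %r = icmp ult i32 %t, 256   ; srccond (1 << KeptBits)
  /// \endcode
  /// or as
  /// \code
  ///   %s = shl i32 %x, 24
  ///   %a = ashr i32 %s, 24
  ///   %r = icmp eq i32 %a, %x
  /// \endcode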
842 virtual bool shouldTransformSignedTruncationCheck(EVT XVT, 843 unsigned KeptBits) const { 844 // By default, let's assume that no one prefers shifts. 845 return false; 846 } 847 848 /// Given the pattern 849 /// (X & (C l>>/<< Y)) ==/!= 0 850 /// return true if it should be transformed into: 851 /// ((X <</l>> Y) & C) ==/!= 0 852 /// WARNING: if 'X' is a constant, the fold may deadlock! 853 /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat() 854 /// here because it can end up being not linked in. 855 virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( 856 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, 857 unsigned OldShiftOpcode, unsigned NewShiftOpcode, 858 SelectionDAG &DAG) const { 859 if (hasBitTest(X, Y)) { 860 // One interesting pattern that we'd want to form is 'bit test': 861 // ((1 << Y) & C) ==/!= 0 862 // But we also need to be careful not to try to reverse that fold. 863 864 // Is this '1 << Y' ? 865 if (OldShiftOpcode == ISD::SHL && CC->isOne()) 866 return false; // Keep the 'bit test' pattern. 867 868 // Will it be '1 << Y' after the transform ? 869 if (XC && NewShiftOpcode == ISD::SHL && XC->isOne()) 870 return true; // Do form the 'bit test' pattern. 871 } 872 873 // If 'X' is a constant, and we transform, then we will immediately 874 // try to undo the fold, thus causing endless combine loop. 875 // So by default, let's assume everyone prefers the fold 876 // iff 'X' is not a constant. 877 return !XC; 878 } 879 880 // Return true if its desirable to perform the following transform: 881 // (fmul C, (uitofp Pow2)) 882 // -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa)) 883 // (fdiv C, (uitofp Pow2)) 884 // -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa)) 885 // 886 // This is only queried after we have verified the transform will be bitwise 887 // equals. 888 // 889 // SDNode *N : The FDiv/FMul node we want to transform. 890 // SDValue FPConst: The Float constant operand in `N`. 891 // SDValue IntPow2: The Integer power of 2 operand in `N`. 892 virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, 893 SDValue IntPow2) const { 894 // Default to avoiding fdiv which is often very expensive. 895 return N->getOpcode() == ISD::FDIV; 896 } 897 898 // Given: 899 // (icmp eq/ne (and X, C0), (shift X, C1)) 900 // or 901 // (icmp eq/ne X, (rotate X, CPow2)) 902 903 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the 904 // remaining bits (i.e something like `(x64 & UINT32_MAX) == (x64 >> 32)`) 905 // Do we prefer the shift to be shift-right, shift-left, or rotate. 906 // Note: Its only valid to convert the rotate version to the shift version iff 907 // the shift-amt (`C1`) is a power of 2 (including 0). 908 // If ShiftOpc (current Opcode) is returned, do nothing. 909 virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand( 910 EVT VT, unsigned ShiftOpc, bool MayTransformRotate, 911 const APInt &ShiftOrRotateAmt, 912 const std::optional<APInt> &AndMask) const { 913 return ShiftOpc; 914 } 915 916 /// These two forms are equivalent: 917 /// sub %y, (xor %x, -1) 918 /// add (add %x, 1), %y 919 /// The variant with two add's is IR-canonical. 920 /// Some targets may prefer one to the other. 921 virtual bool preferIncOfAddToSubOfNot(EVT VT) const { 922 // By default, let's assume that everyone prefers the form with two add's. 923 return true; 924 } 925 926 // By default prefer folding (abs (sub nsw x, y)) -> abds(x, y). 
Some targets 927 // may want to avoid this to prevent loss of sub_nsw pattern. 928 virtual bool preferABDSToABSWithNSW(EVT VT) const { 929 return true; 930 } 931 932 // Return true if the target wants to transform Op(Splat(X)) -> Splat(Op(X)) 933 virtual bool preferScalarizeSplat(SDNode *N) const { return true; } 934 935 // Return true if the target wants to transform: 936 // (TruncVT truncate(sext_in_reg(VT X, ExtVT)) 937 // -> (TruncVT sext_in_reg(truncate(VT X), ExtVT)) 938 // Some targets might prefer pre-sextinreg to improve truncation/saturation. 939 virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const { 940 return true; 941 } 942 943 /// Return true if the target wants to use the optimization that 944 /// turns ext(promotableInst1(...(promotableInstN(load)))) into 945 /// promotedInst1(...(promotedInstN(ext(load)))). 946 bool enableExtLdPromotion() const { return EnableExtLdPromotion; } 947 948 /// Return true if the target can combine store(extractelement VectorTy, 949 /// Idx). 950 /// \p Cost[out] gives the cost of that transformation when this is true. 951 virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, 952 unsigned &Cost) const { 953 return false; 954 } 955 956 /// Return true if the target shall perform extract vector element and store 957 /// given that the vector is known to be splat of constant. 958 /// \p Index[out] gives the index of the vector element to be extracted when 959 /// this is true. 960 virtual bool shallExtractConstSplatVectorElementToStore( 961 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const { 962 return false; 963 } 964 965 /// Return true if inserting a scalar into a variable element of an undef 966 /// vector is more efficiently handled by splatting the scalar instead. 967 virtual bool shouldSplatInsEltVarIndex(EVT) const { 968 return false; 969 } 970 971 /// Return true if target always benefits from combining into FMA for a 972 /// given value type. This must typically return false on targets where FMA 973 /// takes more cycles to execute than FADD. 974 virtual bool enableAggressiveFMAFusion(EVT VT) const { return false; } 975 976 /// Return true if target always benefits from combining into FMA for a 977 /// given value type. This must typically return false on targets where FMA 978 /// takes more cycles to execute than FADD. 979 virtual bool enableAggressiveFMAFusion(LLT Ty) const { return false; } 980 981 /// Return the ValueType of the result of SETCC operations. 982 virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, 983 EVT VT) const; 984 985 /// Return the ValueType for comparison libcalls. Comparison libcalls include 986 /// floating point comparison calls, and Ordered/Unordered check calls on 987 /// floating point numbers. 988 virtual 989 MVT::SimpleValueType getCmpLibcallReturnType() const; 990 991 /// For targets without i1 registers, this gives the nature of the high-bits 992 /// of boolean values held in types wider than i1. 993 /// 994 /// "Boolean values" are special true/false values produced by nodes like 995 /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND. 996 /// Not to be confused with general values promoted from i1. Some cpus 997 /// distinguish between vectors of boolean and scalars; the isVec parameter 998 /// selects between the two kinds. For example on X86 a scalar boolean should 999 /// be zero extended from i1, while the elements of a vector of booleans 1000 /// should be sign extended from i1. 
  ///
  /// Some cpus also treat floating point types the same way as they treat
  /// vectors instead of the way they treat scalars.
  BooleanContent getBooleanContents(bool isVec, bool isFloat) const {
    if (isVec)
      return BooleanVectorContents;
    return isFloat ? BooleanFloatContents : BooleanContents;
  }

  BooleanContent getBooleanContents(EVT Type) const {
    return getBooleanContents(Type.isVector(), Type.isFloatingPoint());
  }

  /// Promote the given target boolean to a target boolean of the given type.
  /// A target boolean is an integer value, not necessarily of type i1, the
  /// bits of which conform to getBooleanContents.
  ///
  /// ValVT is the type of values that produced the boolean.
  SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool,
                               EVT ValVT) const {
    SDLoc dl(Bool);
    EVT BoolVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ValVT);
    ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(ValVT));
    return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
  }

  /// Return target scheduling preference.
  Sched::Preference getSchedulingPreference() const {
    return SchedPreferenceInfo;
  }

  /// Some schedulers, e.g. hybrid, can switch to different scheduling
  /// heuristics for different nodes. This function returns the preference
  /// (or none) for the given node.
  virtual Sched::Preference getSchedulingPreference(SDNode *) const {
    return Sched::None;
  }

  /// Return the register class that should be used for the specified value
  /// type.
  virtual const TargetRegisterClass *
  getRegClassFor(MVT VT, bool isDivergent = false) const {
    (void)isDivergent;
    const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
    assert(RC && "This value type is not natively supported!");
    return RC;
  }

  /// Allows the target to decide about the register class of the
  /// specific value that is live outside the defining block.
  /// Returns true if the value needs a uniform register class.
  virtual bool requiresUniformRegister(MachineFunction &MF,
                                       const Value *) const {
    return false;
  }

  /// Return the 'representative' register class for the specified value
  /// type.
  ///
  /// The 'representative' register class is the largest legal super-reg
  /// register class for the register class of the value type. For example, on
  /// i386 the rep register class for i8, i16, and i32 is GR32; while the rep
  /// register class is GR64 on x86_64.
  virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
    const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy];
    return RC;
  }

  /// Return the cost of the 'representative' register class for the specified
  /// value type.
  virtual uint8_t getRepRegClassCostFor(MVT VT) const {
    return RepRegClassCostForVT[VT.SimpleTy];
  }

  /// Return the preferred strategy to legalize this SHIFT instruction, with
  /// \p ExpansionFactor being the recursion depth - how many expansions are
  /// needed.
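  ///
  /// For example (an illustrative sketch, not a guarantee about any specific
  /// target): a shift of an i128 value on a 64-bit target is typically
  /// expanded to parts (two registers), while a much wider shift may be
  /// cheaper to expand through the stack or to lower to a libcall.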
1077 enum class ShiftLegalizationStrategy { 1078 ExpandToParts, 1079 ExpandThroughStack, 1080 LowerToLibcall 1081 }; 1082 virtual ShiftLegalizationStrategy 1083 preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, 1084 unsigned ExpansionFactor) const { 1085 if (ExpansionFactor == 1) 1086 return ShiftLegalizationStrategy::ExpandToParts; 1087 return ShiftLegalizationStrategy::ExpandThroughStack; 1088 } 1089 1090 /// Return true if the target has native support for the specified value type. 1091 /// This means that it has a register that directly holds it without 1092 /// promotions or expansions. 1093 bool isTypeLegal(EVT VT) const { 1094 assert(!VT.isSimple() || 1095 (unsigned)VT.getSimpleVT().SimpleTy < std::size(RegClassForVT)); 1096 return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr; 1097 } 1098 1099 class ValueTypeActionImpl { 1100 /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum 1101 /// that indicates how instruction selection should deal with the type. 1102 LegalizeTypeAction ValueTypeActions[MVT::VALUETYPE_SIZE]; 1103 1104 public: 1105 ValueTypeActionImpl() { 1106 std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), 1107 TypeLegal); 1108 } 1109 1110 LegalizeTypeAction getTypeAction(MVT VT) const { 1111 return ValueTypeActions[VT.SimpleTy]; 1112 } 1113 1114 void setTypeAction(MVT VT, LegalizeTypeAction Action) { 1115 ValueTypeActions[VT.SimpleTy] = Action; 1116 } 1117 }; 1118 1119 const ValueTypeActionImpl &getValueTypeActions() const { 1120 return ValueTypeActions; 1121 } 1122 1123 /// Return pair that represents the legalization kind (first) that needs to 1124 /// happen to EVT (second) in order to type-legalize it. 1125 /// 1126 /// First: how we should legalize values of this type, either it is already 1127 /// legal (return 'Legal') or we need to promote it to a larger type (return 1128 /// 'Promote'), or we need to expand it into multiple registers of smaller 1129 /// integer type (return 'Expand'). 'Custom' is not an option. 1130 /// 1131 /// Second: for types supported by the target, this is an identity function. 1132 /// For types that must be promoted to larger types, this returns the larger 1133 /// type to promote to. For integer types that are larger than the largest 1134 /// integer register, this contains one step in the expansion to get to the 1135 /// smaller register. For illegal floating point types, this returns the 1136 /// integer type to transform to. 1137 LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const; 1138 1139 /// Return how we should legalize values of this type, either it is already 1140 /// legal (return 'Legal') or we need to promote it to a larger type (return 1141 /// 'Promote'), or we need to expand it into multiple registers of smaller 1142 /// integer type (return 'Expand'). 'Custom' is not an option. 1143 LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const { 1144 return getTypeConversion(Context, VT).first; 1145 } 1146 LegalizeTypeAction getTypeAction(MVT VT) const { 1147 return ValueTypeActions.getTypeAction(VT); 1148 } 1149 1150 /// For types supported by the target, this is an identity function. For 1151 /// types that must be promoted to larger types, this returns the larger type 1152 /// to promote to. For integer types that are larger than the largest integer 1153 /// register, this contains one step in the expansion to get to the smaller 1154 /// register. 
For illegal floating point types, this returns the integer type 1155 /// to transform to. 1156 virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const { 1157 return getTypeConversion(Context, VT).second; 1158 } 1159 1160 /// For types supported by the target, this is an identity function. For 1161 /// types that must be expanded (i.e. integer types that are larger than the 1162 /// largest integer register or illegal floating point types), this returns 1163 /// the largest legal type it will be expanded to. 1164 EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const { 1165 assert(!VT.isVector()); 1166 while (true) { 1167 switch (getTypeAction(Context, VT)) { 1168 case TypeLegal: 1169 return VT; 1170 case TypeExpandInteger: 1171 VT = getTypeToTransformTo(Context, VT); 1172 break; 1173 default: 1174 llvm_unreachable("Type is not legal nor is it to be expanded!"); 1175 } 1176 } 1177 } 1178 1179 /// Vector types are broken down into some number of legal first class types. 1180 /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8 1181 /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64 1182 /// turns into 4 EVT::i32 values with both PPC and X86. 1183 /// 1184 /// This method returns the number of registers needed, and the VT for each 1185 /// register. It also returns the VT and quantity of the intermediate values 1186 /// before they are promoted/expanded. 1187 unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT, 1188 EVT &IntermediateVT, 1189 unsigned &NumIntermediates, 1190 MVT &RegisterVT) const; 1191 1192 /// Certain targets such as MIPS require that some types such as vectors are 1193 /// always broken down into scalars in some contexts. This occurs even if the 1194 /// vector type is legal. 1195 virtual unsigned getVectorTypeBreakdownForCallingConv( 1196 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, 1197 unsigned &NumIntermediates, MVT &RegisterVT) const { 1198 return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates, 1199 RegisterVT); 1200 } 1201 1202 struct IntrinsicInfo { 1203 unsigned opc = 0; // target opcode 1204 EVT memVT; // memory VT 1205 1206 // value representing memory location 1207 PointerUnion<const Value *, const PseudoSourceValue *> ptrVal; 1208 1209 // Fallback address space for use if ptrVal is nullptr. std::nullopt means 1210 // unknown address space. 1211 std::optional<unsigned> fallbackAddressSpace; 1212 1213 int offset = 0; // offset off of ptrVal 1214 uint64_t size = 0; // the size of the memory location 1215 // (taken from memVT if zero) 1216 MaybeAlign align = Align(1); // alignment 1217 1218 MachineMemOperand::Flags flags = MachineMemOperand::MONone; 1219 IntrinsicInfo() = default; 1220 }; 1221 1222 /// Given an intrinsic, checks if on the target the intrinsic will need to map 1223 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns 1224 /// true and store the intrinsic information into the IntrinsicInfo that was 1225 /// passed to the function. 1226 virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, 1227 MachineFunction &, 1228 unsigned /*Intrinsic*/) const { 1229 return false; 1230 } 1231 1232 /// Returns true if the target can instruction select the specified FP 1233 /// immediate natively. If false, the legalizer will materialize the FP 1234 /// immediate as a load from a constant pool. 
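  ///
  /// For example (illustrative only), a target that can materialize +0.0 with
  /// a single register-zeroing instruction might return true for that
  /// immediate and false for arbitrary constants that must come from memory.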
1235 virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/, 1236 bool ForCodeSize = false) const { 1237 return false; 1238 } 1239 1240 /// Targets can use this to indicate that they only support *some* 1241 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a 1242 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be 1243 /// legal. 1244 virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const { 1245 return true; 1246 } 1247 1248 /// Returns true if the operation can trap for the value type. 1249 /// 1250 /// VT must be a legal type. By default, we optimistically assume most 1251 /// operations don't trap except for integer divide and remainder. 1252 virtual bool canOpTrap(unsigned Op, EVT VT) const; 1253 1254 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there 1255 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a 1256 /// constant pool entry. 1257 virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/, 1258 EVT /*VT*/) const { 1259 return false; 1260 } 1261 1262 /// How to legalize this custom operation? 1263 virtual LegalizeAction getCustomOperationAction(SDNode &Op) const { 1264 return Legal; 1265 } 1266 1267 /// Return how this operation should be treated: either it is legal, needs to 1268 /// be promoted to a larger size, needs to be expanded to some other code 1269 /// sequence, or the target has a custom expander for it. 1270 LegalizeAction getOperationAction(unsigned Op, EVT VT) const { 1271 // If a target-specific SDNode requires legalization, require the target 1272 // to provide custom legalization for it. 1273 if (Op >= std::size(OpActions[0])) 1274 return Custom; 1275 if (VT.isExtended()) 1276 return Expand; 1277 return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; 1278 } 1279 1280 /// Custom method defined by each target to indicate if an operation which 1281 /// may require a scale is supported natively by the target. 1282 /// If not, the operation is illegal. 1283 virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT, 1284 unsigned Scale) const { 1285 return false; 1286 } 1287 1288 /// Some fixed point operations may be natively supported by the target but 1289 /// only for specific scales. This method allows for checking 1290 /// if the width is supported by the target for a given operation that may 1291 /// depend on scale. 1292 LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT, 1293 unsigned Scale) const { 1294 auto Action = getOperationAction(Op, VT); 1295 if (Action != Legal) 1296 return Action; 1297 1298 // This operation is supported in this type but may only work on specific 1299 // scales. 1300 bool Supported; 1301 switch (Op) { 1302 default: 1303 llvm_unreachable("Unexpected fixed point operation."); 1304 case ISD::SMULFIX: 1305 case ISD::SMULFIXSAT: 1306 case ISD::UMULFIX: 1307 case ISD::UMULFIXSAT: 1308 case ISD::SDIVFIX: 1309 case ISD::SDIVFIXSAT: 1310 case ISD::UDIVFIX: 1311 case ISD::UDIVFIXSAT: 1312 Supported = isSupportedFixedPointOperation(Op, VT, Scale); 1313 break; 1314 } 1315 1316 return Supported ? Action : Expand; 1317 } 1318 1319 // If Op is a strict floating-point operation, return the result 1320 // of getOperationAction for the equivalent non-strict operation. 
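  // For example, a query for ISD::STRICT_FSQRT returns whatever action is
  // registered for ISD::FSQRT on the same value type.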
1321 LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const { 1322 unsigned EqOpc; 1323 switch (Op) { 1324 default: llvm_unreachable("Unexpected FP pseudo-opcode"); 1325 #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ 1326 case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break; 1327 #define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ 1328 case ISD::STRICT_##DAGN: EqOpc = ISD::SETCC; break; 1329 #include "llvm/IR/ConstrainedOps.def" 1330 } 1331 1332 return getOperationAction(EqOpc, VT); 1333 } 1334 1335 /// Return true if the specified operation is legal on this target or can be 1336 /// made legal with custom lowering. This is used to help guide high-level 1337 /// lowering decisions. LegalOnly is an optional convenience for code paths 1338 /// traversed pre and post legalisation. 1339 bool isOperationLegalOrCustom(unsigned Op, EVT VT, 1340 bool LegalOnly = false) const { 1341 if (LegalOnly) 1342 return isOperationLegal(Op, VT); 1343 1344 return (VT == MVT::Other || isTypeLegal(VT)) && 1345 (getOperationAction(Op, VT) == Legal || 1346 getOperationAction(Op, VT) == Custom); 1347 } 1348 1349 /// Return true if the specified operation is legal on this target or can be 1350 /// made legal using promotion. This is used to help guide high-level lowering 1351 /// decisions. LegalOnly is an optional convenience for code paths traversed 1352 /// pre and post legalisation. 1353 bool isOperationLegalOrPromote(unsigned Op, EVT VT, 1354 bool LegalOnly = false) const { 1355 if (LegalOnly) 1356 return isOperationLegal(Op, VT); 1357 1358 return (VT == MVT::Other || isTypeLegal(VT)) && 1359 (getOperationAction(Op, VT) == Legal || 1360 getOperationAction(Op, VT) == Promote); 1361 } 1362 1363 /// Return true if the specified operation is legal on this target or can be 1364 /// made legal with custom lowering or using promotion. This is used to help 1365 /// guide high-level lowering decisions. LegalOnly is an optional convenience 1366 /// for code paths traversed pre and post legalisation. 1367 bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, 1368 bool LegalOnly = false) const { 1369 if (LegalOnly) 1370 return isOperationLegal(Op, VT); 1371 1372 return (VT == MVT::Other || isTypeLegal(VT)) && 1373 (getOperationAction(Op, VT) == Legal || 1374 getOperationAction(Op, VT) == Custom || 1375 getOperationAction(Op, VT) == Promote); 1376 } 1377 1378 /// Return true if the operation uses custom lowering, regardless of whether 1379 /// the type is legal or not. 1380 bool isOperationCustom(unsigned Op, EVT VT) const { 1381 return getOperationAction(Op, VT) == Custom; 1382 } 1383 1384 /// Return true if lowering to a jump table is allowed. 1385 virtual bool areJTsAllowed(const Function *Fn) const { 1386 if (Fn->getFnAttribute("no-jump-tables").getValueAsBool()) 1387 return false; 1388 1389 return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || 1390 isOperationLegalOrCustom(ISD::BRIND, MVT::Other); 1391 } 1392 1393 /// Check whether the range [Low,High] fits in a machine word. 1394 bool rangeFitsInWord(const APInt &Low, const APInt &High, 1395 const DataLayout &DL) const { 1396 // FIXME: Using the pointer type doesn't seem ideal. 1397 uint64_t BW = DL.getIndexSizeInBits(0u); 1398 uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; 1399 return Range <= BW; 1400 } 1401 1402 /// Return true if lowering to a jump table is suitable for a set of case 1403 /// clusters which may contain \p NumCases cases, \p Range range of values. 
1404 virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, 1405 uint64_t Range, ProfileSummaryInfo *PSI, 1406 BlockFrequencyInfo *BFI) const; 1407 1408 /// Returns preferred type for switch condition. 1409 virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, 1410 EVT ConditionVT) const; 1411 1412 /// Return true if lowering to a bit test is suitable for a set of case 1413 /// clusters which contains \p NumDests unique destinations, \p Low and 1414 /// \p High as its lowest and highest case values, and expects \p NumCmps 1415 /// case value comparisons. Check if the number of destinations, comparison 1416 /// metric, and range are all suitable. 1417 bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, 1418 const APInt &Low, const APInt &High, 1419 const DataLayout &DL) const { 1420 // FIXME: I don't think NumCmps is the correct metric: a single case and a 1421 // range of cases both require only one branch to lower. Just looking at the 1422 // number of clusters and destinations should be enough to decide whether to 1423 // build bit tests. 1424 1425 // To lower a range with bit tests, the range must fit the bitwidth of a 1426 // machine word. 1427 if (!rangeFitsInWord(Low, High, DL)) 1428 return false; 1429 1430 // Decide whether it's profitable to lower this range with bit tests. Each 1431 // destination requires a bit test and branch, and there is an overall range 1432 // check branch. For a small number of clusters, separate comparisons might 1433 // be cheaper, and for many destinations, splitting the range might be 1434 // better. 1435 return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) || 1436 (NumDests == 3 && NumCmps >= 6); 1437 } 1438 1439 /// Return true if the specified operation is illegal on this target or 1440 /// unlikely to be made legal with custom lowering. This is used to help guide 1441 /// high-level lowering decisions. 1442 bool isOperationExpand(unsigned Op, EVT VT) const { 1443 return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand); 1444 } 1445 1446 /// Return true if the specified operation is legal on this target. 1447 bool isOperationLegal(unsigned Op, EVT VT) const { 1448 return (VT == MVT::Other || isTypeLegal(VT)) && 1449 getOperationAction(Op, VT) == Legal; 1450 } 1451 1452 /// Return how this load with extension should be treated: either it is legal, 1453 /// needs to be promoted to a larger size, needs to be expanded to some other 1454 /// code sequence, or the target has a custom expander for it. 1455 LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, 1456 EVT MemVT) const { 1457 if (ValVT.isExtended() || MemVT.isExtended()) return Expand; 1458 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; 1459 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; 1460 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::VALUETYPE_SIZE && 1461 MemI < MVT::VALUETYPE_SIZE && "Table isn't big enough!"); 1462 unsigned Shift = 4 * ExtType; 1463 return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf); 1464 } 1465 1466 /// Return true if the specified load with extension is legal on this target. 1467 bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const { 1468 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal; 1469 } 1470 1471 /// Return true if the specified load with extension is legal or custom 1472 /// on this target. 
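  /// For illustration (the opcode and types are just an example), a target
  /// with a native sign-extending byte load would typically have
  ///   getLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i8) == Legal,
  /// in which case both isLoadExtLegal and this predicate return true for
  /// that combination.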
1473 bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const { 1474 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal || 1475 getLoadExtAction(ExtType, ValVT, MemVT) == Custom; 1476 } 1477 1478 /// Same as getLoadExtAction, but for atomic loads. 1479 LegalizeAction getAtomicLoadExtAction(unsigned ExtType, EVT ValVT, 1480 EVT MemVT) const { 1481 if (ValVT.isExtended() || MemVT.isExtended()) return Expand; 1482 unsigned ValI = (unsigned)ValVT.getSimpleVT().SimpleTy; 1483 unsigned MemI = (unsigned)MemVT.getSimpleVT().SimpleTy; 1484 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::VALUETYPE_SIZE && 1485 MemI < MVT::VALUETYPE_SIZE && "Table isn't big enough!"); 1486 unsigned Shift = 4 * ExtType; 1487 LegalizeAction Action = 1488 (LegalizeAction)((AtomicLoadExtActions[ValI][MemI] >> Shift) & 0xf); 1489 assert((Action == Legal || Action == Expand) && 1490 "Unsupported atomic load extension action."); 1491 return Action; 1492 } 1493 1494 /// Return true if the specified atomic load with extension is legal on 1495 /// this target. 1496 bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const { 1497 return getAtomicLoadExtAction(ExtType, ValVT, MemVT) == Legal; 1498 } 1499 1500 /// Return how this store with truncation should be treated: either it is 1501 /// legal, needs to be promoted to a larger size, needs to be expanded to some 1502 /// other code sequence, or the target has a custom expander for it. 1503 LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const { 1504 if (ValVT.isExtended() || MemVT.isExtended()) return Expand; 1505 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; 1506 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; 1507 assert(ValI < MVT::VALUETYPE_SIZE && MemI < MVT::VALUETYPE_SIZE && 1508 "Table isn't big enough!"); 1509 return TruncStoreActions[ValI][MemI]; 1510 } 1511 1512 /// Return true if the specified store with truncation is legal on this 1513 /// target. 1514 bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const { 1515 return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal; 1516 } 1517 1518 /// Return true if the specified store with truncation has solution on this 1519 /// target. 1520 bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const { 1521 return isTypeLegal(ValVT) && 1522 (getTruncStoreAction(ValVT, MemVT) == Legal || 1523 getTruncStoreAction(ValVT, MemVT) == Custom); 1524 } 1525 1526 virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, 1527 bool LegalOnly) const { 1528 if (LegalOnly) 1529 return isTruncStoreLegal(ValVT, MemVT); 1530 1531 return isTruncStoreLegalOrCustom(ValVT, MemVT); 1532 } 1533 1534 /// Return how the indexed load should be treated: either it is legal, needs 1535 /// to be promoted to a larger size, needs to be expanded to some other code 1536 /// sequence, or the target has a custom expander for it. 1537 LegalizeAction getIndexedLoadAction(unsigned IdxMode, MVT VT) const { 1538 return getIndexedModeAction(IdxMode, VT, IMAB_Load); 1539 } 1540 1541 /// Return true if the specified indexed load is legal on this target. 
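  /// For example, a target that marks pre-increment i32 loads as Legal
  /// (purely illustrative) would make
  ///   isIndexedLoadLegal(ISD::PRE_INC, MVT::i32)
  /// return true, allowing DAGCombiner to fold the address update into the
  /// load.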
1542 bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const {
1543 return VT.isSimple() &&
1544 (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
1545 getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1546 }
1547
1548 /// Return how the indexed store should be treated: either it is legal, needs
1549 /// to be promoted to a larger size, needs to be expanded to some other code
1550 /// sequence, or the target has a custom expander for it.
1551 LegalizeAction getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
1552 return getIndexedModeAction(IdxMode, VT, IMAB_Store);
1553 }
1554
1555 /// Return true if the specified indexed store is legal on this target.
1556 bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const {
1557 return VT.isSimple() &&
1558 (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
1559 getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1560 }
1561
1562 /// Return how the indexed masked load should be treated: either it is legal,
1563 /// needs to be promoted to a larger size, needs to be expanded to some other
1564 /// code sequence, or the target has a custom expander for it.
1565 LegalizeAction getIndexedMaskedLoadAction(unsigned IdxMode, MVT VT) const {
1566 return getIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad);
1567 }
1568
1569 /// Return true if the specified indexed masked load is legal on this target.
1570 bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const {
1571 return VT.isSimple() &&
1572 (getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
1573 getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1574 }
1575
1576 /// Return how the indexed masked store should be treated: either it is legal,
1577 /// needs to be promoted to a larger size, needs to be expanded to some other
1578 /// code sequence, or the target has a custom expander for it.
1579 LegalizeAction getIndexedMaskedStoreAction(unsigned IdxMode, MVT VT) const {
1580 return getIndexedModeAction(IdxMode, VT, IMAB_MaskedStore);
1581 }
1582
1583 /// Return true if the specified indexed masked store is legal on this target.
1584 bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const {
1585 return VT.isSimple() &&
1586 (getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
1587 getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1588 }
1589
1590 /// Returns true if the index type for a masked gather/scatter requires
1591 /// extending.
1592 virtual bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const { return false; }
1593
1594 // Returns true if Extend can be folded into the index of a masked
1595 // gather/scatter on this target.
1596 virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const {
1597 return false;
1598 }
1599
1600 // Return true if the target supports a scatter/gather instruction with
1601 // indices which are scaled by the particular value. Note that all targets
1602 // must by definition support a scale of 1.
1603 virtual bool isLegalScaleForGatherScatter(uint64_t Scale,
1604 uint64_t ElemSize) const {
1605 // MGATHER/MSCATTER are only required to support scaling by one or by the
1606 // element size.
1607 if (Scale != ElemSize && Scale != 1)
1608 return false;
1609 return true;
1610 }
1611
1612 /// Return how the condition code should be treated: either it is legal, needs
1613 /// to be expanded to some other code sequence, or the target has a custom
1614 /// expander for it.
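  /// As an illustrative example (not a statement about any particular
  /// backend), a target without native unsigned comparisons might call
  ///   setCondCodeAction(ISD::SETUGT, MVT::i32, Expand);
  /// in its constructor; getCondCodeAction(ISD::SETUGT, MVT::i32) then reports
  /// Expand here and legalization rewrites the comparison.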
1615 LegalizeAction 1616 getCondCodeAction(ISD::CondCode CC, MVT VT) const { 1617 assert((unsigned)CC < std::size(CondCodeActions) && 1618 ((unsigned)VT.SimpleTy >> 3) < std::size(CondCodeActions[0]) && 1619 "Table isn't big enough!"); 1620 // See setCondCodeAction for how this is encoded. 1621 uint32_t Shift = 4 * (VT.SimpleTy & 0x7); 1622 uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3]; 1623 LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF); 1624 assert(Action != Promote && "Can't promote condition code!"); 1625 return Action; 1626 } 1627 1628 /// Return true if the specified condition code is legal for a comparison of 1629 /// the specified types on this target. 1630 bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const { 1631 return getCondCodeAction(CC, VT) == Legal; 1632 } 1633 1634 /// Return true if the specified condition code is legal or custom for a 1635 /// comparison of the specified types on this target. 1636 bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const { 1637 return getCondCodeAction(CC, VT) == Legal || 1638 getCondCodeAction(CC, VT) == Custom; 1639 } 1640 1641 /// If the action for this operation is to promote, this method returns the 1642 /// ValueType to promote to. 1643 MVT getTypeToPromoteTo(unsigned Op, MVT VT) const { 1644 assert(getOperationAction(Op, VT) == Promote && 1645 "This operation isn't promoted!"); 1646 1647 // See if this has an explicit type specified. 1648 std::map<std::pair<unsigned, MVT::SimpleValueType>, 1649 MVT::SimpleValueType>::const_iterator PTTI = 1650 PromoteToType.find(std::make_pair(Op, VT.SimpleTy)); 1651 if (PTTI != PromoteToType.end()) return PTTI->second; 1652 1653 assert((VT.isInteger() || VT.isFloatingPoint()) && 1654 "Cannot autopromote this type, add it with AddPromotedToType."); 1655 1656 uint64_t VTBits = VT.getScalarSizeInBits(); 1657 MVT NVT = VT; 1658 do { 1659 NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1); 1660 assert(NVT.isInteger() == VT.isInteger() && 1661 NVT.isFloatingPoint() == VT.isFloatingPoint() && 1662 "Didn't find type to promote to!"); 1663 } while (VTBits >= NVT.getScalarSizeInBits() || !isTypeLegal(NVT) || 1664 getOperationAction(Op, NVT) == Promote); 1665 return NVT; 1666 } 1667 1668 virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, 1669 bool AllowUnknown = false) const { 1670 return getValueType(DL, Ty, AllowUnknown); 1671 } 1672 1673 /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM 1674 /// operations except for the pointer size. If AllowUnknown is true, this 1675 /// will return MVT::Other for types with no EVT counterpart (e.g. structs), 1676 /// otherwise it will assert. 1677 EVT getValueType(const DataLayout &DL, Type *Ty, 1678 bool AllowUnknown = false) const { 1679 // Lower scalar pointers to native pointer types. 1680 if (auto *PTy = dyn_cast<PointerType>(Ty)) 1681 return getPointerTy(DL, PTy->getAddressSpace()); 1682 1683 if (auto *VTy = dyn_cast<VectorType>(Ty)) { 1684 Type *EltTy = VTy->getElementType(); 1685 // Lower vectors of pointers to native pointer types. 
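      // Illustrative example: on a target whose pointers are 64 bits wide, a
      // <4 x ptr> value is treated here as if it were <4 x i64>.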
1686 if (auto *PTy = dyn_cast<PointerType>(EltTy)) { 1687 EVT PointerTy(getPointerTy(DL, PTy->getAddressSpace())); 1688 EltTy = PointerTy.getTypeForEVT(Ty->getContext()); 1689 } 1690 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false), 1691 VTy->getElementCount()); 1692 } 1693 1694 return EVT::getEVT(Ty, AllowUnknown); 1695 } 1696 1697 EVT getMemValueType(const DataLayout &DL, Type *Ty, 1698 bool AllowUnknown = false) const { 1699 // Lower scalar pointers to native pointer types. 1700 if (auto *PTy = dyn_cast<PointerType>(Ty)) 1701 return getPointerMemTy(DL, PTy->getAddressSpace()); 1702 1703 if (auto *VTy = dyn_cast<VectorType>(Ty)) { 1704 Type *EltTy = VTy->getElementType(); 1705 if (auto *PTy = dyn_cast<PointerType>(EltTy)) { 1706 EVT PointerTy(getPointerMemTy(DL, PTy->getAddressSpace())); 1707 EltTy = PointerTy.getTypeForEVT(Ty->getContext()); 1708 } 1709 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false), 1710 VTy->getElementCount()); 1711 } 1712 1713 return getValueType(DL, Ty, AllowUnknown); 1714 } 1715 1716 1717 /// Return the MVT corresponding to this LLVM type. See getValueType. 1718 MVT getSimpleValueType(const DataLayout &DL, Type *Ty, 1719 bool AllowUnknown = false) const { 1720 return getValueType(DL, Ty, AllowUnknown).getSimpleVT(); 1721 } 1722 1723 /// Returns the desired alignment for ByVal or InAlloca aggregate function 1724 /// arguments in the caller parameter area. 1725 virtual Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; 1726 1727 /// Return the type of registers that this ValueType will eventually require. 1728 MVT getRegisterType(MVT VT) const { 1729 assert((unsigned)VT.SimpleTy < std::size(RegisterTypeForVT)); 1730 return RegisterTypeForVT[VT.SimpleTy]; 1731 } 1732 1733 /// Return the type of registers that this ValueType will eventually require. 1734 MVT getRegisterType(LLVMContext &Context, EVT VT) const { 1735 if (VT.isSimple()) 1736 return getRegisterType(VT.getSimpleVT()); 1737 if (VT.isVector()) { 1738 EVT VT1; 1739 MVT RegisterVT; 1740 unsigned NumIntermediates; 1741 (void)getVectorTypeBreakdown(Context, VT, VT1, 1742 NumIntermediates, RegisterVT); 1743 return RegisterVT; 1744 } 1745 if (VT.isInteger()) { 1746 return getRegisterType(Context, getTypeToTransformTo(Context, VT)); 1747 } 1748 llvm_unreachable("Unsupported extended type!"); 1749 } 1750 1751 /// Return the number of registers that this ValueType will eventually 1752 /// require. 1753 /// 1754 /// This is one for any types promoted to live in larger registers, but may be 1755 /// more than one for types (like i64) that are split into pieces. For types 1756 /// like i140, which are first promoted then expanded, it is the number of 1757 /// registers needed to hold all the bits of the original type. For an i140 1758 /// on a 32 bit machine this means 5 registers. 1759 /// 1760 /// RegisterVT may be passed as a way to override the default settings, for 1761 /// instance with i128 inline assembly operands on SystemZ. 
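  /// To make the i140 example concrete: with 32-bit registers, the
  /// rounding-up division in the integer case below yields
  /// (140 + 32 - 1) / 32 = 5 registers.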
1762 virtual unsigned 1763 getNumRegisters(LLVMContext &Context, EVT VT, 1764 std::optional<MVT> RegisterVT = std::nullopt) const { 1765 if (VT.isSimple()) { 1766 assert((unsigned)VT.getSimpleVT().SimpleTy < 1767 std::size(NumRegistersForVT)); 1768 return NumRegistersForVT[VT.getSimpleVT().SimpleTy]; 1769 } 1770 if (VT.isVector()) { 1771 EVT VT1; 1772 MVT VT2; 1773 unsigned NumIntermediates; 1774 return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2); 1775 } 1776 if (VT.isInteger()) { 1777 unsigned BitWidth = VT.getSizeInBits(); 1778 unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits(); 1779 return (BitWidth + RegWidth - 1) / RegWidth; 1780 } 1781 llvm_unreachable("Unsupported extended type!"); 1782 } 1783 1784 /// Certain combinations of ABIs, Targets and features require that types 1785 /// are legal for some operations and not for other operations. 1786 /// For MIPS all vector types must be passed through the integer register set. 1787 virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, 1788 CallingConv::ID CC, EVT VT) const { 1789 return getRegisterType(Context, VT); 1790 } 1791 1792 /// Certain targets require unusual breakdowns of certain types. For MIPS, 1793 /// this occurs when a vector type is used, as vector are passed through the 1794 /// integer register set. 1795 virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, 1796 CallingConv::ID CC, 1797 EVT VT) const { 1798 return getNumRegisters(Context, VT); 1799 } 1800 1801 /// Certain targets have context sensitive alignment requirements, where one 1802 /// type has the alignment requirement of another type. 1803 virtual Align getABIAlignmentForCallingConv(Type *ArgTy, 1804 const DataLayout &DL) const { 1805 return DL.getABITypeAlign(ArgTy); 1806 } 1807 1808 /// If true, then instruction selection should seek to shrink the FP constant 1809 /// of the specified type to a smaller type in order to save space and / or 1810 /// reduce runtime. 1811 virtual bool ShouldShrinkFPConstant(EVT) const { return true; } 1812 1813 /// Return true if it is profitable to reduce a load to a smaller type. 1814 /// Example: (i16 (trunc (i32 (load x))) -> i16 load x 1815 virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, 1816 EVT NewVT) const { 1817 // By default, assume that it is cheaper to extract a subvector from a wide 1818 // vector load rather than creating multiple narrow vector loads. 1819 if (NewVT.isVector() && !Load->hasOneUse()) 1820 return false; 1821 1822 return true; 1823 } 1824 1825 /// Return true (the default) if it is profitable to remove a sext_inreg(x) 1826 /// where the sext is redundant, and use x directly. 1827 virtual bool shouldRemoveRedundantExtend(SDValue Op) const { return true; } 1828 1829 /// Indicates if any padding is guaranteed to go at the most significant bits 1830 /// when storing the type to memory and the type size isn't equal to the store 1831 /// size. 1832 bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const { 1833 return VT.isScalarInteger() && !VT.isByteSized(); 1834 } 1835 1836 /// When splitting a value of the specified type into parts, does the Lo 1837 /// or Hi part come first? This usually follows the endianness, except 1838 /// for ppcf128, where the Hi part always comes first. 
1839 bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const { 1840 return DL.isBigEndian() || VT == MVT::ppcf128; 1841 } 1842 1843 /// If true, the target has custom DAG combine transformations that it can 1844 /// perform for the specified node. 1845 bool hasTargetDAGCombine(ISD::NodeType NT) const { 1846 assert(unsigned(NT >> 3) < std::size(TargetDAGCombineArray)); 1847 return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7)); 1848 } 1849 1850 unsigned getGatherAllAliasesMaxDepth() const { 1851 return GatherAllAliasesMaxDepth; 1852 } 1853 1854 /// Returns the size of the platform's va_list object. 1855 virtual unsigned getVaListSizeInBits(const DataLayout &DL) const { 1856 return getPointerTy(DL).getSizeInBits(); 1857 } 1858 1859 /// Get maximum # of store operations permitted for llvm.memset 1860 /// 1861 /// This function returns the maximum number of store operations permitted 1862 /// to replace a call to llvm.memset. The value is set by the target at the 1863 /// performance threshold for such a replacement. If OptSize is true, 1864 /// return the limit for functions that have OptSize attribute. 1865 unsigned getMaxStoresPerMemset(bool OptSize) const { 1866 return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset; 1867 } 1868 1869 /// Get maximum # of store operations permitted for llvm.memcpy 1870 /// 1871 /// This function returns the maximum number of store operations permitted 1872 /// to replace a call to llvm.memcpy. The value is set by the target at the 1873 /// performance threshold for such a replacement. If OptSize is true, 1874 /// return the limit for functions that have OptSize attribute. 1875 unsigned getMaxStoresPerMemcpy(bool OptSize) const { 1876 return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy; 1877 } 1878 1879 /// \brief Get maximum # of store operations to be glued together 1880 /// 1881 /// This function returns the maximum number of store operations permitted 1882 /// to glue together during lowering of llvm.memcpy. The value is set by 1883 // the target at the performance threshold for such a replacement. 1884 virtual unsigned getMaxGluedStoresPerMemcpy() const { 1885 return MaxGluedStoresPerMemcpy; 1886 } 1887 1888 /// Get maximum # of load operations permitted for memcmp 1889 /// 1890 /// This function returns the maximum number of load operations permitted 1891 /// to replace a call to memcmp. The value is set by the target at the 1892 /// performance threshold for such a replacement. If OptSize is true, 1893 /// return the limit for functions that have OptSize attribute. 1894 unsigned getMaxExpandSizeMemcmp(bool OptSize) const { 1895 return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp; 1896 } 1897 1898 /// Get maximum # of store operations permitted for llvm.memmove 1899 /// 1900 /// This function returns the maximum number of store operations permitted 1901 /// to replace a call to llvm.memmove. The value is set by the target at the 1902 /// performance threshold for such a replacement. If OptSize is true, 1903 /// return the limit for functions that have OptSize attribute. 1904 unsigned getMaxStoresPerMemmove(bool OptSize) const { 1905 return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove; 1906 } 1907 1908 /// Determine if the target supports unaligned memory accesses. 1909 /// 1910 /// This function returns true if the target allows unaligned memory accesses 1911 /// of the specified type in the given address space. 
If true, it also returns 1912 /// a relative speed of the unaligned memory access in the last argument by 1913 /// reference. The higher the speed number the faster the operation comparing 1914 /// to a number returned by another such call. This is used, for example, in 1915 /// situations where an array copy/move/set is converted to a sequence of 1916 /// store operations. Its use helps to ensure that such replacements don't 1917 /// generate code that causes an alignment error (trap) on the target machine. 1918 virtual bool allowsMisalignedMemoryAccesses( 1919 EVT, unsigned AddrSpace = 0, Align Alignment = Align(1), 1920 MachineMemOperand::Flags Flags = MachineMemOperand::MONone, 1921 unsigned * /*Fast*/ = nullptr) const { 1922 return false; 1923 } 1924 1925 /// LLT handling variant. 1926 virtual bool allowsMisalignedMemoryAccesses( 1927 LLT, unsigned AddrSpace = 0, Align Alignment = Align(1), 1928 MachineMemOperand::Flags Flags = MachineMemOperand::MONone, 1929 unsigned * /*Fast*/ = nullptr) const { 1930 return false; 1931 } 1932 1933 /// This function returns true if the memory access is aligned or if the 1934 /// target allows this specific unaligned memory access. If the access is 1935 /// allowed, the optional final parameter returns a relative speed of the 1936 /// access (as defined by the target). 1937 bool allowsMemoryAccessForAlignment( 1938 LLVMContext &Context, const DataLayout &DL, EVT VT, 1939 unsigned AddrSpace = 0, Align Alignment = Align(1), 1940 MachineMemOperand::Flags Flags = MachineMemOperand::MONone, 1941 unsigned *Fast = nullptr) const; 1942 1943 /// Return true if the memory access of this type is aligned or if the target 1944 /// allows this specific unaligned access for the given MachineMemOperand. 1945 /// If the access is allowed, the optional final parameter returns a relative 1946 /// speed of the access (as defined by the target). 1947 bool allowsMemoryAccessForAlignment(LLVMContext &Context, 1948 const DataLayout &DL, EVT VT, 1949 const MachineMemOperand &MMO, 1950 unsigned *Fast = nullptr) const; 1951 1952 /// Return true if the target supports a memory access of this type for the 1953 /// given address space and alignment. If the access is allowed, the optional 1954 /// final parameter returns the relative speed of the access (as defined by 1955 /// the target). 1956 virtual bool 1957 allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, 1958 unsigned AddrSpace = 0, Align Alignment = Align(1), 1959 MachineMemOperand::Flags Flags = MachineMemOperand::MONone, 1960 unsigned *Fast = nullptr) const; 1961 1962 /// Return true if the target supports a memory access of this type for the 1963 /// given MachineMemOperand. If the access is allowed, the optional 1964 /// final parameter returns the relative access speed (as defined by the 1965 /// target). 1966 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, 1967 const MachineMemOperand &MMO, 1968 unsigned *Fast = nullptr) const; 1969 1970 /// LLT handling variant. 1971 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty, 1972 const MachineMemOperand &MMO, 1973 unsigned *Fast = nullptr) const; 1974 1975 /// Returns the target specific optimal type for load and store operations as 1976 /// a result of memset, memcpy, and memmove lowering. 1977 /// It returns EVT::Other if the type should be determined using generic 1978 /// target-independent logic. 
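  /// A minimal sketch of an override (the threshold and type chosen here are
  /// hypothetical, not a recommendation):
  /// \code
  ///   EVT MyTargetLowering::getOptimalMemOpType(
  ///       const MemOp &Op, const AttributeList &FuncAttributes) const {
  ///     // Prefer 128-bit vector accesses for large, 16-byte-aligned copies.
  ///     if (Op.size() >= 16 && Op.isAligned(Align(16)))
  ///       return MVT::v16i8;
  ///     return MVT::Other; // Defer to the generic lowering.
  ///   }
  /// \endcode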
1979 virtual EVT 1980 getOptimalMemOpType(const MemOp &Op, 1981 const AttributeList & /*FuncAttributes*/) const { 1982 return MVT::Other; 1983 } 1984 1985 /// LLT returning variant. 1986 virtual LLT 1987 getOptimalMemOpLLT(const MemOp &Op, 1988 const AttributeList & /*FuncAttributes*/) const { 1989 return LLT(); 1990 } 1991 1992 /// Returns true if it's safe to use load / store of the specified type to 1993 /// expand memcpy / memset inline. 1994 /// 1995 /// This is mostly true for all types except for some special cases. For 1996 /// example, on X86 targets without SSE2 f64 load / store are done with fldl / 1997 /// fstpl which also does type conversion. Note the specified type doesn't 1998 /// have to be legal as the hook is used before type legalization. 1999 virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; } 2000 2001 /// Return lower limit for number of blocks in a jump table. 2002 virtual unsigned getMinimumJumpTableEntries() const; 2003 2004 /// Return lower limit of the density in a jump table. 2005 unsigned getMinimumJumpTableDensity(bool OptForSize) const; 2006 2007 /// Return upper limit for number of entries in a jump table. 2008 /// Zero if no limit. 2009 unsigned getMaximumJumpTableSize() const; 2010 2011 virtual bool isJumpTableRelative() const; 2012 2013 /// If a physical register, this specifies the register that 2014 /// llvm.savestack/llvm.restorestack should save and restore. 2015 Register getStackPointerRegisterToSaveRestore() const { 2016 return StackPointerRegisterToSaveRestore; 2017 } 2018 2019 /// If a physical register, this returns the register that receives the 2020 /// exception address on entry to an EH pad. 2021 virtual Register 2022 getExceptionPointerRegister(const Constant *PersonalityFn) const { 2023 return Register(); 2024 } 2025 2026 /// If a physical register, this returns the register that receives the 2027 /// exception typeid on entry to a landing pad. 2028 virtual Register 2029 getExceptionSelectorRegister(const Constant *PersonalityFn) const { 2030 return Register(); 2031 } 2032 2033 virtual bool needsFixedCatchObjects() const { 2034 report_fatal_error("Funclet EH is not implemented for this target"); 2035 } 2036 2037 /// Return the minimum stack alignment of an argument. 2038 Align getMinStackArgumentAlignment() const { 2039 return MinStackArgumentAlignment; 2040 } 2041 2042 /// Return the minimum function alignment. 2043 Align getMinFunctionAlignment() const { return MinFunctionAlignment; } 2044 2045 /// Return the preferred function alignment. 2046 Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; } 2047 2048 /// Return the preferred loop alignment. 2049 virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const; 2050 2051 /// Return the maximum amount of bytes allowed to be emitted when padding for 2052 /// alignment 2053 virtual unsigned 2054 getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const; 2055 2056 /// Should loops be aligned even when the function is marked OptSize (but not 2057 /// MinSize). 2058 virtual bool alignLoopsWithOptSize() const { return false; } 2059 2060 /// If the target has a standard location for the stack protector guard, 2061 /// returns the address of that location. Otherwise, returns nullptr. 2062 /// DEPRECATED: please override useLoadStackGuardNode and customize 2063 /// LOAD_STACK_GUARD, or customize \@llvm.stackguard(). 
2064 virtual Value *getIRStackGuard(IRBuilderBase &IRB) const; 2065 2066 /// Inserts necessary declarations for SSP (stack protection) purpose. 2067 /// Should be used only when getIRStackGuard returns nullptr. 2068 virtual void insertSSPDeclarations(Module &M) const; 2069 2070 /// Return the variable that's previously inserted by insertSSPDeclarations, 2071 /// if any, otherwise return nullptr. Should be used only when 2072 /// getIRStackGuard returns nullptr. 2073 virtual Value *getSDagStackGuard(const Module &M) const; 2074 2075 /// If this function returns true, stack protection checks should XOR the 2076 /// frame pointer (or whichever pointer is used to address locals) into the 2077 /// stack guard value before checking it. getIRStackGuard must return nullptr 2078 /// if this returns true. 2079 virtual bool useStackGuardXorFP() const { return false; } 2080 2081 /// If the target has a standard stack protection check function that 2082 /// performs validation and error handling, returns the function. Otherwise, 2083 /// returns nullptr. Must be previously inserted by insertSSPDeclarations. 2084 /// Should be used only when getIRStackGuard returns nullptr. 2085 virtual Function *getSSPStackGuardCheck(const Module &M) const; 2086 2087 protected: 2088 Value *getDefaultSafeStackPointerLocation(IRBuilderBase &IRB, 2089 bool UseTLS) const; 2090 2091 public: 2092 /// Returns the target-specific address of the unsafe stack pointer. 2093 virtual Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const; 2094 2095 /// Returns the name of the symbol used to emit stack probes or the empty 2096 /// string if not applicable. 2097 virtual bool hasStackProbeSymbol(const MachineFunction &MF) const { return false; } 2098 2099 virtual bool hasInlineStackProbe(const MachineFunction &MF) const { return false; } 2100 2101 virtual StringRef getStackProbeSymbolName(const MachineFunction &MF) const { 2102 return ""; 2103 } 2104 2105 /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we 2106 /// are happy to sink it into basic blocks. A cast may be free, but not 2107 /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer. 2108 virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const; 2109 2110 /// Return true if the pointer arguments to CI should be aligned by aligning 2111 /// the object whose address is being passed. If so then MinSize is set to the 2112 /// minimum size the object must be to be aligned and PrefAlign is set to the 2113 /// preferred alignment. 2114 virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/, 2115 Align & /*PrefAlign*/) const { 2116 return false; 2117 } 2118 2119 //===--------------------------------------------------------------------===// 2120 /// \name Helpers for TargetTransformInfo implementations 2121 /// @{ 2122 2123 /// Get the ISD node that corresponds to the Instruction class opcode. 2124 int InstructionOpcodeToISD(unsigned Opcode) const; 2125 2126 /// Get the ISD node that corresponds to the Intrinsic ID. Returns 2127 /// ISD::DELETED_NODE by default for an unsupported Intrinsic ID. 2128 int IntrinsicIDToISD(Intrinsic::ID ID) const; 2129 2130 /// @} 2131 2132 //===--------------------------------------------------------------------===// 2133 /// \name Helpers for atomic expansion. 2134 /// @{ 2135 2136 /// Returns the maximum atomic operation size (in bits) supported by 2137 /// the backend. 
Atomic operations greater than this size (as well 2138 /// as ones that are not naturally aligned), will be expanded by 2139 /// AtomicExpandPass into an __atomic_* library call. 2140 unsigned getMaxAtomicSizeInBitsSupported() const { 2141 return MaxAtomicSizeInBitsSupported; 2142 } 2143 2144 /// Returns the size in bits of the maximum div/rem the backend supports. 2145 /// Larger operations will be expanded by ExpandLargeDivRem. 2146 unsigned getMaxDivRemBitWidthSupported() const { 2147 return MaxDivRemBitWidthSupported; 2148 } 2149 2150 /// Returns the size in bits of the maximum larget fp convert the backend 2151 /// supports. Larger operations will be expanded by ExpandLargeFPConvert. 2152 unsigned getMaxLargeFPConvertBitWidthSupported() const { 2153 return MaxLargeFPConvertBitWidthSupported; 2154 } 2155 2156 /// Returns the size of the smallest cmpxchg or ll/sc instruction 2157 /// the backend supports. Any smaller operations are widened in 2158 /// AtomicExpandPass. 2159 /// 2160 /// Note that *unlike* operations above the maximum size, atomic ops 2161 /// are still natively supported below the minimum; they just 2162 /// require a more complex expansion. 2163 unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; } 2164 2165 /// Whether the target supports unaligned atomic operations. 2166 bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; } 2167 2168 /// Whether AtomicExpandPass should automatically insert fences and reduce 2169 /// ordering for this atomic. This should be true for most architectures with 2170 /// weak memory ordering. Defaults to false. 2171 virtual bool shouldInsertFencesForAtomic(const Instruction *I) const { 2172 return false; 2173 } 2174 2175 /// Whether AtomicExpandPass should automatically insert a trailing fence 2176 /// without reducing the ordering for this atomic. Defaults to false. 2177 virtual bool 2178 shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const { 2179 return false; 2180 } 2181 2182 /// Perform a load-linked operation on Addr, returning a "Value *" with the 2183 /// corresponding pointee type. This may entail some non-trivial operations to 2184 /// truncate or reconstruct types that will be illegal in the backend. See 2185 /// ARMISelLowering for an example implementation. 2186 virtual Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, 2187 Value *Addr, AtomicOrdering Ord) const { 2188 llvm_unreachable("Load linked unimplemented on this target"); 2189 } 2190 2191 /// Perform a store-conditional operation to Addr. Return the status of the 2192 /// store. This should be 0 if the store succeeded, non-zero otherwise. 2193 virtual Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, 2194 Value *Addr, AtomicOrdering Ord) const { 2195 llvm_unreachable("Store conditional unimplemented on this target"); 2196 } 2197 2198 /// Perform a masked atomicrmw using a target-specific intrinsic. This 2199 /// represents the core LL/SC loop which will be lowered at a late stage by 2200 /// the backend. The target-specific intrinsic returns the loaded value and 2201 /// is not responsible for masking and shifting the result. 2202 virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, 2203 AtomicRMWInst *AI, 2204 Value *AlignedAddr, Value *Incr, 2205 Value *Mask, Value *ShiftAmt, 2206 AtomicOrdering Ord) const { 2207 llvm_unreachable("Masked atomicrmw expansion unimplemented on this target"); 2208 } 2209 2210 /// Perform a atomicrmw expansion using a target-specific way. 
This is 2211 /// expected to be called when masked atomicrmw and bit test atomicrmw don't 2212 /// work, and the target supports another way to lower atomicrmw. 2213 virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const { 2214 llvm_unreachable( 2215 "Generic atomicrmw expansion unimplemented on this target"); 2216 } 2217 2218 /// Perform a cmpxchg expansion using a target-specific method. 2219 virtual void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const { 2220 llvm_unreachable("Generic cmpxchg expansion unimplemented on this target"); 2221 } 2222 2223 /// Perform a bit test atomicrmw using a target-specific intrinsic. This 2224 /// represents the combined bit test intrinsic which will be lowered at a late 2225 /// stage by the backend. 2226 virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const { 2227 llvm_unreachable( 2228 "Bit test atomicrmw expansion unimplemented on this target"); 2229 } 2230 2231 /// Perform a atomicrmw which the result is only used by comparison, using a 2232 /// target-specific intrinsic. This represents the combined atomic and compare 2233 /// intrinsic which will be lowered at a late stage by the backend. 2234 virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const { 2235 llvm_unreachable( 2236 "Compare arith atomicrmw expansion unimplemented on this target"); 2237 } 2238 2239 /// Perform a masked cmpxchg using a target-specific intrinsic. This 2240 /// represents the core LL/SC loop which will be lowered at a late stage by 2241 /// the backend. The target-specific intrinsic returns the loaded value and 2242 /// is not responsible for masking and shifting the result. 2243 virtual Value *emitMaskedAtomicCmpXchgIntrinsic( 2244 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 2245 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 2246 llvm_unreachable("Masked cmpxchg expansion unimplemented on this target"); 2247 } 2248 2249 //===--------------------------------------------------------------------===// 2250 /// \name KCFI check lowering. 2251 /// @{ 2252 2253 virtual MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB, 2254 MachineBasicBlock::instr_iterator &MBBI, 2255 const TargetInstrInfo *TII) const { 2256 llvm_unreachable("KCFI is not supported on this target"); 2257 } 2258 2259 /// @} 2260 2261 /// Inserts in the IR a target-specific intrinsic specifying a fence. 2262 /// It is called by AtomicExpandPass before expanding an 2263 /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad 2264 /// if shouldInsertFencesForAtomic returns true. 2265 /// 2266 /// Inst is the original atomic instruction, prior to other expansions that 2267 /// may be performed. 2268 /// 2269 /// This function should either return a nullptr, or a pointer to an IR-level 2270 /// Instruction*. Even complex fence sequences can be represented by a 2271 /// single Instruction* through an intrinsic to be lowered later. 2272 /// 2273 /// The default implementation emits an IR fence before any release (or 2274 /// stronger) operation that stores, and after any acquire (or stronger) 2275 /// operation. This is generally a correct implementation, but backends may 2276 /// override if they wish to use alternative schemes (e.g. the PowerPC 2277 /// standard ABI uses a fence before a seq_cst load instead of after a 2278 /// seq_cst store). 
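  /// For instance, under that default scheme a "store atomic ... release"
  /// receives a leading "fence release" from emitLeadingFence, while a
  /// "load atomic ... acquire" receives a trailing "fence acquire" from
  /// emitTrailingFence (assuming shouldInsertFencesForAtomic returned true for
  /// the instruction).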
2279 /// @{ 2280 virtual Instruction *emitLeadingFence(IRBuilderBase &Builder, 2281 Instruction *Inst, 2282 AtomicOrdering Ord) const; 2283 2284 virtual Instruction *emitTrailingFence(IRBuilderBase &Builder, 2285 Instruction *Inst, 2286 AtomicOrdering Ord) const; 2287 /// @} 2288 2289 // Emits code that executes when the comparison result in the ll/sc 2290 // expansion of a cmpxchg instruction is such that the store-conditional will 2291 // not execute. This makes it possible to balance out the load-linked with 2292 // a dedicated instruction, if desired. 2293 // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would 2294 // be unnecessarily held, except if clrex, inserted by this hook, is executed. 2295 virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const {} 2296 2297 /// Returns true if arguments should be sign-extended in lib calls. 2298 virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const { 2299 return IsSigned; 2300 } 2301 2302 /// Returns true if arguments should be extended in lib calls. 2303 virtual bool shouldExtendTypeInLibCall(EVT Type) const { 2304 return true; 2305 } 2306 2307 /// Returns how the given (atomic) load should be expanded by the 2308 /// IR-level AtomicExpand pass. 2309 virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { 2310 return AtomicExpansionKind::None; 2311 } 2312 2313 /// Returns how the given (atomic) load should be cast by the IR-level 2314 /// AtomicExpand pass. 2315 virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const { 2316 if (LI->getType()->isFloatingPointTy()) 2317 return AtomicExpansionKind::CastToInteger; 2318 return AtomicExpansionKind::None; 2319 } 2320 2321 /// Returns how the given (atomic) store should be expanded by the IR-level 2322 /// AtomicExpand pass into. For instance AtomicExpansionKind::Expand will try 2323 /// to use an atomicrmw xchg. 2324 virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const { 2325 return AtomicExpansionKind::None; 2326 } 2327 2328 /// Returns how the given (atomic) store should be cast by the IR-level 2329 /// AtomicExpand pass into. For instance AtomicExpansionKind::CastToInteger 2330 /// will try to cast the operands to integer values. 2331 virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const { 2332 if (SI->getValueOperand()->getType()->isFloatingPointTy()) 2333 return AtomicExpansionKind::CastToInteger; 2334 return AtomicExpansionKind::None; 2335 } 2336 2337 /// Returns how the given atomic cmpxchg should be expanded by the IR-level 2338 /// AtomicExpand pass. 2339 virtual AtomicExpansionKind 2340 shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { 2341 return AtomicExpansionKind::None; 2342 } 2343 2344 /// Returns how the IR-level AtomicExpand pass should expand the given 2345 /// AtomicRMW, if at all. Default is to never expand. 2346 virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { 2347 return RMW->isFloatingPointOperation() ? 2348 AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; 2349 } 2350 2351 /// Returns how the given atomic atomicrmw should be cast by the IR-level 2352 /// AtomicExpand pass. 
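  /// With the default implementation below, for example, an
  ///   atomicrmw xchg ptr %p, float %v seq_cst
  /// is reported as CastToInteger, so AtomicExpand can rewrite it as an
  /// integer exchange with bitcasts around the value operand; integer
  /// atomicrmw operations are left unchanged.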
2353 virtual AtomicExpansionKind
2354 shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const {
2355 if (RMWI->getOperation() == AtomicRMWInst::Xchg &&
2356 (RMWI->getValOperand()->getType()->isFloatingPointTy() ||
2357 RMWI->getValOperand()->getType()->isPointerTy()))
2358 return AtomicExpansionKind::CastToInteger;
2359
2360 return AtomicExpansionKind::None;
2361 }
2362
2363 /// On some platforms, an AtomicRMW that never actually modifies the value
2364 /// (such as fetch_add of 0) can be turned into a fence followed by an
2365 /// atomic load. This may sound useless, but it makes it possible for the
2366 /// processor to keep the cacheline shared, dramatically improving
2367 /// performance. And such idempotent RMWs are useful for implementing some
2368 /// kinds of locks, see for example (justification + benchmarks):
2369 /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
2370 /// This method tries doing that transformation, returning the atomic load if
2371 /// it succeeds, and nullptr otherwise.
2372 /// If shouldExpandAtomicLoadInIR requests expansion of that load, it will
2373 /// undergo another round of expansion.
2374 virtual LoadInst *
2375 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
2376 return nullptr;
2377 }
2378
2379 /// Returns how the platform's atomic operations are extended (ZERO_EXTEND,
2380 /// SIGN_EXTEND, or ANY_EXTEND).
2381 virtual ISD::NodeType getExtendForAtomicOps() const {
2382 return ISD::ZERO_EXTEND;
2383 }
2384
2385 /// Returns how the platform's atomic compare and swap expects its comparison
2386 /// value to be extended (ZERO_EXTEND, SIGN_EXTEND, or ANY_EXTEND). This is
2387 /// separate from getExtendForAtomicOps, which is concerned with the
2388 /// sign-extension of the instruction's output, whereas here we are concerned
2389 /// with the sign-extension of the input. For targets with compare-and-swap
2390 /// instructions (or sub-word comparisons in their LL/SC loop expansions),
2391 /// the input can be ANY_EXTEND, but the output will still have a specific
2392 /// extension.
2393 virtual ISD::NodeType getExtendForAtomicCmpSwapArg() const {
2394 return ISD::ANY_EXTEND;
2395 }
2396
2397 /// @}
2398
2399 /// Returns true if we should normalize
2400 /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
2401 /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
2402 /// that it saves us from materializing N0 and N1 in an integer register.
2403 /// Targets that are able to perform and/or on flags should return false here.
2404 virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
2405 EVT VT) const {
2406 // If a target has multiple condition registers, then it likely has logical
2407 // operations on those registers.
2408 if (hasMultipleConditionRegisters())
2409 return false;
2410 // Only do the transform if the value won't be split into multiple
2411 // registers.
2412 LegalizeTypeAction Action = getTypeAction(Context, VT);
2413 return Action != TypeExpandInteger && Action != TypeExpandFloat &&
2414 Action != TypeSplitVector;
2415 }
2416
2417 virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; }
2418
2419 /// Return true if a select of constants (select Cond, C1, C2) should be
2420 /// transformed into simple math ops with the condition value.
For example: 2421 /// select Cond, C1, C1-1 --> add (zext Cond), C1-1 2422 virtual bool convertSelectOfConstantsToMath(EVT VT) const { 2423 return false; 2424 } 2425 2426 /// Return true if it is profitable to transform an integer 2427 /// multiplication-by-constant into simpler operations like shifts and adds. 2428 /// This may be true if the target does not directly support the 2429 /// multiplication operation for the specified type or the sequence of simpler 2430 /// ops is faster than the multiply. 2431 virtual bool decomposeMulByConstant(LLVMContext &Context, 2432 EVT VT, SDValue C) const { 2433 return false; 2434 } 2435 2436 /// Return true if it may be profitable to transform 2437 /// (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). 2438 /// This may not be true if c1 and c2 can be represented as immediates but 2439 /// c1*c2 cannot, for example. 2440 /// The target should check if c1, c2 and c1*c2 can be represented as 2441 /// immediates, or have to be materialized into registers. If it is not sure 2442 /// about some cases, a default true can be returned to let the DAGCombiner 2443 /// decide. 2444 /// AddNode is (add x, c1), and ConstNode is c2. 2445 virtual bool isMulAddWithConstProfitable(SDValue AddNode, 2446 SDValue ConstNode) const { 2447 return true; 2448 } 2449 2450 /// Return true if it is more correct/profitable to use strict FP_TO_INT 2451 /// conversion operations - canonicalizing the FP source value instead of 2452 /// converting all cases and then selecting based on value. 2453 /// This may be true if the target throws exceptions for out of bounds 2454 /// conversions or has fast FP CMOV. 2455 virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, 2456 bool IsSigned) const { 2457 return false; 2458 } 2459 2460 /// Return true if it is beneficial to expand an @llvm.powi.* intrinsic. 2461 /// If not optimizing for size, expanding @llvm.powi.* intrinsics is always 2462 /// considered beneficial. 2463 /// If optimizing for size, expansion is only considered beneficial for upto 2464 /// 5 multiplies and a divide (if the exponent is negative). 2465 bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const { 2466 if (Exponent < 0) 2467 Exponent = -Exponent; 2468 uint64_t E = static_cast<uint64_t>(Exponent); 2469 return !OptForSize || (llvm::popcount(E) + Log2_64(E) < 7); 2470 } 2471 2472 //===--------------------------------------------------------------------===// 2473 // TargetLowering Configuration Methods - These methods should be invoked by 2474 // the derived class constructor to configure this object for the target. 2475 // 2476 protected: 2477 /// Specify how the target extends the result of integer and floating point 2478 /// boolean values from i1 to a wider type. See getBooleanContents. 2479 void setBooleanContents(BooleanContent Ty) { 2480 BooleanContents = Ty; 2481 BooleanFloatContents = Ty; 2482 } 2483 2484 /// Specify how the target extends the result of integer and floating point 2485 /// boolean values from i1 to a wider type. See getBooleanContents. 2486 void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) { 2487 BooleanContents = IntTy; 2488 BooleanFloatContents = FloatTy; 2489 } 2490 2491 /// Specify how the target extends the result of a vector boolean value from a 2492 /// vector of i1 to a wider type. See getBooleanContents. 2493 void setBooleanVectorContents(BooleanContent Ty) { 2494 BooleanVectorContents = Ty; 2495 } 2496 2497 /// Specify the target scheduling preference. 
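  /// For example, a target that is typically register-pressure bound might
  /// call setSchedulingPreference(Sched::RegPressure) from its constructor
  /// (illustrative only; the appropriate preference is target specific).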
2498 void setSchedulingPreference(Sched::Preference Pref) { 2499 SchedPreferenceInfo = Pref; 2500 } 2501 2502 /// Indicate the minimum number of blocks to generate jump tables. 2503 void setMinimumJumpTableEntries(unsigned Val); 2504 2505 /// Indicate the maximum number of entries in jump tables. 2506 /// Set to zero to generate unlimited jump tables. 2507 void setMaximumJumpTableSize(unsigned); 2508 2509 /// If set to a physical register, this specifies the register that 2510 /// llvm.savestack/llvm.restorestack should save and restore. 2511 void setStackPointerRegisterToSaveRestore(Register R) { 2512 StackPointerRegisterToSaveRestore = R; 2513 } 2514 2515 /// Tells the code generator that the target has multiple (allocatable) 2516 /// condition registers that can be used to store the results of comparisons 2517 /// for use by selects and conditional branches. With multiple condition 2518 /// registers, the code generator will not aggressively sink comparisons into 2519 /// the blocks of their users. 2520 void setHasMultipleConditionRegisters(bool hasManyRegs = true) { 2521 HasMultipleConditionRegisters = hasManyRegs; 2522 } 2523 2524 /// Tells the code generator that the target has BitExtract instructions. 2525 /// The code generator will aggressively sink "shift"s into the blocks of 2526 /// their users if the users will generate "and" instructions which can be 2527 /// combined with "shift" to BitExtract instructions. 2528 void setHasExtractBitsInsn(bool hasExtractInsn = true) { 2529 HasExtractBitsInsn = hasExtractInsn; 2530 } 2531 2532 /// Tells the code generator not to expand logic operations on comparison 2533 /// predicates into separate sequences that increase the amount of flow 2534 /// control. 2535 void setJumpIsExpensive(bool isExpensive = true); 2536 2537 /// Tells the code generator which bitwidths to bypass. 2538 void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) { 2539 BypassSlowDivWidths[SlowBitWidth] = FastBitWidth; 2540 } 2541 2542 /// Add the specified register class as an available regclass for the 2543 /// specified value type. This indicates the selector can handle values of 2544 /// that class natively. 2545 void addRegisterClass(MVT VT, const TargetRegisterClass *RC) { 2546 assert((unsigned)VT.SimpleTy < std::size(RegClassForVT)); 2547 RegClassForVT[VT.SimpleTy] = RC; 2548 } 2549 2550 /// Return the largest legal super-reg register class of the register class 2551 /// for the specified type and its associated "cost". 2552 virtual std::pair<const TargetRegisterClass *, uint8_t> 2553 findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const; 2554 2555 /// Once all of the register classes are added, this allows us to compute 2556 /// derived properties we expose. 2557 void computeRegisterProperties(const TargetRegisterInfo *TRI); 2558 2559 /// Indicate that the specified operation does not work with the specified 2560 /// type and indicate what to do about it. Note that VT may refer to either 2561 /// the type of a result or that of an operand of Op. 
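  /// A typical (hypothetical) constructor fragment using these configuration
  /// hooks; the register class and subtarget names are placeholders:
  /// \code
  ///   addRegisterClass(MVT::i32, &MyTarget::GPRRegClass);
  ///   setOperationAction(ISD::SDIV, MVT::i32, Expand);  // no divide unit
  ///   setOperationAction(ISD::CTPOP, MVT::i32, Custom); // lowered by hand
  ///   setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);
  ///   computeRegisterProperties(Subtarget.getRegisterInfo());
  /// \endcode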
2562 void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) { 2563 assert(Op < std::size(OpActions[0]) && "Table isn't big enough!"); 2564 OpActions[(unsigned)VT.SimpleTy][Op] = Action; 2565 } 2566 void setOperationAction(ArrayRef<unsigned> Ops, MVT VT, 2567 LegalizeAction Action) { 2568 for (auto Op : Ops) 2569 setOperationAction(Op, VT, Action); 2570 } 2571 void setOperationAction(ArrayRef<unsigned> Ops, ArrayRef<MVT> VTs, 2572 LegalizeAction Action) { 2573 for (auto VT : VTs) 2574 setOperationAction(Ops, VT, Action); 2575 } 2576 2577 /// Indicate that the specified load with extension does not work with the 2578 /// specified type and indicate what to do about it. 2579 void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, 2580 LegalizeAction Action) { 2581 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() && 2582 MemVT.isValid() && "Table isn't big enough!"); 2583 assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); 2584 unsigned Shift = 4 * ExtType; 2585 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift); 2586 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift; 2587 } 2588 void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT, 2589 LegalizeAction Action) { 2590 for (auto ExtType : ExtTypes) 2591 setLoadExtAction(ExtType, ValVT, MemVT, Action); 2592 } 2593 void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, 2594 ArrayRef<MVT> MemVTs, LegalizeAction Action) { 2595 for (auto MemVT : MemVTs) 2596 setLoadExtAction(ExtTypes, ValVT, MemVT, Action); 2597 } 2598 2599 /// Let target indicate that an extending atomic load of the specified type 2600 /// is legal. 2601 void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, 2602 LegalizeAction Action) { 2603 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() && 2604 MemVT.isValid() && "Table isn't big enough!"); 2605 assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); 2606 unsigned Shift = 4 * ExtType; 2607 AtomicLoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= 2608 ~((uint16_t)0xF << Shift); 2609 AtomicLoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= 2610 ((uint16_t)Action << Shift); 2611 } 2612 void setAtomicLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT, 2613 LegalizeAction Action) { 2614 for (auto ExtType : ExtTypes) 2615 setAtomicLoadExtAction(ExtType, ValVT, MemVT, Action); 2616 } 2617 void setAtomicLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, 2618 ArrayRef<MVT> MemVTs, LegalizeAction Action) { 2619 for (auto MemVT : MemVTs) 2620 setAtomicLoadExtAction(ExtTypes, ValVT, MemVT, Action); 2621 } 2622 2623 /// Indicate that the specified truncating store does not work with the 2624 /// specified type and indicate what to do about it. 2625 void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) { 2626 assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); 2627 TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action; 2628 } 2629 2630 /// Indicate that the specified indexed load does or does not work with the 2631 /// specified type and indicate what to do abort it. 
2632 /// 2633 /// NOTE: All indexed mode loads are initialized to Expand in 2634 /// TargetLowering.cpp 2635 void setIndexedLoadAction(ArrayRef<unsigned> IdxModes, MVT VT, 2636 LegalizeAction Action) { 2637 for (auto IdxMode : IdxModes) 2638 setIndexedModeAction(IdxMode, VT, IMAB_Load, Action); 2639 } 2640 2641 void setIndexedLoadAction(ArrayRef<unsigned> IdxModes, ArrayRef<MVT> VTs, 2642 LegalizeAction Action) { 2643 for (auto VT : VTs) 2644 setIndexedLoadAction(IdxModes, VT, Action); 2645 } 2646 2647 /// Indicate that the specified indexed store does or does not work with the 2648 /// specified type and indicate what to do about it. 2649 /// 2650 /// NOTE: All indexed mode stores are initialized to Expand in 2651 /// TargetLowering.cpp 2652 void setIndexedStoreAction(ArrayRef<unsigned> IdxModes, MVT VT, 2653 LegalizeAction Action) { 2654 for (auto IdxMode : IdxModes) 2655 setIndexedModeAction(IdxMode, VT, IMAB_Store, Action); 2656 } 2657 2658 void setIndexedStoreAction(ArrayRef<unsigned> IdxModes, ArrayRef<MVT> VTs, 2659 LegalizeAction Action) { 2660 for (auto VT : VTs) 2661 setIndexedStoreAction(IdxModes, VT, Action); 2662 } 2663 2664 /// Indicate that the specified indexed masked load does or does not work with 2665 /// the specified type and indicate what to do about it. 2666 /// 2667 /// NOTE: All indexed mode masked loads are initialized to Expand in 2668 /// TargetLowering.cpp 2669 void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, 2670 LegalizeAction Action) { 2671 setIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad, Action); 2672 } 2673 2674 /// Indicate that the specified indexed masked store does or does not work 2675 /// with the specified type and indicate what to do about it. 2676 /// 2677 /// NOTE: All indexed mode masked stores are initialized to Expand in 2678 /// TargetLowering.cpp 2679 void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, 2680 LegalizeAction Action) { 2681 setIndexedModeAction(IdxMode, VT, IMAB_MaskedStore, Action); 2682 } 2683 2684 /// Indicate that the specified condition code is or isn't supported on the 2685 /// target and indicate what to do about it. 2686 void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, MVT VT, 2687 LegalizeAction Action) { 2688 for (auto CC : CCs) { 2689 assert(VT.isValid() && (unsigned)CC < std::size(CondCodeActions) && 2690 "Table isn't big enough!"); 2691 assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); 2692 /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the 2693 /// 32-bit value and the upper 29 bits index into the second dimension of 2694 /// the array to select what 32-bit value to use. 2695 uint32_t Shift = 4 * (VT.SimpleTy & 0x7); 2696 CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift); 2697 CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift; 2698 } 2699 } 2700 void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, ArrayRef<MVT> VTs, 2701 LegalizeAction Action) { 2702 for (auto VT : VTs) 2703 setCondCodeAction(CCs, VT, Action); 2704 } 2705 2706 /// If Opc/OrigVT is specified as being promoted, the promotion code defaults 2707 /// to trying a larger integer/fp until it can find one that works. If that 2708 /// default is insufficient, this method can be used by the target to override 2709 /// the default. 
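  /// For instance, a hypothetical target with no i8 multiply but a legal i32
  /// multiply could call
  ///   setOperationPromotedToType(ISD::MUL, MVT::i8, MVT::i32);
  /// which both marks the i8 operation as Promote and records i32 as the
  /// explicit destination type, bypassing the default next-larger-type search.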
2710 void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { 2711 PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy; 2712 } 2713 2714 /// Convenience method to set an operation to Promote and specify the type 2715 /// in a single call. 2716 void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { 2717 setOperationAction(Opc, OrigVT, Promote); 2718 AddPromotedToType(Opc, OrigVT, DestVT); 2719 } 2720 void setOperationPromotedToType(ArrayRef<unsigned> Ops, MVT OrigVT, 2721 MVT DestVT) { 2722 for (auto Op : Ops) { 2723 setOperationAction(Op, OrigVT, Promote); 2724 AddPromotedToType(Op, OrigVT, DestVT); 2725 } 2726 } 2727 2728 /// Targets should invoke this method for each target independent node that 2729 /// they want to provide a custom DAG combiner for by implementing the 2730 /// PerformDAGCombine virtual method. 2731 void setTargetDAGCombine(ArrayRef<ISD::NodeType> NTs) { 2732 for (auto NT : NTs) { 2733 assert(unsigned(NT >> 3) < std::size(TargetDAGCombineArray)); 2734 TargetDAGCombineArray[NT >> 3] |= 1 << (NT & 7); 2735 } 2736 } 2737 2738 /// Set the target's minimum function alignment. 2739 void setMinFunctionAlignment(Align Alignment) { 2740 MinFunctionAlignment = Alignment; 2741 } 2742 2743 /// Set the target's preferred function alignment. This should be set if 2744 /// there is a performance benefit to higher-than-minimum alignment 2745 void setPrefFunctionAlignment(Align Alignment) { 2746 PrefFunctionAlignment = Alignment; 2747 } 2748 2749 /// Set the target's preferred loop alignment. Default alignment is one, it 2750 /// means the target does not care about loop alignment. The target may also 2751 /// override getPrefLoopAlignment to provide per-loop values. 2752 void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; } 2753 void setMaxBytesForAlignment(unsigned MaxBytes) { 2754 MaxBytesForAlignment = MaxBytes; 2755 } 2756 2757 /// Set the minimum stack alignment of an argument. 2758 void setMinStackArgumentAlignment(Align Alignment) { 2759 MinStackArgumentAlignment = Alignment; 2760 } 2761 2762 /// Set the maximum atomic operation size supported by the 2763 /// backend. Atomic operations greater than this size (as well as 2764 /// ones that are not naturally aligned), will be expanded by 2765 /// AtomicExpandPass into an __atomic_* library call. 2766 void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) { 2767 MaxAtomicSizeInBitsSupported = SizeInBits; 2768 } 2769 2770 /// Set the size in bits of the maximum div/rem the backend supports. 2771 /// Larger operations will be expanded by ExpandLargeDivRem. 2772 void setMaxDivRemBitWidthSupported(unsigned SizeInBits) { 2773 MaxDivRemBitWidthSupported = SizeInBits; 2774 } 2775 2776 /// Set the size in bits of the maximum fp convert the backend supports. 2777 /// Larger operations will be expanded by ExpandLargeFPConvert. 2778 void setMaxLargeFPConvertBitWidthSupported(unsigned SizeInBits) { 2779 MaxLargeFPConvertBitWidthSupported = SizeInBits; 2780 } 2781 2782 /// Sets the minimum cmpxchg or ll/sc size supported by the backend. 2783 void setMinCmpXchgSizeInBits(unsigned SizeInBits) { 2784 MinCmpXchgSizeInBits = SizeInBits; 2785 } 2786 2787 /// Sets whether unaligned atomic operations are supported. 
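/// As a purely illustrative sketch (the specific limits are hypothetical, not
/// a recommendation), a 32-bit target with 64-bit ll/sc could configure the
/// atomic-related limits together in its constructor:
/// \code
///   setMaxAtomicSizeInBitsSupported(64); // wider atomics become __atomic_* libcalls
///   setMinCmpXchgSizeInBits(32);         // narrower cmpxchg may be widened
///   setSupportsUnalignedAtomics(false);
/// \endcode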
2788 void setSupportsUnalignedAtomics(bool UnalignedSupported) { 2789 SupportsUnalignedAtomics = UnalignedSupported; 2790 } 2791 2792 public: 2793 //===--------------------------------------------------------------------===// 2794 // Addressing mode description hooks (used by LSR etc). 2795 // 2796 2797 /// CodeGenPrepare sinks address calculations into the same BB as Load/Store 2798 /// instructions reading the address. This allows as much computation as 2799 /// possible to be done in the address mode for that operand. This hook lets 2800 /// targets also pass back when this should be done on intrinsics which 2801 /// load/store. 2802 virtual bool getAddrModeArguments(const IntrinsicInst * /*I*/, 2803 SmallVectorImpl<Value *> & /*Ops*/, 2804 Type *& /*AccessTy*/) const { 2805 return false; 2806 } 2807 2808 /// This represents an addressing mode of: 2809 /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale 2810 /// If BaseGV is null, there is no BaseGV. 2811 /// If BaseOffs is zero, there is no base offset. 2812 /// If HasBaseReg is false, there is no base register. 2813 /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with 2814 /// no scale. 2815 /// If ScalableOffset is zero, there is no scalable offset. 2816 struct AddrMode { 2817 GlobalValue *BaseGV = nullptr; 2818 int64_t BaseOffs = 0; 2819 bool HasBaseReg = false; 2820 int64_t Scale = 0; 2821 int64_t ScalableOffset = 0; 2822 AddrMode() = default; 2823 }; 2824 2825 /// Return true if the addressing mode represented by AM is legal for this 2826 /// target, for a load/store of the specified type. 2827 /// 2828 /// The type may be VoidTy, in which case only return true if the addressing 2829 /// mode is legal for a load/store of any legal type. TODO: Handle 2830 /// pre/postinc as well. 2831 /// 2832 /// If the address space cannot be determined, it will be -1. 2833 /// 2834 /// TODO: Remove default argument 2835 virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, 2836 Type *Ty, unsigned AddrSpace, 2837 Instruction *I = nullptr) const; 2838 2839 /// Returns true if the targets addressing mode can target thread local 2840 /// storage (TLS). 2841 virtual bool addressingModeSupportsTLS(const GlobalValue &) const { 2842 return false; 2843 } 2844 2845 /// Return the prefered common base offset. 2846 virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, 2847 int64_t MaxOffset) const { 2848 return 0; 2849 } 2850 2851 /// Return true if the specified immediate is legal icmp immediate, that is 2852 /// the target has icmp instructions which can compare a register against the 2853 /// immediate without having to materialize the immediate into a register. 2854 virtual bool isLegalICmpImmediate(int64_t) const { 2855 return true; 2856 } 2857 2858 /// Return true if the specified immediate is legal add immediate, that is the 2859 /// target has add instructions which can add a register with the immediate 2860 /// without having to materialize the immediate into a register. 2861 virtual bool isLegalAddImmediate(int64_t) const { 2862 return true; 2863 } 2864 2865 /// Return true if adding the specified scalable immediate is legal, that is 2866 /// the target has add instructions which can add a register with the 2867 /// immediate (multiplied by vscale) without having to materialize the 2868 /// immediate into a register. 
2869 virtual bool isLegalAddScalableImmediate(int64_t) const { return false; } 2870 2871 /// Return true if the specified immediate is legal for the value input of a 2872 /// store instruction. 2873 virtual bool isLegalStoreImmediate(int64_t Value) const { 2874 // Default implementation assumes that at least 0 works since it is likely 2875 // that a zero register exists or a zero immediate is allowed. 2876 return Value == 0; 2877 } 2878 2879 /// Given a shuffle vector SVI representing a vector splat, return a new 2880 /// scalar type of size equal to SVI's scalar type if the new type is more 2881 /// profitable. Returns nullptr otherwise. For example under MVE float splats 2882 /// are converted to integer to prevent the need to move from SPR to GPR 2883 /// registers. 2884 virtual Type* shouldConvertSplatType(ShuffleVectorInst* SVI) const { 2885 return nullptr; 2886 } 2887 2888 /// Given a set in interconnected phis of type 'From' that are loaded/stored 2889 /// or bitcast to type 'To', return true if the set should be converted to 2890 /// 'To'. 2891 virtual bool shouldConvertPhiType(Type *From, Type *To) const { 2892 return (From->isIntegerTy() || From->isFloatingPointTy()) && 2893 (To->isIntegerTy() || To->isFloatingPointTy()); 2894 } 2895 2896 /// Returns true if the opcode is a commutative binary operation. 2897 virtual bool isCommutativeBinOp(unsigned Opcode) const { 2898 // FIXME: This should get its info from the td file. 2899 switch (Opcode) { 2900 case ISD::ADD: 2901 case ISD::SMIN: 2902 case ISD::SMAX: 2903 case ISD::UMIN: 2904 case ISD::UMAX: 2905 case ISD::MUL: 2906 case ISD::MULHU: 2907 case ISD::MULHS: 2908 case ISD::SMUL_LOHI: 2909 case ISD::UMUL_LOHI: 2910 case ISD::FADD: 2911 case ISD::FMUL: 2912 case ISD::AND: 2913 case ISD::OR: 2914 case ISD::XOR: 2915 case ISD::SADDO: 2916 case ISD::UADDO: 2917 case ISD::ADDC: 2918 case ISD::ADDE: 2919 case ISD::SADDSAT: 2920 case ISD::UADDSAT: 2921 case ISD::FMINNUM: 2922 case ISD::FMAXNUM: 2923 case ISD::FMINNUM_IEEE: 2924 case ISD::FMAXNUM_IEEE: 2925 case ISD::FMINIMUM: 2926 case ISD::FMAXIMUM: 2927 case ISD::FMINIMUMNUM: 2928 case ISD::FMAXIMUMNUM: 2929 case ISD::AVGFLOORS: 2930 case ISD::AVGFLOORU: 2931 case ISD::AVGCEILS: 2932 case ISD::AVGCEILU: 2933 case ISD::ABDS: 2934 case ISD::ABDU: 2935 return true; 2936 default: return false; 2937 } 2938 } 2939 2940 /// Return true if the node is a math/logic binary operator. 2941 virtual bool isBinOp(unsigned Opcode) const { 2942 // A commutative binop must be a binop. 2943 if (isCommutativeBinOp(Opcode)) 2944 return true; 2945 // These are non-commutative binops. 2946 switch (Opcode) { 2947 case ISD::SUB: 2948 case ISD::SHL: 2949 case ISD::SRL: 2950 case ISD::SRA: 2951 case ISD::ROTL: 2952 case ISD::ROTR: 2953 case ISD::SDIV: 2954 case ISD::UDIV: 2955 case ISD::SREM: 2956 case ISD::UREM: 2957 case ISD::SSUBSAT: 2958 case ISD::USUBSAT: 2959 case ISD::FSUB: 2960 case ISD::FDIV: 2961 case ISD::FREM: 2962 return true; 2963 default: 2964 return false; 2965 } 2966 } 2967 2968 /// Return true if it's free to truncate a value of type FromTy to type 2969 /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 2970 /// by referencing its sub-register AX. 2971 /// Targets must return false when FromTy <= ToTy. 2972 virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const { 2973 return false; 2974 } 2975 2976 /// Return true if a truncation from FromTy to ToTy is permitted when deciding 2977 /// whether a call is in tail position. 
Typically this means that both results 2978 /// would be assigned to the same register or stack slot, but it could mean 2979 /// the target performs adequate checks of its own before proceeding with the 2980 /// tail call. Targets must return false when FromTy <= ToTy. 2981 virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const { 2982 return false; 2983 } 2984 2985 virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { return false; } 2986 virtual bool isTruncateFree(LLT FromTy, LLT ToTy, LLVMContext &Ctx) const { 2987 return isTruncateFree(getApproximateEVTForLLT(FromTy, Ctx), 2988 getApproximateEVTForLLT(ToTy, Ctx)); 2989 } 2990 2991 /// Return true if truncating the specific node Val to type VT2 is free. 2992 virtual bool isTruncateFree(SDValue Val, EVT VT2) const { 2993 // Fallback to type matching. 2994 return isTruncateFree(Val.getValueType(), VT2); 2995 } 2996 2997 virtual bool isProfitableToHoist(Instruction *I) const { return true; } 2998 2999 /// Return true if the extension represented by \p I is free. 3000 /// Unlikely the is[Z|FP]ExtFree family which is based on types, 3001 /// this method can use the context provided by \p I to decide 3002 /// whether or not \p I is free. 3003 /// This method extends the behavior of the is[Z|FP]ExtFree family. 3004 /// In other words, if is[Z|FP]Free returns true, then this method 3005 /// returns true as well. The converse is not true. 3006 /// The target can perform the adequate checks by overriding isExtFreeImpl. 3007 /// \pre \p I must be a sign, zero, or fp extension. 3008 bool isExtFree(const Instruction *I) const { 3009 switch (I->getOpcode()) { 3010 case Instruction::FPExt: 3011 if (isFPExtFree(EVT::getEVT(I->getType()), 3012 EVT::getEVT(I->getOperand(0)->getType()))) 3013 return true; 3014 break; 3015 case Instruction::ZExt: 3016 if (isZExtFree(I->getOperand(0)->getType(), I->getType())) 3017 return true; 3018 break; 3019 case Instruction::SExt: 3020 break; 3021 default: 3022 llvm_unreachable("Instruction is not an extension"); 3023 } 3024 return isExtFreeImpl(I); 3025 } 3026 3027 /// Return true if \p Load and \p Ext can form an ExtLoad. 3028 /// For example, in AArch64 3029 /// %L = load i8, i8* %ptr 3030 /// %E = zext i8 %L to i32 3031 /// can be lowered into one load instruction 3032 /// ldrb w0, [x0] 3033 bool isExtLoad(const LoadInst *Load, const Instruction *Ext, 3034 const DataLayout &DL) const { 3035 EVT VT = getValueType(DL, Ext->getType()); 3036 EVT LoadVT = getValueType(DL, Load->getType()); 3037 3038 // If the load has other users and the truncate is not free, the ext 3039 // probably isn't free. 3040 if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) && 3041 !isTruncateFree(Ext->getType(), Load->getType())) 3042 return false; 3043 3044 // Check whether the target supports casts folded into loads. 3045 unsigned LType; 3046 if (isa<ZExtInst>(Ext)) 3047 LType = ISD::ZEXTLOAD; 3048 else { 3049 assert(isa<SExtInst>(Ext) && "Unexpected ext type!"); 3050 LType = ISD::SEXTLOAD; 3051 } 3052 3053 return isLoadExtLegal(LType, VT, LoadVT); 3054 } 3055 3056 /// Return true if any actual instruction that defines a value of type FromTy 3057 /// implicitly zero-extends the value to ToTy in the result register. 3058 /// 3059 /// The function should return true when it is likely that the truncate can 3060 /// be freely folded with an instruction defining a value of FromTy. If 3061 /// the defining instruction is unknown (because you're looking at a 3062 /// function argument, PHI, etc.) 
then the target may require an 3063 /// explicit truncate, which is not necessarily free, but this function 3064 /// does not deal with those cases. 3065 /// Targets must return false when FromTy >= ToTy. 3066 virtual bool isZExtFree(Type *FromTy, Type *ToTy) const { 3067 return false; 3068 } 3069 3070 virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { return false; } 3071 virtual bool isZExtFree(LLT FromTy, LLT ToTy, LLVMContext &Ctx) const { 3072 return isZExtFree(getApproximateEVTForLLT(FromTy, Ctx), 3073 getApproximateEVTForLLT(ToTy, Ctx)); 3074 } 3075 3076 /// Return true if zero-extending the specific node Val to type VT2 is free 3077 /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or 3078 /// because it's folded such as X86 zero-extending loads). 3079 virtual bool isZExtFree(SDValue Val, EVT VT2) const { 3080 return isZExtFree(Val.getValueType(), VT2); 3081 } 3082 3083 /// Return true if sign-extension from FromTy to ToTy is cheaper than 3084 /// zero-extension. 3085 virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const { 3086 return false; 3087 } 3088 3089 /// Return true if this constant should be sign extended when promoting to 3090 /// a larger type. 3091 virtual bool signExtendConstant(const ConstantInt *C) const { return false; } 3092 3093 /// Try to optimize extending or truncating conversion instructions (like 3094 /// zext, trunc, fptoui, uitofp) for the target. 3095 virtual bool 3096 optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, 3097 const TargetTransformInfo &TTI) const { 3098 return false; 3099 } 3100 3101 /// Return true if the target supplies and combines to a paired load 3102 /// two loaded values of type LoadedType next to each other in memory. 3103 /// RequiredAlignment gives the minimal alignment constraints that must be met 3104 /// to be able to select this paired load. 3105 /// 3106 /// This information is *not* used to generate actual paired loads, but it is 3107 /// used to generate a sequence of loads that is easier to combine into a 3108 /// paired load. 3109 /// For instance, something like this: 3110 /// a = load i64* addr 3111 /// b = trunc i64 a to i32 3112 /// c = lshr i64 a, 32 3113 /// d = trunc i64 c to i32 3114 /// will be optimized into: 3115 /// b = load i32* addr1 3116 /// d = load i32* addr2 3117 /// Where addr1 = addr2 +/- sizeof(i32). 3118 /// 3119 /// In other words, unless the target performs a post-isel load combining, 3120 /// this information should not be provided because it will generate more 3121 /// loads. 3122 virtual bool hasPairedLoad(EVT /*LoadedType*/, 3123 Align & /*RequiredAlignment*/) const { 3124 return false; 3125 } 3126 3127 /// Return true if the target has a vector blend instruction. 3128 virtual bool hasVectorBlend() const { return false; } 3129 3130 /// Get the maximum supported factor for interleaved memory accesses. 3131 /// Default to be the minimum interleave factor: 2. 3132 virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; } 3133 3134 /// Lower an interleaved load to target specific intrinsics. Return 3135 /// true on success. 3136 /// 3137 /// \p LI is the vector load instruction. 3138 /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector. 3139 /// \p Indices is the corresponding indices for each shufflevector. 3140 /// \p Factor is the interleave factor. 
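/// For illustration (the types and values are hypothetical), a factor-2
/// access typically looks like:
///   %wide = load <8 x i32>, ptr %ptr
///   %v0 = shufflevector <8 x i32> %wide, <8 x i32> poison,
///                       <4 x i32> <i32 0, i32 2, i32 4, i32 6>
///   %v1 = shufflevector <8 x i32> %wide, <8 x i32> poison,
///                       <4 x i32> <i32 1, i32 3, i32 5, i32 7>
/// with \p Shuffles = {%v0, %v1}, \p Indices = {0, 1} and \p Factor = 2.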
3141 virtual bool lowerInterleavedLoad(LoadInst *LI, 3142 ArrayRef<ShuffleVectorInst *> Shuffles, 3143 ArrayRef<unsigned> Indices, 3144 unsigned Factor) const { 3145 return false; 3146 } 3147 3148 /// Lower an interleaved store to target specific intrinsics. Return 3149 /// true on success. 3150 /// 3151 /// \p SI is the vector store instruction. 3152 /// \p SVI is the shufflevector to RE-interleave the stored vector. 3153 /// \p Factor is the interleave factor. 3154 virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, 3155 unsigned Factor) const { 3156 return false; 3157 } 3158 3159 /// Lower a deinterleave intrinsic to a target specific load intrinsic. 3160 /// Return true on success. Currently only supports 3161 /// llvm.vector.deinterleave2 3162 /// 3163 /// \p LI is the accompanying load instruction. 3164 /// \p DeinterleaveValues contains the deinterleaved values. 3165 virtual bool 3166 lowerDeinterleaveIntrinsicToLoad(LoadInst *LI, 3167 ArrayRef<Value *> DeinterleaveValues) const { 3168 return false; 3169 } 3170 3171 /// Lower an interleave intrinsic to a target specific store intrinsic. 3172 /// Return true on success. Currently only supports 3173 /// llvm.vector.interleave2 3174 /// 3175 /// \p SI is the accompanying store instruction 3176 /// \p InterleaveValues contains the interleaved values. 3177 virtual bool 3178 lowerInterleaveIntrinsicToStore(StoreInst *SI, 3179 ArrayRef<Value *> InterleaveValues) const { 3180 return false; 3181 } 3182 3183 /// Return true if an fpext operation is free (for instance, because 3184 /// single-precision floating-point numbers are implicitly extended to 3185 /// double-precision). 3186 virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const { 3187 assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() && 3188 "invalid fpext types"); 3189 return false; 3190 } 3191 3192 /// Return true if an fpext operation input to an \p Opcode operation is free 3193 /// (for instance, because half-precision floating-point numbers are 3194 /// implicitly extended to float-precision) for an FMA instruction. 3195 virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, 3196 LLT DestTy, LLT SrcTy) const { 3197 return false; 3198 } 3199 3200 /// Return true if an fpext operation input to an \p Opcode operation is free 3201 /// (for instance, because half-precision floating-point numbers are 3202 /// implicitly extended to float-precision) for an FMA instruction. 3203 virtual bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, 3204 EVT DestVT, EVT SrcVT) const { 3205 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && 3206 "invalid fpext types"); 3207 return isFPExtFree(DestVT, SrcVT); 3208 } 3209 3210 /// Return true if folding a vector load into ExtVal (a sign, zero, or any 3211 /// extend node) is profitable. 3212 virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; } 3213 3214 /// Return true if an fneg operation is free to the point where it is never 3215 /// worthwhile to replace it with a bitwise operation. 3216 virtual bool isFNegFree(EVT VT) const { 3217 assert(VT.isFloatingPoint()); 3218 return false; 3219 } 3220 3221 /// Return true if an fabs operation is free to the point where it is never 3222 /// worthwhile to replace it with a bitwise operation. 3223 virtual bool isFAbsFree(EVT VT) const { 3224 assert(VT.isFloatingPoint()); 3225 return false; 3226 } 3227 3228 /// Return true if an FMA operation is faster than a pair of fmul and fadd 3229 /// instructions. 
fmuladd intrinsics will be expanded to FMAs when this method
3230 /// returns true, otherwise fmuladd is expanded to fmul + fadd.
3231 ///
3232 /// NOTE: This may be called before legalization on types for which FMAs are
3233 /// not legal, but should return true if those types will eventually legalize
3234 /// to types that support FMAs. After legalization, it will only be called on
3235 /// types that support FMAs (via Legal or Custom actions).
3236 ///
3237 /// Targets that care about soft float support should return false when soft
3238 /// float code is being generated (i.e. use-soft-float).
3239 virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
3240 EVT) const {
3241 return false;
3242 }
3243
3244 /// Return true if an FMA operation is faster than a pair of fmul and fadd
3245 /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
3246 /// returns true, otherwise fmuladd is expanded to fmul + fadd.
3247 ///
3248 /// NOTE: This may be called before legalization on types for which FMAs are
3249 /// not legal, but should return true if those types will eventually legalize
3250 /// to types that support FMAs. After legalization, it will only be called on
3251 /// types that support FMAs (via Legal or Custom actions).
3252 virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
3253 LLT) const {
3254 return false;
3255 }
3256
3257 /// IR version
3258 virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const {
3259 return false;
3260 }
3261
3262 /// Returns true if \p MI can be combined with another instruction to
3263 /// form TargetOpcode::G_FMAD. \p MI may be a TargetOpcode::G_FADD,
3264 /// TargetOpcode::G_FSUB, or a TargetOpcode::G_FMUL which will be
3265 /// distributed into an fadd/fsub.
3266 virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const {
3267 assert((MI.getOpcode() == TargetOpcode::G_FADD ||
3268 MI.getOpcode() == TargetOpcode::G_FSUB ||
3269 MI.getOpcode() == TargetOpcode::G_FMUL) &&
3270 "unexpected node in FMAD forming combine");
3271 switch (Ty.getScalarSizeInBits()) {
3272 case 16:
3273 return isOperationLegal(TargetOpcode::G_FMAD, MVT::f16);
3274 case 32:
3275 return isOperationLegal(TargetOpcode::G_FMAD, MVT::f32);
3276 case 64:
3277 return isOperationLegal(TargetOpcode::G_FMAD, MVT::f64);
3278 default:
3279 break;
3280 }
3281
3282 return false;
3283 }
3284
3285 /// Returns true if \p N can be combined with another node to form an
3286 /// ISD::FMAD. \p N may be an ISD::FADD, ISD::FSUB, or an ISD::FMUL which
3287 /// will be distributed into an fadd/fsub.
3288 virtual bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const {
3289 assert((N->getOpcode() == ISD::FADD || N->getOpcode() == ISD::FSUB ||
3290 N->getOpcode() == ISD::FMUL) &&
3291 "unexpected node in FMAD forming combine");
3292 return isOperationLegal(ISD::FMAD, N->getValueType(0));
3293 }
3294
3295 // Return true when the decision to generate FMAs (or FMS, FMLA, etc.) rather
3296 // than FMUL and ADD is delegated to the machine combiner.
3297 virtual bool generateFMAsInMachineCombiner(EVT VT,
3298 CodeGenOptLevel OptLevel) const {
3299 return false;
3300 }
3301
3302 /// Return true if it's profitable to narrow operations of type SrcVT to
3303 /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not from
3304 /// i32 to i16.
3305 virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const { 3306 return false; 3307 } 3308 3309 /// Return true if pulling a binary operation into a select with an identity 3310 /// constant is profitable. This is the inverse of an IR transform. 3311 /// Example: X + (Cond ? Y : 0) --> Cond ? (X + Y) : X 3312 virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, 3313 EVT VT) const { 3314 return false; 3315 } 3316 3317 /// Return true if it is beneficial to convert a load of a constant to 3318 /// just the constant itself. 3319 /// On some targets it might be more efficient to use a combination of 3320 /// arithmetic instructions to materialize the constant instead of loading it 3321 /// from a constant pool. 3322 virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm, 3323 Type *Ty) const { 3324 return false; 3325 } 3326 3327 /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type 3328 /// from this source type with this index. This is needed because 3329 /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of 3330 /// the first element, and only the target knows which lowering is cheap. 3331 virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, 3332 unsigned Index) const { 3333 return false; 3334 } 3335 3336 /// Try to convert an extract element of a vector binary operation into an 3337 /// extract element followed by a scalar operation. 3338 virtual bool shouldScalarizeBinop(SDValue VecOp) const { 3339 return false; 3340 } 3341 3342 /// Return true if extraction of a scalar element from the given vector type 3343 /// at the given index is cheap. For example, if scalar operations occur on 3344 /// the same register file as vector operations, then an extract element may 3345 /// be a sub-register rename rather than an actual instruction. 3346 virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const { 3347 return false; 3348 } 3349 3350 /// Try to convert math with an overflow comparison into the corresponding DAG 3351 /// node operation. Targets may want to override this independently of whether 3352 /// the operation is legal/custom for the given type because it may obscure 3353 /// matching of other patterns. 3354 virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, 3355 bool MathUsed) const { 3356 // TODO: The default logic is inherited from code in CodeGenPrepare. 3357 // The opcode should not make a difference by default? 3358 if (Opcode != ISD::UADDO) 3359 return false; 3360 3361 // Allow the transform as long as we have an integer type that is not 3362 // obviously illegal and unsupported and if the math result is used 3363 // besides the overflow check. On some targets (e.g. SPARC), it is 3364 // not profitable to form on overflow op if the math result has no 3365 // concrete users. 3366 if (VT.isVector()) 3367 return false; 3368 return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT)); 3369 } 3370 3371 // Return true if it is profitable to use a scalar input to a BUILD_VECTOR 3372 // even if the vector itself has multiple uses. 3373 virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const { 3374 return false; 3375 } 3376 3377 // Return true if CodeGenPrepare should consider splitting large offset of a 3378 // GEP to make the GEP fit into the addressing mode and can be sunk into the 3379 // same blocks of its users. 
3380 virtual bool shouldConsiderGEPOffsetSplit() const { return false; } 3381 3382 /// Return true if creating a shift of the type by the given 3383 /// amount is not profitable. 3384 virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const { 3385 return false; 3386 } 3387 3388 // Should we fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) 3389 // A) where y has a single bit set? 3390 virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, 3391 const APInt &AndMask) const { 3392 unsigned ShCt = AndMask.getBitWidth() - 1; 3393 return !shouldAvoidTransformToShift(VT, ShCt); 3394 } 3395 3396 /// Does this target require the clearing of high-order bits in a register 3397 /// passed to the fp16 to fp conversion library function. 3398 virtual bool shouldKeepZExtForFP16Conv() const { return false; } 3399 3400 /// Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT 3401 /// from min(max(fptoi)) saturation patterns. 3402 virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const { 3403 return isOperationLegalOrCustom(Op, VT); 3404 } 3405 3406 /// Should we expand [US]CMP nodes using two selects and two compares, or by 3407 /// doing arithmetic on boolean types 3408 virtual bool shouldExpandCmpUsingSelects(EVT VT) const { return false; } 3409 3410 /// Does this target support complex deinterleaving 3411 virtual bool isComplexDeinterleavingSupported() const { return false; } 3412 3413 /// Does this target support complex deinterleaving with the given operation 3414 /// and type 3415 virtual bool isComplexDeinterleavingOperationSupported( 3416 ComplexDeinterleavingOperation Operation, Type *Ty) const { 3417 return false; 3418 } 3419 3420 /// Create the IR node for the given complex deinterleaving operation. 3421 /// If one cannot be created using all the given inputs, nullptr should be 3422 /// returned. 3423 virtual Value *createComplexDeinterleavingIR( 3424 IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, 3425 ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, 3426 Value *Accumulator = nullptr) const { 3427 return nullptr; 3428 } 3429 3430 /// Rename the default libcall routine name for the specified libcall. 3431 void setLibcallName(RTLIB::Libcall Call, const char *Name) { 3432 Libcalls.setLibcallName(Call, Name); 3433 } 3434 3435 void setLibcallName(ArrayRef<RTLIB::Libcall> Calls, const char *Name) { 3436 Libcalls.setLibcallName(Calls, Name); 3437 } 3438 3439 /// Get the libcall routine name for the specified libcall. 3440 const char *getLibcallName(RTLIB::Libcall Call) const { 3441 return Libcalls.getLibcallName(Call); 3442 } 3443 3444 /// Override the default CondCode to be used to test the result of the 3445 /// comparison libcall against zero. 3446 /// FIXME: This can't be merged with 'RuntimeLibcallsInfo' because of the ISD. 3447 void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) { 3448 CmpLibcallCCs[Call] = CC; 3449 } 3450 3451 3452 /// Get the CondCode that's to be used to test the result of the comparison 3453 /// libcall against zero. 3454 /// FIXME: This can't be merged with 'RuntimeLibcallsInfo' because of the ISD. 3455 ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const { 3456 return CmpLibcallCCs[Call]; 3457 } 3458 3459 3460 /// Set the CallingConv that should be used for the specified libcall. 
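/// As a purely illustrative sketch (the symbol name and calling-convention
/// choice are hypothetical), a target shipping its own soft-float helpers
/// might configure a libcall as:
/// \code
///   setLibcallName(RTLIB::ADD_F64, "__mytarget_adddf3");
///   setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::Fast);
///   setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETEQ);
/// \endcode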
3461 void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) { 3462 Libcalls.setLibcallCallingConv(Call, CC); 3463 } 3464 3465 /// Get the CallingConv that should be used for the specified libcall. 3466 CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const { 3467 return Libcalls.getLibcallCallingConv(Call); 3468 } 3469 3470 /// Execute target specific actions to finalize target lowering. 3471 /// This is used to set extra flags in MachineFrameInformation and freezing 3472 /// the set of reserved registers. 3473 /// The default implementation just freezes the set of reserved registers. 3474 virtual void finalizeLowering(MachineFunction &MF) const; 3475 3476 //===----------------------------------------------------------------------===// 3477 // GlobalISel Hooks 3478 //===----------------------------------------------------------------------===// 3479 /// Check whether or not \p MI needs to be moved close to its uses. 3480 virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const; 3481 3482 3483 private: 3484 const TargetMachine &TM; 3485 3486 /// Tells the code generator that the target has multiple (allocatable) 3487 /// condition registers that can be used to store the results of comparisons 3488 /// for use by selects and conditional branches. With multiple condition 3489 /// registers, the code generator will not aggressively sink comparisons into 3490 /// the blocks of their users. 3491 bool HasMultipleConditionRegisters; 3492 3493 /// Tells the code generator that the target has BitExtract instructions. 3494 /// The code generator will aggressively sink "shift"s into the blocks of 3495 /// their users if the users will generate "and" instructions which can be 3496 /// combined with "shift" to BitExtract instructions. 3497 bool HasExtractBitsInsn; 3498 3499 /// Tells the code generator to bypass slow divide or remainder 3500 /// instructions. For example, BypassSlowDivWidths[32,8] tells the code 3501 /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer 3502 /// div/rem when the operands are positive and less than 256. 3503 DenseMap <unsigned int, unsigned int> BypassSlowDivWidths; 3504 3505 /// Tells the code generator that it shouldn't generate extra flow control 3506 /// instructions and should attempt to combine flow control instructions via 3507 /// predication. 3508 bool JumpIsExpensive; 3509 3510 /// Information about the contents of the high-bits in boolean values held in 3511 /// a type wider than i1. See getBooleanContents. 3512 BooleanContent BooleanContents; 3513 3514 /// Information about the contents of the high-bits in boolean values held in 3515 /// a type wider than i1. See getBooleanContents. 3516 BooleanContent BooleanFloatContents; 3517 3518 /// Information about the contents of the high-bits in boolean vector values 3519 /// when the element type is wider than i1. See getBooleanContents. 3520 BooleanContent BooleanVectorContents; 3521 3522 /// The target scheduling preference: shortest possible total cycles or lowest 3523 /// register usage. 3524 Sched::Preference SchedPreferenceInfo; 3525 3526 /// The minimum alignment that any argument on the stack needs to have. 3527 Align MinStackArgumentAlignment; 3528 3529 /// The minimum function alignment (used when optimizing for size, and to 3530 /// prevent explicitly provided alignment from leading to incorrect code). 
3531 Align MinFunctionAlignment;
3532
3533 /// The preferred function alignment (used when alignment unspecified and
3534 /// optimizing for speed).
3535 Align PrefFunctionAlignment;
3536
3537 /// The preferred loop alignment (in bytes, not log2).
3538 Align PrefLoopAlignment;
3539 /// The maximum number of bytes permitted to be emitted for alignment.
3540 unsigned MaxBytesForAlignment;
3541
3542 /// Size in bits of the maximum atomics size the backend supports.
3543 /// Accesses larger than this will be expanded by AtomicExpandPass.
3544 unsigned MaxAtomicSizeInBitsSupported;
3545
3546 /// Size in bits of the maximum div/rem size the backend supports.
3547 /// Larger operations will be expanded by ExpandLargeDivRem.
3548 unsigned MaxDivRemBitWidthSupported;
3549
3550 /// Size in bits of the maximum fp convert size the backend
3551 /// supports. Larger operations will be expanded by ExpandLargeFPConvert.
3552 unsigned MaxLargeFPConvertBitWidthSupported;
3553
3554 /// Size in bits of the minimum cmpxchg or ll/sc operation the
3555 /// backend supports.
3556 unsigned MinCmpXchgSizeInBits;
3557
3558 /// This indicates if the target supports unaligned atomic operations.
3559 bool SupportsUnalignedAtomics;
3560
3561 /// If set to a physical register, this specifies the register that
3562 /// llvm.stacksave/llvm.stackrestore should save and restore.
3563 Register StackPointerRegisterToSaveRestore;
3564
3565 /// This indicates the default register class to use for each ValueType the
3566 /// target supports natively.
3567 const TargetRegisterClass *RegClassForVT[MVT::VALUETYPE_SIZE];
3568 uint16_t NumRegistersForVT[MVT::VALUETYPE_SIZE];
3569 MVT RegisterTypeForVT[MVT::VALUETYPE_SIZE];
3570
3571 /// This indicates the "representative" register class to use for each
3572 /// ValueType the target supports natively. This information is used by the
3573 /// scheduler to track register pressure. By default, the representative
3574 /// register class is the largest legal super-reg register class of the
3575 /// register class of the specified type. e.g. On x86, i8, i16, and i32's
3576 /// representative class would be GR32.
3577 const TargetRegisterClass *RepRegClassForVT[MVT::VALUETYPE_SIZE] = {0};
3578
3579 /// This indicates the "cost" of the "representative" register class for each
3580 /// ValueType. The cost is used by the scheduler to approximate register
3581 /// pressure.
3582 uint8_t RepRegClassCostForVT[MVT::VALUETYPE_SIZE];
3583
3584 /// For any value types we are promoting or expanding, this contains the value
3585 /// type that we are changing to. For Expanded types, this contains one step
3586 /// of the expand (e.g. i64 -> i32), even if there are multiple steps required
3587 /// (e.g. i64 -> i16). For types natively supported by the system, this holds
3588 /// the same type (e.g. i32 -> i32).
3589 MVT TransformToType[MVT::VALUETYPE_SIZE];
3590
3591 /// For each operation and each value type, keep a LegalizeAction that
3592 /// indicates how instruction selection should deal with the operation. Most
3593 /// operations are Legal (aka, supported natively by the target), but
3594 /// operations that are not should be described. Note that operations on
3595 /// non-legal value types are not described here.
3596 LegalizeAction OpActions[MVT::VALUETYPE_SIZE][ISD::BUILTIN_OP_END]; 3597 3598 /// For each load extension type and each value type, keep a LegalizeAction 3599 /// that indicates how instruction selection should deal with a load of a 3600 /// specific value type and extension type. Uses 4-bits to store the action 3601 /// for each of the 4 load ext types. 3602 uint16_t LoadExtActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE]; 3603 3604 /// Similar to LoadExtActions, but for atomic loads. Only Legal or Expand 3605 /// (default) values are supported. 3606 uint16_t AtomicLoadExtActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE]; 3607 3608 /// For each value type pair keep a LegalizeAction that indicates whether a 3609 /// truncating store of a specific value type and truncating type is legal. 3610 LegalizeAction TruncStoreActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE]; 3611 3612 /// For each indexed mode and each value type, keep a quad of LegalizeAction 3613 /// that indicates how instruction selection should deal with the load / 3614 /// store / maskedload / maskedstore. 3615 /// 3616 /// The first dimension is the value_type for the reference. The second 3617 /// dimension represents the various modes for load store. 3618 uint16_t IndexedModeActions[MVT::VALUETYPE_SIZE][ISD::LAST_INDEXED_MODE]; 3619 3620 /// For each condition code (ISD::CondCode) keep a LegalizeAction that 3621 /// indicates how instruction selection should deal with the condition code. 3622 /// 3623 /// Because each CC action takes up 4 bits, we need to have the array size be 3624 /// large enough to fit all of the value types. This can be done by rounding 3625 /// up the MVT::VALUETYPE_SIZE value to the next multiple of 8. 3626 uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::VALUETYPE_SIZE + 7) / 8]; 3627 3628 ValueTypeActionImpl ValueTypeActions; 3629 3630 private: 3631 /// Targets can specify ISD nodes that they would like PerformDAGCombine 3632 /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this 3633 /// array. 3634 unsigned char 3635 TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT]; 3636 3637 /// For operations that must be promoted to a specific type, this holds the 3638 /// destination type. This map should be sparse, so don't hold it as an 3639 /// array. 3640 /// 3641 /// Targets add entries to this map with AddPromotedToType(..), clients access 3642 /// this with getTypeToPromoteTo(..). 3643 std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType> 3644 PromoteToType; 3645 3646 /// The list of libcalls that the target will use. 3647 RTLIB::RuntimeLibcallsInfo Libcalls; 3648 3649 /// The ISD::CondCode that should be used to test the result of each of the 3650 /// comparison libcall against zero. 3651 ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL]; 3652 3653 /// The bits of IndexedModeActions used to store the legalisation actions 3654 /// We store the data as | ML | MS | L | S | each taking 4 bits. 
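/// For example, the masked-load action for a given VT and index mode is read
/// as (IndexedModeActions[VT.SimpleTy][IdxMode] >> IMAB_MaskedLoad) & 0xf.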
3655 enum IndexedModeActionsBits { 3656 IMAB_Store = 0, 3657 IMAB_Load = 4, 3658 IMAB_MaskedStore = 8, 3659 IMAB_MaskedLoad = 12 3660 }; 3661 3662 void setIndexedModeAction(unsigned IdxMode, MVT VT, unsigned Shift, 3663 LegalizeAction Action) { 3664 assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && 3665 (unsigned)Action < 0xf && "Table isn't big enough!"); 3666 unsigned Ty = (unsigned)VT.SimpleTy; 3667 IndexedModeActions[Ty][IdxMode] &= ~(0xf << Shift); 3668 IndexedModeActions[Ty][IdxMode] |= ((uint16_t)Action) << Shift; 3669 } 3670 3671 LegalizeAction getIndexedModeAction(unsigned IdxMode, MVT VT, 3672 unsigned Shift) const { 3673 assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && 3674 "Table isn't big enough!"); 3675 unsigned Ty = (unsigned)VT.SimpleTy; 3676 return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] >> Shift) & 0xf); 3677 } 3678 3679 protected: 3680 /// Return true if the extension represented by \p I is free. 3681 /// \pre \p I is a sign, zero, or fp extension and 3682 /// is[Z|FP]ExtFree of the related types is not true. 3683 virtual bool isExtFreeImpl(const Instruction *I) const { return false; } 3684 3685 /// Depth that GatherAllAliases should continue looking for chain 3686 /// dependencies when trying to find a more preferable chain. As an 3687 /// approximation, this should be more than the number of consecutive stores 3688 /// expected to be merged. 3689 unsigned GatherAllAliasesMaxDepth; 3690 3691 /// \brief Specify maximum number of store instructions per memset call. 3692 /// 3693 /// When lowering \@llvm.memset this field specifies the maximum number of 3694 /// store operations that may be substituted for the call to memset. Targets 3695 /// must set this value based on the cost threshold for that target. Targets 3696 /// should assume that the memset will be done using as many of the largest 3697 /// store operations first, followed by smaller ones, if necessary, per 3698 /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine 3699 /// with 16-bit alignment would result in four 2-byte stores and one 1-byte 3700 /// store. This only applies to setting a constant array of a constant size. 3701 unsigned MaxStoresPerMemset; 3702 /// Likewise for functions with the OptSize attribute. 3703 unsigned MaxStoresPerMemsetOptSize; 3704 3705 /// \brief Specify maximum number of store instructions per memcpy call. 3706 /// 3707 /// When lowering \@llvm.memcpy this field specifies the maximum number of 3708 /// store operations that may be substituted for a call to memcpy. Targets 3709 /// must set this value based on the cost threshold for that target. Targets 3710 /// should assume that the memcpy will be done using as many of the largest 3711 /// store operations first, followed by smaller ones, if necessary, per 3712 /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine 3713 /// with 32-bit alignment would result in one 4-byte store, a one 2-byte store 3714 /// and one 1-byte store. This only applies to copying a constant array of 3715 /// constant size. 3716 unsigned MaxStoresPerMemcpy; 3717 /// Likewise for functions with the OptSize attribute. 3718 unsigned MaxStoresPerMemcpyOptSize; 3719 /// \brief Specify max number of store instructions to glue in inlined memcpy. 3720 /// 3721 /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number 3722 /// of store instructions to keep together. This helps in pairing and 3723 // vectorization later on. 
3724 unsigned MaxGluedStoresPerMemcpy = 0;
3725
3726 /// \brief Specify maximum number of load instructions per memcmp call.
3727 ///
3728 /// When lowering \@llvm.memcmp this field specifies the maximum number of
3729 /// pairs of load operations that may be substituted for a call to memcmp.
3730 /// Targets must set this value based on the cost threshold for that target.
3731 /// Targets should assume that the memcmp will be done using as many of the
3732 /// largest load operations first, followed by smaller ones, if necessary, per
3733 /// alignment restrictions. For example, loading 7 bytes on a 32-bit machine
3734 /// with 32-bit alignment would result in one 4-byte load, one 2-byte load,
3735 /// and one 1-byte load. This only applies to copying a constant array of
3736 /// constant size.
3737 unsigned MaxLoadsPerMemcmp;
3738 /// Likewise for functions with the OptSize attribute.
3739 unsigned MaxLoadsPerMemcmpOptSize;
3740
3741 /// \brief Specify maximum number of store instructions per memmove call.
3742 ///
3743 /// When lowering \@llvm.memmove this field specifies the maximum number of
3744 /// store instructions that may be substituted for a call to memmove. Targets
3745 /// must set this value based on the cost threshold for that target. Targets
3746 /// should assume that the memmove will be done using as many of the largest
3747 /// store operations first, followed by smaller ones, if necessary, per
3748 /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine
3749 /// with 8-bit alignment would result in nine 1-byte stores. This only
3750 /// applies to copying a constant array of constant size.
3751 unsigned MaxStoresPerMemmove;
3752 /// Likewise for functions with the OptSize attribute.
3753 unsigned MaxStoresPerMemmoveOptSize;
3754
3755 /// Tells the code generator that select is more expensive than a branch if
3756 /// the branch is usually predicted right.
3757 bool PredictableSelectIsExpensive;
3758
3759 /// \see enableExtLdPromotion.
3760 bool EnableExtLdPromotion;
3761
3762 /// Return true if the value types that can be represented by the specified
3763 /// register class are all legal.
3764 bool isLegalRC(const TargetRegisterInfo &TRI,
3765 const TargetRegisterClass &RC) const;
3766
3767 /// Replace/modify any TargetFrameIndex operands with a target-dependent
3768 /// sequence of memory operands that is recognized by PrologEpilogInserter.
3769 MachineBasicBlock *emitPatchPoint(MachineInstr &MI,
3770 MachineBasicBlock *MBB) const;
3771
3772 bool IsStrictFPEnabled;
3773 };
3774
3775 /// This class defines information used to lower LLVM code to legal SelectionDAG
3776 /// operators that the target instruction selector can accept natively.
3777 ///
3778 /// This class also defines callbacks that targets must implement to lower
3779 /// target-specific constructs to SelectionDAG operators.
3780 class TargetLowering : public TargetLoweringBase { 3781 public: 3782 struct DAGCombinerInfo; 3783 struct MakeLibCallOptions; 3784 3785 TargetLowering(const TargetLowering &) = delete; 3786 TargetLowering &operator=(const TargetLowering &) = delete; 3787 3788 explicit TargetLowering(const TargetMachine &TM); 3789 3790 bool isPositionIndependent() const; 3791 3792 virtual bool isSDNodeSourceOfDivergence(const SDNode *N, 3793 FunctionLoweringInfo *FLI, 3794 UniformityInfo *UA) const { 3795 return false; 3796 } 3797 3798 // Lets target to control the following reassociation of operands: (op (op x, 3799 // c1), y) -> (op (op x, y), c1) where N0 is (op x, c1) and N1 is y. By 3800 // default consider profitable any case where N0 has single use. This 3801 // behavior reflects the condition replaced by this target hook call in the 3802 // DAGCombiner. Any particular target can implement its own heuristic to 3803 // restrict common combiner. 3804 virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, 3805 SDValue N1) const { 3806 return N0.hasOneUse(); 3807 } 3808 3809 // Lets target to control the following reassociation of operands: (op (op x, 3810 // c1), y) -> (op (op x, y), c1) where N0 is (op x, c1) and N1 is y. By 3811 // default consider profitable any case where N0 has single use. This 3812 // behavior reflects the condition replaced by this target hook call in the 3813 // combiner. Any particular target can implement its own heuristic to 3814 // restrict common combiner. 3815 virtual bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0, 3816 Register N1) const { 3817 return MRI.hasOneNonDBGUse(N0); 3818 } 3819 3820 virtual bool isSDNodeAlwaysUniform(const SDNode * N) const { 3821 return false; 3822 } 3823 3824 /// Returns true by value, base pointer and offset pointer and addressing mode 3825 /// by reference if the node's address can be legally represented as 3826 /// pre-indexed load / store address. 3827 virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/, 3828 SDValue &/*Offset*/, 3829 ISD::MemIndexedMode &/*AM*/, 3830 SelectionDAG &/*DAG*/) const { 3831 return false; 3832 } 3833 3834 /// Returns true by value, base pointer and offset pointer and addressing mode 3835 /// by reference if this node can be combined with a load / store to form a 3836 /// post-indexed load / store. 3837 virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/, 3838 SDValue &/*Base*/, 3839 SDValue &/*Offset*/, 3840 ISD::MemIndexedMode &/*AM*/, 3841 SelectionDAG &/*DAG*/) const { 3842 return false; 3843 } 3844 3845 /// Returns true if the specified base+offset is a legal indexed addressing 3846 /// mode for this target. \p MI is the load or store instruction that is being 3847 /// considered for transformation. 3848 virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset, 3849 bool IsPre, MachineRegisterInfo &MRI) const { 3850 return false; 3851 } 3852 3853 /// Return the entry encoding for a jump table in the current function. The 3854 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. 
3855 virtual unsigned getJumpTableEncoding() const; 3856 3857 virtual MVT getJumpTableRegTy(const DataLayout &DL) const { 3858 return getPointerTy(DL); 3859 } 3860 3861 virtual const MCExpr * 3862 LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/, 3863 const MachineBasicBlock * /*MBB*/, unsigned /*uid*/, 3864 MCContext &/*Ctx*/) const { 3865 llvm_unreachable("Need to implement this hook if target has custom JTIs"); 3866 } 3867 3868 /// Returns relocation base for the given PIC jumptable. 3869 virtual SDValue getPICJumpTableRelocBase(SDValue Table, 3870 SelectionDAG &DAG) const; 3871 3872 /// This returns the relocation base for the given PIC jumptable, the same as 3873 /// getPICJumpTableRelocBase, but as an MCExpr. 3874 virtual const MCExpr * 3875 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, 3876 unsigned JTI, MCContext &Ctx) const; 3877 3878 /// Return true if folding a constant offset with the given GlobalAddress is 3879 /// legal. It is frequently not legal in PIC relocation models. 3880 virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; 3881 3882 /// On x86, return true if the operand with index OpNo is a CALL or JUMP 3883 /// instruction, which can use either a memory constraint or an address 3884 /// constraint. -fasm-blocks "__asm call foo" lowers to 3885 /// call void asm sideeffect inteldialect "call ${0:P}", "*m..." 3886 /// 3887 /// This function is used by a hack to choose the address constraint, 3888 /// lowering to a direct call. 3889 virtual bool 3890 isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs, 3891 unsigned OpNo) const { 3892 return false; 3893 } 3894 3895 bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, 3896 SDValue &Chain) const; 3897 3898 void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, 3899 SDValue &NewRHS, ISD::CondCode &CCCode, 3900 const SDLoc &DL, const SDValue OldLHS, 3901 const SDValue OldRHS) const; 3902 3903 void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, 3904 SDValue &NewRHS, ISD::CondCode &CCCode, 3905 const SDLoc &DL, const SDValue OldLHS, 3906 const SDValue OldRHS, SDValue &Chain, 3907 bool IsSignaling = false) const; 3908 3909 virtual SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, 3910 SDValue Chain, MachineMemOperand *MMO, 3911 SDValue &NewLoad, SDValue Ptr, 3912 SDValue PassThru, SDValue Mask) const { 3913 llvm_unreachable("Not Implemented"); 3914 } 3915 3916 virtual SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, 3917 SDValue Chain, MachineMemOperand *MMO, 3918 SDValue Ptr, SDValue Val, 3919 SDValue Mask) const { 3920 llvm_unreachable("Not Implemented"); 3921 } 3922 3923 /// Returns a pair of (return value, chain). 3924 /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. 3925 std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, 3926 EVT RetVT, ArrayRef<SDValue> Ops, 3927 MakeLibCallOptions CallOptions, 3928 const SDLoc &dl, 3929 SDValue Chain = SDValue()) const; 3930 3931 /// Check whether parameters to a call that are passed in callee saved 3932 /// registers are the same as from the calling function. This needs to be 3933 /// checked for tail call eligibility. 
3934 bool parametersInCSRMatch(const MachineRegisterInfo &MRI, 3935 const uint32_t *CallerPreservedMask, 3936 const SmallVectorImpl<CCValAssign> &ArgLocs, 3937 const SmallVectorImpl<SDValue> &OutVals) const; 3938 3939 //===--------------------------------------------------------------------===// 3940 // TargetLowering Optimization Methods 3941 // 3942 3943 /// A convenience struct that encapsulates a DAG, and two SDValues for 3944 /// returning information from TargetLowering to its clients that want to 3945 /// combine. 3946 struct TargetLoweringOpt { 3947 SelectionDAG &DAG; 3948 bool LegalTys; 3949 bool LegalOps; 3950 SDValue Old; 3951 SDValue New; 3952 3953 explicit TargetLoweringOpt(SelectionDAG &InDAG, 3954 bool LT, bool LO) : 3955 DAG(InDAG), LegalTys(LT), LegalOps(LO) {} 3956 3957 bool LegalTypes() const { return LegalTys; } 3958 bool LegalOperations() const { return LegalOps; } 3959 3960 bool CombineTo(SDValue O, SDValue N) { 3961 Old = O; 3962 New = N; 3963 return true; 3964 } 3965 }; 3966 3967 /// Determines the optimal series of memory ops to replace the memset / memcpy. 3968 /// Return true if the number of memory ops is below the threshold (Limit). 3969 /// Note that this is always the case when Limit is ~0. 3970 /// It returns the types of the sequence of memory ops to perform 3971 /// memset / memcpy by reference. 3972 virtual bool 3973 findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit, 3974 const MemOp &Op, unsigned DstAS, unsigned SrcAS, 3975 const AttributeList &FuncAttributes) const; 3976 3977 /// Check to see if the specified operand of the specified instruction is a 3978 /// constant integer. If so, check to see if there are any bits set in the 3979 /// constant that are not demanded. If so, shrink the constant and return 3980 /// true. 3981 bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, 3982 const APInt &DemandedElts, 3983 TargetLoweringOpt &TLO) const; 3984 3985 /// Helper wrapper around ShrinkDemandedConstant, demanding all elements. 3986 bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, 3987 TargetLoweringOpt &TLO) const; 3988 3989 // Target hook to do target-specific const optimization, which is called by 3990 // ShrinkDemandedConstant. This function should return true if the target 3991 // doesn't want ShrinkDemandedConstant to further optimize the constant. 3992 virtual bool targetShrinkDemandedConstant(SDValue Op, 3993 const APInt &DemandedBits, 3994 const APInt &DemandedElts, 3995 TargetLoweringOpt &TLO) const { 3996 return false; 3997 } 3998 3999 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. 4000 /// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast, 4001 /// but it could be generalized for targets with other types of implicit 4002 /// widening casts. 4003 bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, 4004 const APInt &DemandedBits, 4005 TargetLoweringOpt &TLO) const; 4006 4007 /// Look at Op. At this point, we know that only the DemandedBits bits of the 4008 /// result of Op are ever used downstream. If we can use this information to 4009 /// simplify Op, create a new simplified DAG node and return true, returning 4010 /// the original and new nodes in Old and New. Otherwise, analyze the 4011 /// expression and return a mask of KnownOne and KnownZero bits for the 4012 /// expression (used to simplify the caller). The KnownZero/One bits may only 4013 /// be accurate for those bits in the Demanded masks. 
4014 /// \p AssumeSingleUse When this parameter is true, this function will 4015 /// attempt to simplify \p Op even if there are multiple uses. 4016 /// Callers are responsible for correctly updating the DAG based on the 4017 /// results of this function, because simply replacing TLO.Old 4018 /// with TLO.New will be incorrect when this parameter is true and TLO.Old 4019 /// has multiple uses. 4020 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, 4021 const APInt &DemandedElts, KnownBits &Known, 4022 TargetLoweringOpt &TLO, unsigned Depth = 0, 4023 bool AssumeSingleUse = false) const; 4024 4025 /// Helper wrapper around SimplifyDemandedBits, demanding all elements. 4026 /// Adds Op back to the worklist upon success. 4027 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, 4028 KnownBits &Known, TargetLoweringOpt &TLO, 4029 unsigned Depth = 0, 4030 bool AssumeSingleUse = false) const; 4031 4032 /// Helper wrapper around SimplifyDemandedBits. 4033 /// Adds Op back to the worklist upon success. 4034 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, 4035 DAGCombinerInfo &DCI) const; 4036 4037 /// Helper wrapper around SimplifyDemandedBits. 4038 /// Adds Op back to the worklist upon success. 4039 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, 4040 const APInt &DemandedElts, 4041 DAGCombinerInfo &DCI) const; 4042 4043 /// More limited version of SimplifyDemandedBits that can be used to "look 4044 /// through" ops that don't contribute to the DemandedBits/DemandedElts - 4045 /// bitwise ops etc. 4046 SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, 4047 const APInt &DemandedElts, 4048 SelectionDAG &DAG, 4049 unsigned Depth = 0) const; 4050 4051 /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all 4052 /// elements. 4053 SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, 4054 SelectionDAG &DAG, 4055 unsigned Depth = 0) const; 4056 4057 /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all 4058 /// bits from only some vector elements. 4059 SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, 4060 const APInt &DemandedElts, 4061 SelectionDAG &DAG, 4062 unsigned Depth = 0) const; 4063 4064 /// Look at Vector Op. At this point, we know that only the DemandedElts 4065 /// elements of the result of Op are ever used downstream. If we can use 4066 /// this information to simplify Op, create a new simplified DAG node and 4067 /// return true, storing the original and new nodes in TLO. 4068 /// Otherwise, analyze the expression and return a mask of KnownUndef and 4069 /// KnownZero elements for the expression (used to simplify the caller). 4070 /// The KnownUndef/Zero elements may only be accurate for those bits 4071 /// in the DemandedMask. 4072 /// \p AssumeSingleUse When this parameter is true, this function will 4073 /// attempt to simplify \p Op even if there are multiple uses. 4074 /// Callers are responsible for correctly updating the DAG based on the 4075 /// results of this function, because simply replacing TLO.Old 4076 /// with TLO.New will be incorrect when this parameter is true and TLO.Old 4077 /// has multiple uses. 4078 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, 4079 APInt &KnownUndef, APInt &KnownZero, 4080 TargetLoweringOpt &TLO, unsigned Depth = 0, 4081 bool AssumeSingleUse = false) const; 4082 4083 /// Helper wrapper around SimplifyDemandedVectorElts. 
4084 /// Adds Op back to the worklist upon success.
4085 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
4086 DAGCombinerInfo &DCI) const;
4087
4088 /// Return true if the target supports simplifying demanded vector elements by
4089 /// converting them to undefs.
4090 virtual bool
4091 shouldSimplifyDemandedVectorElts(SDValue Op,
4092 const TargetLoweringOpt &TLO) const {
4093 return true;
4094 }
4095
4096 /// Determine which of the bits specified in Mask are known to be either zero
4097 /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
4098 /// argument allows us to only collect the known bits that are shared by the
4099 /// requested vector elements.
4100 virtual void computeKnownBitsForTargetNode(const SDValue Op,
4101 KnownBits &Known,
4102 const APInt &DemandedElts,
4103 const SelectionDAG &DAG,
4104 unsigned Depth = 0) const;
4105
4106 /// Determine which of the bits specified in Mask are known to be either zero
4107 /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
4108 /// argument allows us to only collect the known bits that are shared by the
4109 /// requested vector elements. This is for GISel.
4110 virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis,
4111 Register R, KnownBits &Known,
4112 const APInt &DemandedElts,
4113 const MachineRegisterInfo &MRI,
4114 unsigned Depth = 0) const;
4115
4116 /// Determine the known alignment for the pointer value \p R. This can
4117 /// typically be inferred from the number of low known 0 bits. However, for a
4118 /// pointer with a non-integral address space, the alignment value may be
4119 /// independent of the known low bits.
4120 virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis,
4121 Register R,
4122 const MachineRegisterInfo &MRI,
4123 unsigned Depth = 0) const;
4124
4125 /// Determine which of the bits of FrameIndex \p FIOp are known to be 0.
4126 /// Default implementation computes low bits based on alignment
4127 /// information. This should preserve known bits passed into it.
4128 virtual void computeKnownBitsForFrameIndex(int FIOp,
4129 KnownBits &Known,
4130 const MachineFunction &MF) const;
4131
4132 /// This method can be implemented by targets that want to expose additional
4133 /// information about sign bits to the DAG Combiner. The DemandedElts
4134 /// argument allows us to only collect the minimum sign bits that are shared
4135 /// by the requested vector elements.
4136 virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
4137 const APInt &DemandedElts,
4138 const SelectionDAG &DAG,
4139 unsigned Depth = 0) const;
4140
4141 /// This method can be implemented by targets that want to expose additional
4142 /// information about sign bits to GlobalISel combiners. The DemandedElts
4143 /// argument allows us to only collect the minimum sign bits that are shared
4144 /// by the requested vector elements.
4145 virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis,
4146 Register R,
4147 const APInt &DemandedElts,
4148 const MachineRegisterInfo &MRI,
4149 unsigned Depth = 0) const;
4150
4151 /// Attempt to simplify any target nodes based on the demanded vector
4152 /// elements, returning true on success. Otherwise, analyze the expression and
4153 /// return a mask of KnownUndef and KnownZero elements for the expression
4154 /// (used to simplify the caller). The KnownUndef/Zero elements may only be
4155 /// accurate for those bits in the DemandedMask.
4156 virtual bool SimplifyDemandedVectorEltsForTargetNode(
4157 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
4158 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;
4159
4160 /// Attempt to simplify any target nodes based on the demanded bits/elts,
4161 /// returning true on success. Otherwise, analyze the
4162 /// expression and return a mask of KnownOne and KnownZero bits for the
4163 /// expression (used to simplify the caller). The KnownZero/One bits may only
4164 /// be accurate for those bits in the Demanded masks.
4165 virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op,
4166 const APInt &DemandedBits,
4167 const APInt &DemandedElts,
4168 KnownBits &Known,
4169 TargetLoweringOpt &TLO,
4170 unsigned Depth = 0) const;
4171
4172 /// More limited version of SimplifyDemandedBits that can be used to "look
4173 /// through" ops that don't contribute to the DemandedBits/DemandedElts -
4174 /// bitwise ops etc.
4175 virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
4176 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4177 SelectionDAG &DAG, unsigned Depth) const;
4178
4179 /// Return true if this function can prove that \p Op is never poison
4180 /// and, if \p PoisonOnly is false, does not have undef bits. The DemandedElts
4181 /// argument limits the check to the requested vector elements.
4182 virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
4183 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4184 bool PoisonOnly, unsigned Depth) const;
4185
4186 /// Return true if Op can create undef or poison from non-undef & non-poison
4187 /// operands. The DemandedElts argument limits the check to the requested
4188 /// vector elements.
4189 virtual bool
4190 canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts,
4191 const SelectionDAG &DAG, bool PoisonOnly,
4192 bool ConsiderFlags, unsigned Depth) const;
4193
4194 /// Tries to build a legal vector shuffle using the provided parameters
4195 /// or equivalent variations. The Mask argument may be modified as the
4196 /// function tries different variations.
4197 /// Returns an empty SDValue if the operation fails.
4198 SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
4199 SDValue N1, MutableArrayRef<int> Mask,
4200 SelectionDAG &DAG) const;
4201
4202 /// This method returns the constant pool value that will be loaded by LD.
4203 /// NOTE: You must check for implicit extensions of the constant by LD.
4204 virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const;
4205
4206 /// If \p SNaN is false, \returns true if \p Op is known to never be any
4207 /// NaN. If \p SNaN is true, returns true if \p Op is known to never be a signaling
4208 /// NaN.
4209 virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
4210 const SelectionDAG &DAG,
4211 bool SNaN = false,
4212 unsigned Depth = 0) const;
4213
4214 /// Return true if vector \p Op has the same value across all \p DemandedElts,
4215 /// indicating any elements which may be undef in the output \p UndefElts.
4216 virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
4217 APInt &UndefElts,
4218 const SelectionDAG &DAG,
4219 unsigned Depth = 0) const;
4220
4221 /// Returns true if the given Opc is considered a canonical constant for the
4222 /// target, which should not be transformed back into a BUILD_VECTOR.
4223 virtual bool isTargetCanonicalConstantNode(SDValue Op) const { 4224 return Op.getOpcode() == ISD::SPLAT_VECTOR || 4225 Op.getOpcode() == ISD::SPLAT_VECTOR_PARTS; 4226 } 4227 4228 struct DAGCombinerInfo { 4229 void *DC; // The DAG Combiner object. 4230 CombineLevel Level; 4231 bool CalledByLegalizer; 4232 4233 public: 4234 SelectionDAG &DAG; 4235 4236 DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc) 4237 : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {} 4238 4239 bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; } 4240 bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; } 4241 bool isAfterLegalizeDAG() const { return Level >= AfterLegalizeDAG; } 4242 CombineLevel getDAGCombineLevel() { return Level; } 4243 bool isCalledByLegalizer() const { return CalledByLegalizer; } 4244 4245 void AddToWorklist(SDNode *N); 4246 SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true); 4247 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true); 4248 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true); 4249 4250 bool recursivelyDeleteUnusedNodes(SDNode *N); 4251 4252 void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO); 4253 }; 4254 4255 /// Return if the N is a constant or constant vector equal to the true value 4256 /// from getBooleanContents(). 4257 bool isConstTrueVal(SDValue N) const; 4258 4259 /// Return if the N is a constant or constant vector equal to the false value 4260 /// from getBooleanContents(). 4261 bool isConstFalseVal(SDValue N) const; 4262 4263 /// Return if \p N is a True value when extended to \p VT. 4264 bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const; 4265 4266 /// Try to simplify a setcc built with the specified operands and cc. If it is 4267 /// unable to simplify it, return a null SDValue. 4268 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, 4269 bool foldBooleans, DAGCombinerInfo &DCI, 4270 const SDLoc &dl) const; 4271 4272 // For targets which wrap address, unwrap for analysis. 4273 virtual SDValue unwrapAddress(SDValue N) const { return N; } 4274 4275 /// Returns true (and the GlobalValue and the offset) if the node is a 4276 /// GlobalAddress + offset. 4277 virtual bool 4278 isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const; 4279 4280 /// This method will be invoked for all target nodes and for any 4281 /// target-independent nodes that the target has registered with invoke it 4282 /// for. 4283 /// 4284 /// The semantics are as follows: 4285 /// Return Value: 4286 /// SDValue.Val == 0 - No change was made 4287 /// SDValue.Val == N - N was replaced, is dead, and is already handled. 4288 /// otherwise - N should be replaced by the returned Operand. 4289 /// 4290 /// In addition, methods provided by DAGCombinerInfo may be used to perform 4291 /// more complex transformations. 4292 /// 4293 virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; 4294 4295 /// Return true if it is profitable to move this shift by a constant amount 4296 /// through its operand, adjusting any immediate operands as necessary to 4297 /// preserve semantics. This transformation may not be desirable if it 4298 /// disrupts a particularly auspicious target-specific tree (e.g. bitfield 4299 /// extraction in AArch64). By default, it returns true. 4300 /// 4301 /// @param N the shift node 4302 /// @param Level the current DAGCombine legalization level. 
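/// As an illustration, the combine this hook gates rewrites a shift of an
/// add with constant operands roughly as follows (a sketch, not the exact
/// DAGCombiner code):
/// \code
///   (shl (add x, c1), c2)  -->  (add (shl x, c2), (c1 << c2))
/// \endcode
/// Returning false keeps the shift on the outside, e.g. to preserve an
/// addressing mode or a bitfield-extraction pattern.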
4303 virtual bool isDesirableToCommuteWithShift(const SDNode *N,
4304 CombineLevel Level) const {
4305 SDValue ShiftLHS = N->getOperand(0);
4306 if (!ShiftLHS->hasOneUse())
4307 return false;
4308 if (ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&
4309 !ShiftLHS.getOperand(0)->hasOneUse())
4310 return false;
4311 return true;
4312 }
4313
4314 /// GlobalISel - return true if it is profitable to move this shift by a
4315 /// constant amount through its operand, adjusting any immediate operands as
4316 /// necessary to preserve semantics. This transformation may not be desirable
4317 /// if it disrupts a particularly auspicious target-specific tree (e.g.
4318 /// bitfield extraction in AArch64). By default, it returns true.
4319 ///
4320 /// @param MI the shift instruction
4321 /// @param IsAfterLegal true if running after legalization.
4322 virtual bool isDesirableToCommuteWithShift(const MachineInstr &MI,
4323 bool IsAfterLegal) const {
4324 return true;
4325 }
4326
4327 /// GlobalISel - return true if it's profitable to perform the combine:
4328 /// shl ([sza]ext x), y => zext (shl x, y)
4329 virtual bool isDesirableToPullExtFromShl(const MachineInstr &MI) const {
4330 return true;
4331 }
4332
4333 // Return AndOrSETCCFoldKind::{AddAnd, ABS} if it's desirable to try and
4334 // optimize LogicOp(SETCC0, SETCC1). An example (what is implemented as of
4335 // writing this) is:
4336 // With C as a power of 2 and C != 0 and C != INT_MIN:
4337 // AddAnd:
4338 // (icmp eq A, C) | (icmp eq A, -C)
4339 // -> (icmp eq and(add(A, C), ~(C + C)), 0)
4340 // (icmp ne A, C) & (icmp ne A, -C)
4341 // -> (icmp ne and(add(A, C), ~(C + C)), 0)
4342 // ABS:
4343 // (icmp eq A, C) | (icmp eq A, -C)
4344 // -> (icmp eq Abs(A), C)
4345 // (icmp ne A, C) & (icmp ne A, -C)
4346 // -> (icmp ne Abs(A), C)
4347 //
4348 // @param LogicOp the logic op
4349 // @param SETCC0 the first of the SETCC nodes
4350 // @param SETCC1 the second of the SETCC nodes
4351 virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(
4352 const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const {
4353 return AndOrSETCCFoldKind::None;
4354 }
4355
4356 /// Return true if it is profitable to combine an XOR of a logical shift
4357 /// to create a logical shift of NOT. This transformation may not be desirable
4358 /// if it disrupts a particularly auspicious target-specific tree (e.g.
4359 /// BIC on ARM/AArch64). By default, it returns true.
4360 virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const {
4361 return true;
4362 }
4363
4364 /// Return true if the target has native support for the specified value type
4365 /// and it is 'desirable' to use the type for the given node type. e.g. On x86
4366 /// i16 is legal, but undesirable since i16 instruction encodings are longer
4367 /// and some i16 instructions are slow.
4368 virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const {
4369 // By default, assume all legal types are desirable.
4370 return isTypeLegal(VT);
4371 }
4372
4373 /// Return true if it is profitable for dag combiner to transform a floating
4374 /// point op of specified opcode to an equivalent op of an integer
4375 /// type. e.g. f32 load -> i32 load can be profitable on ARM.
4376 virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/,
4377 EVT /*VT*/) const {
4378 return false;
4379 }
4380
4381 /// This method queries the target whether it is beneficial for dag combiner to
4382 /// promote the specified node. If true, it should return the desired
4383 /// promotion type by reference.
4384 virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const {
4385 return false;
4386 }
4387
4388 /// Return true if the target supports the swifterror attribute. It optimizes
4389 /// loads and stores to reading and writing a specific register.
4390 virtual bool supportSwiftError() const {
4391 return false;
4392 }
4393
4394 /// Return true if the target supports that a subset of CSRs for the given
4395 /// machine function is handled explicitly via copies.
4396 virtual bool supportSplitCSR(MachineFunction *MF) const {
4397 return false;
4398 }
4399
4400 /// Return true if the target supports kcfi operand bundles.
4401 virtual bool supportKCFIBundles() const { return false; }
4402
4403 /// Return true if the target supports ptrauth operand bundles.
4404 virtual bool supportPtrAuthBundles() const { return false; }
4405
4406 /// Perform necessary initialization to handle a subset of CSRs explicitly
4407 /// via copies. This function is called at the beginning of instruction
4408 /// selection.
4409 virtual void initializeSplitCSR(MachineBasicBlock *Entry) const {
4410 llvm_unreachable("Not Implemented");
4411 }
4412
4413 /// Insert explicit copies in entry and exit blocks. We copy a subset of
4414 /// CSRs to virtual registers in the entry block, and copy them back to
4415 /// physical registers in the exit blocks. This function is called at the end
4416 /// of instruction selection.
4417 virtual void insertCopiesSplitCSR(
4418 MachineBasicBlock *Entry,
4419 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
4420 llvm_unreachable("Not Implemented");
4421 }
4422
4423 /// Return the newly negated expression if the cost is not expensive and
4424 /// set the cost in \p Cost to indicate whether it is cheaper or neutral to
4425 /// do the negation.
4426 virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
4427 bool LegalOps, bool OptForSize,
4428 NegatibleCost &Cost,
4429 unsigned Depth = 0) const;
4430
4431 SDValue getCheaperOrNeutralNegatedExpression(
4432 SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize,
4433 const NegatibleCost CostThreshold = NegatibleCost::Neutral,
4434 unsigned Depth = 0) const {
4435 NegatibleCost Cost = NegatibleCost::Expensive;
4436 SDValue Neg =
4437 getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth);
4438 if (!Neg)
4439 return SDValue();
4440
4441 if (Cost <= CostThreshold)
4442 return Neg;
4443
4444 // Remove the newly created node to avoid side effects on the DAG.
4445 if (Neg->use_empty())
4446 DAG.RemoveDeadNode(Neg.getNode());
4447 return SDValue();
4448 }
4449
4450 /// This is the helper function to return the newly negated expression only
4451 /// when the cost is cheaper.
4452 SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG,
4453 bool LegalOps, bool OptForSize,
4454 unsigned Depth = 0) const {
4455 return getCheaperOrNeutralNegatedExpression(Op, DAG, LegalOps, OptForSize,
4456 NegatibleCost::Cheaper, Depth);
4457 }
4458
4459 /// This is the helper function to return the newly negated expression if
4460 /// the cost is not expensive.
4461 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps,
4462 bool OptForSize, unsigned Depth = 0) const {
4463 NegatibleCost Cost = NegatibleCost::Expensive;
4464 return getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth);
4465 }
4466
4467 //===--------------------------------------------------------------------===//
4468 // Lowering methods - These methods must be implemented by targets so that
4469 // the SelectionDAGBuilder code knows how to lower these.
4470 //
4471
4472 /// Target-specific splitting of values into parts that fit a register
4473 /// storing a legal type.
4474 virtual bool splitValueIntoRegisterParts(
4475 SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4476 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
4477 return false;
4478 }
4479
4480 /// Allows the target to handle physreg-carried dependency
4481 /// in a target-specific way. Used from the ScheduleDAGSDNodes to decide whether
4482 /// to add the edge to the dependency graph.
4483 /// Def - input: Selection DAG node defining the physical register
4484 /// User - input: Selection DAG node using the physical register
4485 /// Op - input: Number of User operand
4486 /// PhysReg - inout: set to the physical register if the edge is
4487 /// necessary, unchanged otherwise
4488 /// Cost - inout: physical register copy cost.
4489 /// Returns 'true' if the edge is necessary, 'false' otherwise
4490 virtual bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
4491 const TargetRegisterInfo *TRI,
4492 const TargetInstrInfo *TII,
4493 unsigned &PhysReg, int &Cost) const {
4494 return false;
4495 }
4496
4497 /// Target-specific combining of register parts into its original value.
4498 virtual SDValue
4499 joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
4500 const SDValue *Parts, unsigned NumParts,
4501 MVT PartVT, EVT ValueVT,
4502 std::optional<CallingConv::ID> CC) const {
4503 return SDValue();
4504 }
4505
4506 /// This hook must be implemented to lower the incoming (formal) arguments,
4507 /// described by the Ins array, into the specified DAG. The implementation
4508 /// should fill in the InVals array with legal-type argument values, and
4509 /// return the resulting token chain value.
4510 virtual SDValue LowerFormalArguments(
4511 SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/,
4512 const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/,
4513 SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const {
4514 llvm_unreachable("Not Implemented");
4515 }
4516
4517 /// This structure contains the information necessary for lowering
4518 /// pointer-authenticating indirect calls. It is equivalent to the "ptrauth"
4519 /// operand bundle found on the call instruction, if any.
4520 struct PtrAuthInfo {
4521 uint64_t Key;
4522 SDValue Discriminator;
4523 };
4524
4525 /// This structure contains all information that is necessary for lowering
4526 /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder
4527 /// needs to lower a call, and targets will see this struct in their LowerCall
4528 /// implementation.
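/// A minimal illustrative sketch of how a client typically fills one in and
/// hands it to LowerCallTo (Chain, Callee, RetTy, Args and dl are assumed to
/// already exist in the caller):
/// \code
///   TargetLowering::CallLoweringInfo CLI(DAG);
///   CLI.setDebugLoc(dl)
///       .setChain(Chain)
///       .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args));
///   std::pair<SDValue, SDValue> Result = LowerCallTo(CLI);
///   // Result.first is the call's return value (if any); Result.second is
///   // the outgoing token chain.
/// \endcode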
4529 struct CallLoweringInfo { 4530 SDValue Chain; 4531 Type *RetTy = nullptr; 4532 bool RetSExt : 1; 4533 bool RetZExt : 1; 4534 bool IsVarArg : 1; 4535 bool IsInReg : 1; 4536 bool DoesNotReturn : 1; 4537 bool IsReturnValueUsed : 1; 4538 bool IsConvergent : 1; 4539 bool IsPatchPoint : 1; 4540 bool IsPreallocated : 1; 4541 bool NoMerge : 1; 4542 4543 // IsTailCall should be modified by implementations of 4544 // TargetLowering::LowerCall that perform tail call conversions. 4545 bool IsTailCall = false; 4546 4547 // Is Call lowering done post SelectionDAG type legalization. 4548 bool IsPostTypeLegalization = false; 4549 4550 unsigned NumFixedArgs = -1; 4551 CallingConv::ID CallConv = CallingConv::C; 4552 SDValue Callee; 4553 ArgListTy Args; 4554 SelectionDAG &DAG; 4555 SDLoc DL; 4556 const CallBase *CB = nullptr; 4557 SmallVector<ISD::OutputArg, 32> Outs; 4558 SmallVector<SDValue, 32> OutVals; 4559 SmallVector<ISD::InputArg, 32> Ins; 4560 SmallVector<SDValue, 4> InVals; 4561 const ConstantInt *CFIType = nullptr; 4562 SDValue ConvergenceControlToken; 4563 4564 std::optional<PtrAuthInfo> PAI; 4565 4566 CallLoweringInfo(SelectionDAG &DAG) 4567 : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), 4568 DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false), 4569 IsPatchPoint(false), IsPreallocated(false), NoMerge(false), 4570 DAG(DAG) {} 4571 4572 CallLoweringInfo &setDebugLoc(const SDLoc &dl) { 4573 DL = dl; 4574 return *this; 4575 } 4576 4577 CallLoweringInfo &setChain(SDValue InChain) { 4578 Chain = InChain; 4579 return *this; 4580 } 4581 4582 // setCallee with target/module-specific attributes 4583 CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType, 4584 SDValue Target, ArgListTy &&ArgsList) { 4585 RetTy = ResultType; 4586 Callee = Target; 4587 CallConv = CC; 4588 NumFixedArgs = ArgsList.size(); 4589 Args = std::move(ArgsList); 4590 4591 DAG.getTargetLoweringInfo().markLibCallAttributes( 4592 &(DAG.getMachineFunction()), CC, Args); 4593 return *this; 4594 } 4595 4596 CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType, 4597 SDValue Target, ArgListTy &&ArgsList, 4598 AttributeSet ResultAttrs = {}) { 4599 RetTy = ResultType; 4600 IsInReg = ResultAttrs.hasAttribute(Attribute::InReg); 4601 RetSExt = ResultAttrs.hasAttribute(Attribute::SExt); 4602 RetZExt = ResultAttrs.hasAttribute(Attribute::ZExt); 4603 NoMerge = ResultAttrs.hasAttribute(Attribute::NoMerge); 4604 4605 Callee = Target; 4606 CallConv = CC; 4607 NumFixedArgs = ArgsList.size(); 4608 Args = std::move(ArgsList); 4609 return *this; 4610 } 4611 4612 CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy, 4613 SDValue Target, ArgListTy &&ArgsList, 4614 const CallBase &Call) { 4615 RetTy = ResultType; 4616 4617 IsInReg = Call.hasRetAttr(Attribute::InReg); 4618 DoesNotReturn = 4619 Call.doesNotReturn() || 4620 (!isa<InvokeInst>(Call) && isa<UnreachableInst>(Call.getNextNode())); 4621 IsVarArg = FTy->isVarArg(); 4622 IsReturnValueUsed = !Call.use_empty(); 4623 RetSExt = Call.hasRetAttr(Attribute::SExt); 4624 RetZExt = Call.hasRetAttr(Attribute::ZExt); 4625 NoMerge = Call.hasFnAttr(Attribute::NoMerge); 4626 4627 Callee = Target; 4628 4629 CallConv = Call.getCallingConv(); 4630 NumFixedArgs = FTy->getNumParams(); 4631 Args = std::move(ArgsList); 4632 4633 CB = &Call; 4634 4635 return *this; 4636 } 4637 4638 CallLoweringInfo &setInRegister(bool Value = true) { 4639 IsInReg = Value; 4640 return *this; 4641 } 4642 4643 CallLoweringInfo &setNoReturn(bool Value = true) { 4644 
DoesNotReturn = Value; 4645 return *this; 4646 } 4647 4648 CallLoweringInfo &setVarArg(bool Value = true) { 4649 IsVarArg = Value; 4650 return *this; 4651 } 4652 4653 CallLoweringInfo &setTailCall(bool Value = true) { 4654 IsTailCall = Value; 4655 return *this; 4656 } 4657 4658 CallLoweringInfo &setDiscardResult(bool Value = true) { 4659 IsReturnValueUsed = !Value; 4660 return *this; 4661 } 4662 4663 CallLoweringInfo &setConvergent(bool Value = true) { 4664 IsConvergent = Value; 4665 return *this; 4666 } 4667 4668 CallLoweringInfo &setSExtResult(bool Value = true) { 4669 RetSExt = Value; 4670 return *this; 4671 } 4672 4673 CallLoweringInfo &setZExtResult(bool Value = true) { 4674 RetZExt = Value; 4675 return *this; 4676 } 4677 4678 CallLoweringInfo &setIsPatchPoint(bool Value = true) { 4679 IsPatchPoint = Value; 4680 return *this; 4681 } 4682 4683 CallLoweringInfo &setIsPreallocated(bool Value = true) { 4684 IsPreallocated = Value; 4685 return *this; 4686 } 4687 4688 CallLoweringInfo &setPtrAuth(PtrAuthInfo Value) { 4689 PAI = Value; 4690 return *this; 4691 } 4692 4693 CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) { 4694 IsPostTypeLegalization = Value; 4695 return *this; 4696 } 4697 4698 CallLoweringInfo &setCFIType(const ConstantInt *Type) { 4699 CFIType = Type; 4700 return *this; 4701 } 4702 4703 CallLoweringInfo &setConvergenceControlToken(SDValue Token) { 4704 ConvergenceControlToken = Token; 4705 return *this; 4706 } 4707 4708 ArgListTy &getArgs() { 4709 return Args; 4710 } 4711 }; 4712 4713 /// This structure is used to pass arguments to makeLibCall function. 4714 struct MakeLibCallOptions { 4715 // By passing type list before soften to makeLibCall, the target hook 4716 // shouldExtendTypeInLibCall can get the original type before soften. 4717 ArrayRef<EVT> OpsVTBeforeSoften; 4718 EVT RetVTBeforeSoften; 4719 bool IsSigned : 1; 4720 bool DoesNotReturn : 1; 4721 bool IsReturnValueUsed : 1; 4722 bool IsPostTypeLegalization : 1; 4723 bool IsSoften : 1; 4724 4725 MakeLibCallOptions() 4726 : IsSigned(false), DoesNotReturn(false), IsReturnValueUsed(true), 4727 IsPostTypeLegalization(false), IsSoften(false) {} 4728 4729 MakeLibCallOptions &setIsSigned(bool Value = true) { 4730 IsSigned = Value; 4731 return *this; 4732 } 4733 4734 MakeLibCallOptions &setNoReturn(bool Value = true) { 4735 DoesNotReturn = Value; 4736 return *this; 4737 } 4738 4739 MakeLibCallOptions &setDiscardResult(bool Value = true) { 4740 IsReturnValueUsed = !Value; 4741 return *this; 4742 } 4743 4744 MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) { 4745 IsPostTypeLegalization = Value; 4746 return *this; 4747 } 4748 4749 MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef<EVT> OpsVT, EVT RetVT, 4750 bool Value = true) { 4751 OpsVTBeforeSoften = OpsVT; 4752 RetVTBeforeSoften = RetVT; 4753 IsSoften = Value; 4754 return *this; 4755 } 4756 }; 4757 4758 /// This function lowers an abstract call to a function into an actual call. 4759 /// This returns a pair of operands. The first element is the return value 4760 /// for the function (if RetTy is not VoidTy). The second element is the 4761 /// outgoing token chain. It calls LowerCall to do the actual lowering. 4762 std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const; 4763 4764 /// This hook must be implemented to lower calls into the specified 4765 /// DAG. The outgoing arguments to the call are described by the Outs array, 4766 /// and the values to be returned by the call are described by the Ins 4767 /// array. 
The implementation should fill in the InVals array with legal-type
4768 /// return values from the call, and return the resulting token chain value.
4769 virtual SDValue
4770 LowerCall(CallLoweringInfo &/*CLI*/,
4771 SmallVectorImpl<SDValue> &/*InVals*/) const {
4772 llvm_unreachable("Not Implemented");
4773 }
4774
4775 /// Target-specific cleanup for formal ByVal parameters.
4776 virtual void HandleByVal(CCState *, unsigned &, Align) const {}
4777
4778 /// This hook should be implemented to check whether the return values
4779 /// described by the Outs array can fit into the return registers. If false
4780 /// is returned, an sret-demotion is performed.
4781 virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/,
4782 MachineFunction &/*MF*/, bool /*isVarArg*/,
4783 const SmallVectorImpl<ISD::OutputArg> &/*Outs*/,
4784 LLVMContext &/*Context*/, const Type *RetTy) const
4785 {
4786 // Return true by default to get preexisting behavior.
4787 return true;
4788 }
4789
4790 /// This hook must be implemented to lower outgoing return values, described
4791 /// by the Outs array, into the specified DAG. The implementation should
4792 /// return the resulting token chain value.
4793 virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/,
4794 bool /*isVarArg*/,
4795 const SmallVectorImpl<ISD::OutputArg> & /*Outs*/,
4796 const SmallVectorImpl<SDValue> & /*OutVals*/,
4797 const SDLoc & /*dl*/,
4798 SelectionDAG & /*DAG*/) const {
4799 llvm_unreachable("Not Implemented");
4800 }
4801
4802 /// Return true if the result of the specified node is used by a return node
4803 /// only. It also computes and returns the input chain for the tail call.
4804 ///
4805 /// This is used to determine whether it is possible to codegen a libcall as
4806 /// a tail call at legalization time.
4807 virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const {
4808 return false;
4809 }
4810
4811 /// Return true if the target may be able to emit the call instruction as a tail
4812 /// call. This is used by optimization passes to determine if it's profitable
4813 /// to duplicate return instructions to enable tailcall optimization.
4814 virtual bool mayBeEmittedAsTailCall(const CallInst *) const {
4815 return false;
4816 }
4817
4818 /// Return the register ID of the name passed in. Used by named register
4819 /// global variables extension. There is no target-independent behaviour
4820 /// so the default action is to bail.
4821 virtual Register getRegisterByName(const char* RegName, LLT Ty,
4822 const MachineFunction &MF) const {
4823 report_fatal_error("Named registers not implemented for this target");
4824 }
4825
4826 /// Return the type that should be used to zero or sign extend a
4827 /// zeroext/signext integer return value. FIXME: Some C calling conventions
4828 /// require the return type to be promoted, but this is not true all the time,
4829 /// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling
4830 /// conventions. The frontend should handle this and include all of the
4831 /// necessary information.
4832 virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
4833 ISD::NodeType /*ExtendKind*/) const {
4834 EVT MinVT = getRegisterType(MVT::i32);
4835 return VT.bitsLT(MinVT) ? MinVT : VT;
4836 }
4837
4838 /// For some targets, an LLVM struct type must be broken down into multiple
4839 /// simple types, but the calling convention specifies that the entire struct
4840 /// must be passed in a block of consecutive registers.
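/// For example, under an AAPCS-style convention a homogeneous floating-point
/// aggregate such as (illustrative)
/// \code
///   struct HFA { double A, B, C, D; };
/// \endcode
/// must be assigned four consecutive floating-point registers or be passed
/// entirely on the stack, so a target with such a rule would return true here.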
4841 virtual bool 4842 functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, 4843 bool isVarArg, 4844 const DataLayout &DL) const { 4845 return false; 4846 } 4847 4848 /// For most targets, an LLVM type must be broken down into multiple 4849 /// smaller types. Usually the halves are ordered according to the endianness 4850 /// but for some platform that would break. So this method will default to 4851 /// matching the endianness but can be overridden. 4852 virtual bool 4853 shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const { 4854 return DL.isLittleEndian(); 4855 } 4856 4857 /// Returns a 0 terminated array of registers that can be safely used as 4858 /// scratch registers. 4859 virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const { 4860 return nullptr; 4861 } 4862 4863 /// Returns a 0 terminated array of rounding control registers that can be 4864 /// attached into strict FP call. 4865 virtual ArrayRef<MCPhysReg> getRoundingControlRegisters() const { 4866 return ArrayRef<MCPhysReg>(); 4867 } 4868 4869 /// This callback is used to prepare for a volatile or atomic load. 4870 /// It takes a chain node as input and returns the chain for the load itself. 4871 /// 4872 /// Having a callback like this is necessary for targets like SystemZ, 4873 /// which allows a CPU to reuse the result of a previous load indefinitely, 4874 /// even if a cache-coherent store is performed by another CPU. The default 4875 /// implementation does nothing. 4876 virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL, 4877 SelectionDAG &DAG) const { 4878 return Chain; 4879 } 4880 4881 /// This callback is invoked by the type legalizer to legalize nodes with an 4882 /// illegal operand type but legal result types. It replaces the 4883 /// LowerOperation callback in the type Legalizer. The reason we can not do 4884 /// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to 4885 /// use this callback. 4886 /// 4887 /// TODO: Consider merging with ReplaceNodeResults. 4888 /// 4889 /// The target places new result values for the node in Results (their number 4890 /// and types must exactly match those of the original return values of 4891 /// the node), or leaves Results empty, which indicates that the node is not 4892 /// to be custom lowered after all. 4893 /// The default implementation calls LowerOperation. 4894 virtual void LowerOperationWrapper(SDNode *N, 4895 SmallVectorImpl<SDValue> &Results, 4896 SelectionDAG &DAG) const; 4897 4898 /// This callback is invoked for operations that are unsupported by the 4899 /// target, which are registered to use 'custom' lowering, and whose defined 4900 /// values are all legal. If the target has no operations that require custom 4901 /// lowering, it need not implement this. The default implementation of this 4902 /// aborts. 4903 virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; 4904 4905 /// This callback is invoked when a node result type is illegal for the 4906 /// target, and the operation was registered to use 'custom' lowering for that 4907 /// result type. The target places new result values for the node in Results 4908 /// (their number and types must exactly match those of the original return 4909 /// values of the node), or leaves Results empty, which indicates that the 4910 /// node is not to be custom lowered after all. 4911 /// 4912 /// If the target has no operations that require custom lowering, it need not 4913 /// implement this. 
The default implementation aborts.
4914 virtual void ReplaceNodeResults(SDNode * /*N*/,
4915 SmallVectorImpl<SDValue> &/*Results*/,
4916 SelectionDAG &/*DAG*/) const {
4917 llvm_unreachable("ReplaceNodeResults not implemented for this target!");
4918 }
4919
4920 /// This method returns the name of a target specific DAG node.
4921 virtual const char *getTargetNodeName(unsigned Opcode) const;
4922
4923 /// This method returns a target specific FastISel object, or null if the
4924 /// target does not support "fast" ISel.
4925 virtual FastISel *createFastISel(FunctionLoweringInfo &,
4926 const TargetLibraryInfo *) const {
4927 return nullptr;
4928 }
4929
4930 bool verifyReturnAddressArgumentIsConstant(SDValue Op,
4931 SelectionDAG &DAG) const;
4932
4933 #ifndef NDEBUG
4934 /// Check the given SDNode. Aborts if it is invalid.
4935 virtual void verifyTargetSDNode(const SDNode *N) const {};
4936 #endif
4937
4938 //===--------------------------------------------------------------------===//
4939 // Inline Asm Support hooks
4940 //
4941
4942 /// This hook allows the target to expand an inline asm call to be explicit
4943 /// LLVM code if it wants to. This is useful for turning simple inline asms
4944 /// into LLVM intrinsics, which gives the compiler more information about the
4945 /// behavior of the code.
4946 virtual bool ExpandInlineAsm(CallInst *) const {
4947 return false;
4948 }
4949
4950 enum ConstraintType {
4951 C_Register, // Constraint represents specific register(s).
4952 C_RegisterClass, // Constraint represents any of register(s) in class.
4953 C_Memory, // Memory constraint.
4954 C_Address, // Address constraint.
4955 C_Immediate, // Requires an immediate.
4956 C_Other, // Something else.
4957 C_Unknown // Unsupported constraint.
4958 };
4959
4960 enum ConstraintWeight {
4961 // Generic weights.
4962 CW_Invalid = -1, // No match.
4963 CW_Okay = 0, // Acceptable.
4964 CW_Good = 1, // Good weight.
4965 CW_Better = 2, // Better weight.
4966 CW_Best = 3, // Best weight.
4967
4968 // Well-known weights.
4969 CW_SpecificReg = CW_Okay, // Specific register operands.
4970 CW_Register = CW_Good, // Register operands.
4971 CW_Memory = CW_Better, // Memory operands.
4972 CW_Constant = CW_Best, // Constant operand.
4973 CW_Default = CW_Okay // Default or don't know type.
4974 };
4975
4976 /// This contains information for each constraint that we are lowering.
4977 struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
4978 /// This contains the actual string for the code, like "m". TargetLowering
4979 /// picks the 'best' code from ConstraintInfo::Codes that most closely
4980 /// matches the operand.
4981 std::string ConstraintCode;
4982
4983 /// Information about the constraint code, e.g. Register, RegisterClass,
4984 /// Memory, Other, Unknown.
4985 TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown;
4986
4987 /// If this is the result output operand or a clobber, this is null,
4988 /// otherwise it is the incoming operand to the CallInst. This gets
4989 /// modified as the asm is processed.
4990 Value *CallOperandVal = nullptr;
4991
4992 /// The ValueType for the operand value.
4993 MVT ConstraintVT = MVT::Other;
4994
4995 /// Copy constructor for copying from a ConstraintInfo.
4996 AsmOperandInfo(InlineAsm::ConstraintInfo Info)
4997 : InlineAsm::ConstraintInfo(std::move(Info)) {}
4998
4999 /// Return true if this is an input operand that is a matching constraint
5000 /// like "4".
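/// For example, in the illustrative inline asm below the input constraint
/// "0" is a matching constraint that ties the input to output operand 0:
/// \code
///   int Out, In;
///   __asm__("" : "=r"(Out) : "0"(In));
/// \endcode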
5001 bool isMatchingInputConstraint() const; 5002 5003 /// If this is an input matching constraint, this method returns the output 5004 /// operand it matches. 5005 unsigned getMatchedOperand() const; 5006 }; 5007 5008 using AsmOperandInfoVector = std::vector<AsmOperandInfo>; 5009 5010 /// Split up the constraint string from the inline assembly value into the 5011 /// specific constraints and their prefixes, and also tie in the associated 5012 /// operand values. If this returns an empty vector, and if the constraint 5013 /// string itself isn't empty, there was an error parsing. 5014 virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, 5015 const TargetRegisterInfo *TRI, 5016 const CallBase &Call) const; 5017 5018 /// Examine constraint type and operand type and determine a weight value. 5019 /// The operand object must already have been set up with the operand type. 5020 virtual ConstraintWeight getMultipleConstraintMatchWeight( 5021 AsmOperandInfo &info, int maIndex) const; 5022 5023 /// Examine constraint string and operand type and determine a weight value. 5024 /// The operand object must already have been set up with the operand type. 5025 virtual ConstraintWeight getSingleConstraintMatchWeight( 5026 AsmOperandInfo &info, const char *constraint) const; 5027 5028 /// Determines the constraint code and constraint type to use for the specific 5029 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType. 5030 /// If the actual operand being passed in is available, it can be passed in as 5031 /// Op, otherwise an empty SDValue can be passed. 5032 virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, 5033 SDValue Op, 5034 SelectionDAG *DAG = nullptr) const; 5035 5036 /// Given a constraint, return the type of constraint it is for this target. 5037 virtual ConstraintType getConstraintType(StringRef Constraint) const; 5038 5039 using ConstraintPair = std::pair<StringRef, TargetLowering::ConstraintType>; 5040 using ConstraintGroup = SmallVector<ConstraintPair>; 5041 /// Given an OpInfo with list of constraints codes as strings, return a 5042 /// sorted Vector of pairs of constraint codes and their types in priority of 5043 /// what we'd prefer to lower them as. This may contain immediates that 5044 /// cannot be lowered, but it is meant to be a machine agnostic order of 5045 /// preferences. 5046 ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const; 5047 5048 /// Given a physical register constraint (e.g. {edx}), return the register 5049 /// number and the register class for the register. 5050 /// 5051 /// Given a register class constraint, like 'r', if this corresponds directly 5052 /// to an LLVM register class, return a register of 0 and the register class 5053 /// pointer. 5054 /// 5055 /// This should only be used for C_Register constraints. On error, this 5056 /// returns a register number of 0 and a null register class pointer. 
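/// Illustrative results, using x86 names (a sketch only, not a guarantee of
/// the exact register classes a target chooses):
/// \code
///   getRegForInlineAsmConstraint(TRI, "{eax}", MVT::i32)
///     // -> {X86::EAX, &X86::GR32RegClass}  (specific register)
///   getRegForInlineAsmConstraint(TRI, "r", MVT::i32)
///     // -> {0, &X86::GR32RegClass}         (whole register class)
/// \endcode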
5057 virtual std::pair<unsigned, const TargetRegisterClass *> 5058 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 5059 StringRef Constraint, MVT VT) const; 5060 5061 virtual InlineAsm::ConstraintCode 5062 getInlineAsmMemConstraint(StringRef ConstraintCode) const { 5063 if (ConstraintCode == "m") 5064 return InlineAsm::ConstraintCode::m; 5065 if (ConstraintCode == "o") 5066 return InlineAsm::ConstraintCode::o; 5067 if (ConstraintCode == "X") 5068 return InlineAsm::ConstraintCode::X; 5069 if (ConstraintCode == "p") 5070 return InlineAsm::ConstraintCode::p; 5071 return InlineAsm::ConstraintCode::Unknown; 5072 } 5073 5074 /// Try to replace an X constraint, which matches anything, with another that 5075 /// has more specific requirements based on the type of the corresponding 5076 /// operand. This returns null if there is no replacement to make. 5077 virtual const char *LowerXConstraint(EVT ConstraintVT) const; 5078 5079 /// Lower the specified operand into the Ops vector. If it is invalid, don't 5080 /// add anything to Ops. 5081 virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, 5082 std::vector<SDValue> &Ops, 5083 SelectionDAG &DAG) const; 5084 5085 // Lower custom output constraints. If invalid, return SDValue(). 5086 virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, 5087 const SDLoc &DL, 5088 const AsmOperandInfo &OpInfo, 5089 SelectionDAG &DAG) const; 5090 5091 // Targets may override this function to collect operands from the CallInst 5092 // and for example, lower them into the SelectionDAG operands. 5093 virtual void CollectTargetIntrinsicOperands(const CallInst &I, 5094 SmallVectorImpl<SDValue> &Ops, 5095 SelectionDAG &DAG) const; 5096 5097 //===--------------------------------------------------------------------===// 5098 // Div utility functions 5099 // 5100 5101 SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, 5102 bool IsAfterLegalTypes, 5103 SmallVectorImpl<SDNode *> &Created) const; 5104 SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, 5105 bool IsAfterLegalTypes, 5106 SmallVectorImpl<SDNode *> &Created) const; 5107 // Build sdiv by power-of-2 with conditional move instructions 5108 SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, 5109 SelectionDAG &DAG, 5110 SmallVectorImpl<SDNode *> &Created) const; 5111 5112 /// Targets may override this function to provide custom SDIV lowering for 5113 /// power-of-2 denominators. If the target returns an empty SDValue, LLVM 5114 /// assumes SDIV is expensive and replaces it with a series of other integer 5115 /// operations. 5116 virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, 5117 SelectionDAG &DAG, 5118 SmallVectorImpl<SDNode *> &Created) const; 5119 5120 /// Targets may override this function to provide custom SREM lowering for 5121 /// power-of-2 denominators. If the target returns an empty SDValue, LLVM 5122 /// assumes SREM is expensive and replaces it with a series of other integer 5123 /// operations. 5124 virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, 5125 SelectionDAG &DAG, 5126 SmallVectorImpl<SDNode *> &Created) const; 5127 5128 /// Indicate whether this target prefers to combine FDIVs with the same 5129 /// divisor. If the transform should never be done, return zero. If the 5130 /// transform should be done, return the minimum number of divisor uses 5131 /// that must exist. 
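/// For example, with the appropriate fast-math flags, returning 2 permits a
/// rewrite along these lines (illustrative) once a divisor has at least two
/// uses:
/// \code
///   x = a / d; y = b / d;   -->   t = 1.0 / d; x = a * t; y = b * t;
/// \endcode
/// trading N divisions for one division plus N multiplications.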
5132 virtual unsigned combineRepeatedFPDivisors() const { 5133 return 0; 5134 } 5135 5136 /// Hooks for building estimates in place of slower divisions and square 5137 /// roots. 5138 5139 /// Return either a square root or its reciprocal estimate value for the input 5140 /// operand. 5141 /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or 5142 /// 'Enabled' as set by a potential default override attribute. 5143 /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson 5144 /// refinement iterations required to generate a sufficient (though not 5145 /// necessarily IEEE-754 compliant) estimate is returned in that parameter. 5146 /// The boolean UseOneConstNR output is used to select a Newton-Raphson 5147 /// algorithm implementation that uses either one or two constants. 5148 /// The boolean Reciprocal is used to select whether the estimate is for the 5149 /// square root of the input operand or the reciprocal of its square root. 5150 /// A target may choose to implement its own refinement within this function. 5151 /// If that's true, then return '0' as the number of RefinementSteps to avoid 5152 /// any further refinement of the estimate. 5153 /// An empty SDValue return means no estimate sequence can be created. 5154 virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, 5155 int Enabled, int &RefinementSteps, 5156 bool &UseOneConstNR, bool Reciprocal) const { 5157 return SDValue(); 5158 } 5159 5160 /// Try to convert the fminnum/fmaxnum to a compare/select sequence. This is 5161 /// required for correctness since InstCombine might have canonicalized a 5162 /// fcmp+select sequence to a FMINNUM/FMAXNUM intrinsic. If we were to fall 5163 /// through to the default expansion/soften to libcall, we might introduce a 5164 /// link-time dependency on libm into a file that originally did not have one. 5165 SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const; 5166 5167 /// Return a reciprocal estimate value for the input operand. 5168 /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or 5169 /// 'Enabled' as set by a potential default override attribute. 5170 /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson 5171 /// refinement iterations required to generate a sufficient (though not 5172 /// necessarily IEEE-754 compliant) estimate is returned in that parameter. 5173 /// A target may choose to implement its own refinement within this function. 5174 /// If that's true, then return '0' as the number of RefinementSteps to avoid 5175 /// any further refinement of the estimate. 5176 /// An empty SDValue return means no estimate sequence can be created. 5177 virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, 5178 int Enabled, int &RefinementSteps) const { 5179 return SDValue(); 5180 } 5181 5182 /// Return a target-dependent comparison result if the input operand is 5183 /// suitable for use with a square root estimate calculation. For example, the 5184 /// comparison may check if the operand is NAN, INF, zero, normal, etc. The 5185 /// result should be used as the condition operand for a select or branch. 5186 virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, 5187 const DenormalMode &Mode) const; 5188 5189 /// Return a target-dependent result if the input operand is not suitable for 5190 /// use with a square root estimate calculation. 
5191 virtual SDValue getSqrtResultForDenormInput(SDValue Operand, 5192 SelectionDAG &DAG) const { 5193 return DAG.getConstantFP(0.0, SDLoc(Operand), Operand.getValueType()); 5194 } 5195 5196 //===--------------------------------------------------------------------===// 5197 // Legalization utility functions 5198 // 5199 5200 /// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, 5201 /// respectively, each computing an n/2-bit part of the result. 5202 /// \param Result A vector that will be filled with the parts of the result 5203 /// in little-endian order. 5204 /// \param LL Low bits of the LHS of the MUL. You can use this parameter 5205 /// if you want to control how low bits are extracted from the LHS. 5206 /// \param LH High bits of the LHS of the MUL. See LL for meaning. 5207 /// \param RL Low bits of the RHS of the MUL. See LL for meaning 5208 /// \param RH High bits of the RHS of the MUL. See LL for meaning. 5209 /// \returns true if the node has been expanded, false if it has not 5210 bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, 5211 SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT, 5212 SelectionDAG &DAG, MulExpansionKind Kind, 5213 SDValue LL = SDValue(), SDValue LH = SDValue(), 5214 SDValue RL = SDValue(), SDValue RH = SDValue()) const; 5215 5216 /// Expand a MUL into two nodes. One that computes the high bits of 5217 /// the result and one that computes the low bits. 5218 /// \param HiLoVT The value type to use for the Lo and Hi nodes. 5219 /// \param LL Low bits of the LHS of the MUL. You can use this parameter 5220 /// if you want to control how low bits are extracted from the LHS. 5221 /// \param LH High bits of the LHS of the MUL. See LL for meaning. 5222 /// \param RL Low bits of the RHS of the MUL. See LL for meaning 5223 /// \param RH High bits of the RHS of the MUL. See LL for meaning. 5224 /// \returns true if the node has been expanded. false if it has not 5225 bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, 5226 SelectionDAG &DAG, MulExpansionKind Kind, 5227 SDValue LL = SDValue(), SDValue LH = SDValue(), 5228 SDValue RL = SDValue(), SDValue RH = SDValue()) const; 5229 5230 /// Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit 5231 /// urem by constant and other arithmetic ops. The n/2-bit urem by constant 5232 /// will be expanded by DAGCombiner. This is not possible for all constant 5233 /// divisors. 5234 /// \param N Node to expand 5235 /// \param Result A vector that will be filled with the lo and high parts of 5236 /// the results. For *DIVREM, this will be the quotient parts followed 5237 /// by the remainder parts. 5238 /// \param HiLoVT The value type to use for the Lo and Hi parts. Should be 5239 /// half of VT. 5240 /// \param LL Low bits of the LHS of the operation. You can use this 5241 /// parameter if you want to control how low bits are extracted from 5242 /// the LHS. 5243 /// \param LH High bits of the LHS of the operation. See LL for meaning. 5244 /// \returns true if the node has been expanded, false if it has not. 5245 bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl<SDValue> &Result, 5246 EVT HiLoVT, SelectionDAG &DAG, 5247 SDValue LL = SDValue(), 5248 SDValue LH = SDValue()) const; 5249 5250 /// Expand funnel shift. 5251 /// \param N Node to expand 5252 /// \returns The expansion if successful, SDValue() otherwise 5253 SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const; 5254 5255 /// Expand rotations. 
5256 /// \param N Node to expand 5257 /// \param AllowVectorOps expand vector rotate, this should only be performed 5258 /// if the legalization is happening outside of LegalizeVectorOps 5259 /// \returns The expansion if successful, SDValue() otherwise 5260 SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const; 5261 5262 /// Expand shift-by-parts. 5263 /// \param N Node to expand 5264 /// \param Lo lower-output-part after conversion 5265 /// \param Hi upper-output-part after conversion 5266 void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, 5267 SelectionDAG &DAG) const; 5268 5269 /// Expand float(f32) to SINT(i64) conversion 5270 /// \param N Node to expand 5271 /// \param Result output after conversion 5272 /// \returns True, if the expansion was successful, false otherwise 5273 bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; 5274 5275 /// Expand float to UINT conversion 5276 /// \param N Node to expand 5277 /// \param Result output after conversion 5278 /// \param Chain output chain after conversion 5279 /// \returns True, if the expansion was successful, false otherwise 5280 bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, 5281 SelectionDAG &DAG) const; 5282 5283 /// Expand UINT(i64) to double(f64) conversion 5284 /// \param N Node to expand 5285 /// \param Result output after conversion 5286 /// \param Chain output chain after conversion 5287 /// \returns True, if the expansion was successful, false otherwise 5288 bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, 5289 SelectionDAG &DAG) const; 5290 5291 /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs. 5292 SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const; 5293 5294 /// Expand fminimum/fmaximum into multiple comparison with selects. 5295 SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const; 5296 5297 /// Expand fminimumnum/fmaximumnum into multiple comparison with selects. 5298 SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const; 5299 5300 /// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max. 5301 /// \param N Node to expand 5302 /// \returns The expansion result 5303 SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const; 5304 5305 /// Truncate Op to ResultVT. If the result is exact, leave it alone. If it is 5306 /// not exact, force the result to be odd. 5307 /// \param ResultVT The type of result. 5308 /// \param Op The value to round. 5309 /// \returns The expansion result 5310 SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, 5311 SelectionDAG &DAG) const; 5312 5313 /// Expand round(fp) to fp conversion 5314 /// \param N Node to expand 5315 /// \returns The expansion result 5316 SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const; 5317 5318 /// Expand check for floating point class. 5319 /// \param ResultVT The type of intrinsic call result. 5320 /// \param Op The tested value. 5321 /// \param Test The test to perform. 5322 /// \param Flags The optimization flags. 5323 /// \returns The expansion result or SDValue() if it fails. 5324 SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, 5325 SDNodeFlags Flags, const SDLoc &DL, 5326 SelectionDAG &DAG) const; 5327 5328 /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes, 5329 /// vector nodes can only succeed if all operations are legal/custom. 
5330 /// \param N Node to expand 5331 /// \returns The expansion result or SDValue() if it fails. 5332 SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const; 5333 5334 /// Expand VP_CTPOP nodes. 5335 /// \returns The expansion result or SDValue() if it fails. 5336 SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const; 5337 5338 /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes, 5339 /// vector nodes can only succeed if all operations are legal/custom. 5340 /// \param N Node to expand 5341 /// \returns The expansion result or SDValue() if it fails. 5342 SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const; 5343 5344 /// Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes. 5345 /// \param N Node to expand 5346 /// \returns The expansion result or SDValue() if it fails. 5347 SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const; 5348 5349 /// Expand CTTZ via Table Lookup. 5350 /// \param N Node to expand 5351 /// \returns The expansion result or SDValue() if it fails. 5352 SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, 5353 SDValue Op, unsigned NumBitsPerElt) const; 5354 5355 /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes, 5356 /// vector nodes can only succeed if all operations are legal/custom. 5357 /// \param N Node to expand 5358 /// \returns The expansion result or SDValue() if it fails. 5359 SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const; 5360 5361 /// Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes. 5362 /// \param N Node to expand 5363 /// \returns The expansion result or SDValue() if it fails. 5364 SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const; 5365 5366 /// Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes. 5367 /// \param N Node to expand 5368 /// \returns The expansion result or SDValue() if it fails. 5369 SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const; 5370 5371 /// Expand VECTOR_FIND_LAST_ACTIVE nodes 5372 /// \param N Node to expand 5373 /// \returns The expansion result or SDValue() if it fails. 5374 SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const; 5375 5376 /// Expand ABS nodes. Expands vector/scalar ABS nodes, 5377 /// vector nodes can only succeed if all operations are legal/custom. 5378 /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size)) 5379 /// \param N Node to expand 5380 /// \param IsNegative indicate negated abs 5381 /// \returns The expansion result or SDValue() if it fails. 5382 SDValue expandABS(SDNode *N, SelectionDAG &DAG, 5383 bool IsNegative = false) const; 5384 5385 /// Expand ABDS/ABDU nodes. Expands vector/scalar ABDS/ABDU nodes. 5386 /// \param N Node to expand 5387 /// \returns The expansion result or SDValue() if it fails. 5388 SDValue expandABD(SDNode *N, SelectionDAG &DAG) const; 5389 5390 /// Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes. 5391 /// \param N Node to expand 5392 /// \returns The expansion result or SDValue() if it fails. 5393 SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const; 5394 5395 /// Expand BSWAP nodes. Expands scalar/vector BSWAP nodes with i16/i32/i64 5396 /// scalar types. Returns SDValue() if expand fails. 5397 /// \param N Node to expand 5398 /// \returns The expansion result or SDValue() if it fails. 5399 SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const; 5400 5401 /// Expand VP_BSWAP nodes. Expands VP_BSWAP nodes with 5402 /// i16/i32/i64 scalar types. Returns SDValue() if expand fails. 

  /// Expand VP_BSWAP nodes. Expands VP_BSWAP nodes with i16/i32/i64 scalar
  /// types. Returns SDValue() if expand fails.
  /// \param N Node to expand
  /// \returns The expansion result or SDValue() if it fails.
  SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const;

  /// Expand BITREVERSE nodes. Expands scalar/vector BITREVERSE nodes.
  /// Returns SDValue() if expand fails.
  /// \param N Node to expand
  /// \returns The expansion result or SDValue() if it fails.
  SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const;

  /// Expand VP_BITREVERSE nodes. Expands VP_BITREVERSE nodes with
  /// i8/i16/i32/i64 scalar types.
  /// \param N Node to expand
  /// \returns The expansion result or SDValue() if it fails.
  SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const;

  /// Turn a load of a vector type into a load of the individual elements.
  /// \param LD load to expand
  /// \returns BUILD_VECTOR and TokenFactor nodes.
  std::pair<SDValue, SDValue> scalarizeVectorLoad(LoadSDNode *LD,
                                                  SelectionDAG &DAG) const;

  /// Turn a store of a vector type into stores of the individual elements.
  /// \param ST Store with a vector value type
  /// \returns TokenFactor of the individual store chains.
  SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const;

  /// Expands an unaligned load to 2 half-size loads for an integer, and
  /// possibly more for vectors.
  std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD,
                                                  SelectionDAG &DAG) const;

  /// Expands an unaligned store to 2 half-size stores for integer values, and
  /// possibly more for vectors.
  SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const;

  /// Increments memory address \p Addr according to the type of the value
  /// \p DataVT that should be stored. If the data is stored in compressed
  /// form, the memory address should be incremented according to the number
  /// of stored elements. This number is equal to the number of '1' bits
  /// in the \p Mask.
  /// \p DataVT is a vector type. \p Mask is a vector value.
  /// \p DataVT and \p Mask have the same number of vector elements.
  SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL,
                                 EVT DataVT, SelectionDAG &DAG,
                                 bool IsCompressedMemory) const;

  /// Get a pointer to vector element \p Idx located in memory for a vector of
  /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out
  /// of bounds the returned pointer is unspecified, but will be within the
  /// vector bounds.
  SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
                                  SDValue Index) const;

  /// Get a pointer to a sub-vector of type \p SubVecVT at index \p Idx located
  /// in memory for a vector of type \p VecVT starting at a base address of
  /// \p VecPtr. If \p Idx plus the size of \p SubVecVT is out of bounds the
  /// returned pointer is unspecified, but the value returned will be such that
  /// the entire subvector would be within the vector bounds.
  SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
                                 EVT SubVecVT, SDValue Index) const;

  /// Method for building the DAG expansion of ISD::[US][MIN|MAX]. This
  /// method accepts integers as its arguments.
  SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const;
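
  // For reference, the kind of sequence expandIntMINMAX above can produce for
  // ISD::SMAX when SETCC and SELECT are available (a sketch under simplified
  // assumptions; CCVT stands for the target's SETCC result type):
  //
  //   SDValue Cond = DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETGT);
  //   SDValue Max  = DAG.getSelect(DL, VT, Cond, LHS, RHS);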

  /// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This
  /// method accepts integers as its arguments.
  SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const;

  /// Method for building the DAG expansion of ISD::[US]CMP. This
  /// method accepts integers as its arguments.
  SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const;

  /// Method for building the DAG expansion of ISD::[US]SHLSAT. This
  /// method accepts integers as its arguments.
  SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const;

  /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This
  /// method accepts integers as its arguments.
  SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const;

  /// Method for building the DAG expansion of ISD::[US]DIVFIX[SAT]. This
  /// method accepts integers as its arguments.
  /// Note: This method may fail if the division could not be performed
  /// within the type. Clients must retry with a wider type if this happens.
  SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS,
                              SDValue RHS, unsigned Scale,
                              SelectionDAG &DAG) const;

  /// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion
  /// always succeeds and populates the Result and Overflow arguments.
  void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
                      SelectionDAG &DAG) const;

  /// Method for building the DAG expansion of ISD::S(ADD|SUB)O. Expansion
  /// always succeeds and populates the Result and Overflow arguments.
  void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
                      SelectionDAG &DAG) const;

  /// Method for building the DAG expansion of ISD::[US]MULO. Returns whether
  /// expansion was successful and populates the Result and Overflow arguments.
  bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow,
                  SelectionDAG &DAG) const;

  /// Calculate the product twice the width of LHS and RHS. If HiLHS/HiRHS are
  /// non-null they will be included in the multiplication. The expansion works
  /// by splitting the 2 inputs into 4 pieces that we can multiply and add
  /// together without needing MULH or MUL_LOHI.
  void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed,
                           SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS,
                           SDValue HiLHS = SDValue(),
                           SDValue HiRHS = SDValue()) const;

  /// Calculate the full product of LHS and RHS either via a libcall or through
  /// brute force expansion of the multiplication. The expansion works by
  /// splitting the 2 inputs into 4 pieces that we can multiply and add
  /// together without needing MULH or MUL_LOHI.
  void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed,
                          const SDValue LHS, const SDValue RHS, SDValue &Lo,
                          SDValue &Hi) const;

  /// Expand a VECREDUCE_* into an explicit calculation. If Count is specified,
  /// only the first Count elements of the vector are used.
  SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;

  /// Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
  SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const;

  /// Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
  /// Returns true if the expansion was successful.
  bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const;
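
  // The rewrite expandREM above performs, sketched for SREM when only SDIV is
  // legal (UREM and the SDIVREM/UDIVREM forms are analogous; names are
  // illustrative):
  //
  //   SDValue Div = DAG.getNode(ISD::SDIV, DL, VT, X, Y);
  //   SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Div, Y);
  //   Result      = DAG.getNode(ISD::SUB, DL, VT, X, Mul);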

  /// Method for building the DAG expansion of ISD::VECTOR_SPLICE. This
  /// method accepts vectors as its arguments.
  SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const;

  /// Expand a VECTOR_COMPRESS into a sequence that extracts each element,
  /// stores it to a temporary location, and advances the store position,
  /// before re-loading the final vector.
  SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const;

  /// Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC
  /// on the current target. A VP_SETCC will additionally be given a Mask
  /// and/or EVL not equal to SDValue().
  ///
  /// If the SETCC has been legalized using AND / OR, then the legalized node
  /// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
  /// will be set to false. This will also hold if the VP_SETCC has been
  /// legalized using VP_AND / VP_OR.
  ///
  /// If the SETCC / VP_SETCC has been legalized by using
  /// getSetCCSwappedOperands(), then the values of LHS and RHS will be
  /// swapped, CC will be set to the new condition, and NeedInvert will be set
  /// to false.
  ///
  /// If the SETCC / VP_SETCC has been legalized using the inverse condcode,
  /// then LHS and RHS will be unchanged, CC will be set to the inverted
  /// condcode, and NeedInvert will be set to true. The caller must invert the
  /// result of the SETCC with SelectionDAG::getLogicalNOT() or take equivalent
  /// action to swap the effect of a true/false result.
  ///
  /// \returns true if the SETCC / VP_SETCC has been legalized, false if it
  /// hasn't.
  bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS,
                             SDValue &RHS, SDValue &CC, SDValue Mask,
                             SDValue EVL, bool &NeedInvert, const SDLoc &dl,
                             SDValue &Chain, bool IsSignaling = false) const;
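
  // Caller-side sketch of the NeedInvert contract described above
  // (illustrative only; the VP mask/EVL operands and error handling are
  // omitted, and the enclosing legalization code is hypothetical):
  //
  //   bool NeedInvert = false;
  //   if (LegalizeSetCCCondCode(DAG, VT, LHS, RHS, CC, SDValue(), SDValue(),
  //                             NeedInvert, DL, Chain)) {
  //     SDValue SetCC =
  //         CC ? DAG.getSetCC(DL, VT, LHS, RHS,
  //                           cast<CondCodeSDNode>(CC)->get())
  //            : LHS; // already folded into LHS via AND / OR
  //     if (NeedInvert)
  //       SetCC = DAG.getLogicalNOT(DL, SetCC, VT);
  //   }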

  //===--------------------------------------------------------------------===//
  // Instruction Emitting Hooks
  //

  /// This method should be implemented by targets that mark instructions with
  /// the 'usesCustomInserter' flag. These instructions are special in various
  /// ways, which require special support to insert. The specified MachineInstr
  /// is created but not inserted into any basic blocks, and this method is
  /// called to expand it into a sequence of instructions, potentially also
  /// creating new basic blocks and control flow.
  /// As long as the returned basic block is different (i.e., we created a new
  /// one), the custom inserter is free to modify the rest of \p MBB.
  virtual MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

  /// This method should be implemented by targets that mark instructions with
  /// the 'hasPostISelHook' flag. These instructions must be adjusted after
  /// instruction selection by target hooks, e.g., to fill in optional defs for
  /// ARM 's'-setting instructions.
  virtual void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                             SDNode *Node) const;

  /// If this function returns true, SelectionDAGBuilder emits a
  /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector.
  virtual bool useLoadStackGuardNode(const Module &M) const { return false; }

  virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                                      const SDLoc &DL) const {
    llvm_unreachable("not implemented for this target");
  }

  /// Lower a TLS global address SDNode for the target-independent emulated TLS
  /// model.
  virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                          SelectionDAG &DAG) const;

  /// Expands a target-specific indirect branch for the case of JumpTable
  /// expansion.
  virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                         SDValue Addr, int JTI,
                                         SelectionDAG &DAG) const;

  // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits)))
  // If we're comparing for equality to zero and isCtlzFast is true, expose the
  // fact that this can be implemented as a ctlz/srl pair, so that the DAG
  // combiner can fold the new nodes.
  SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;

  // Return true if `X & Y eq/ne 0` is preferable to `X & Y ne/eq Y`.
  virtual bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode, EVT) const {
    return true;
  }

  // Expand a vector operation by dividing it into smaller-length operations
  // and joining their results. SDValue() is returned when expansion did not
  // happen.
  SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const;

private:
  SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                           const SDLoc &DL, DAGCombinerInfo &DCI) const;
  SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
                             ISD::CondCode Cond, const SDLoc &DL,
                             DAGCombinerInfo &DCI) const;

  SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
                                               SDValue N1, ISD::CondCode Cond,
                                               DAGCombinerInfo &DCI,
                                               const SDLoc &DL) const;

  // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
  SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift(
      EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
      DAGCombinerInfo &DCI, const SDLoc &DL) const;

  SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                            SDValue CompTargetNode, ISD::CondCode Cond,
                            DAGCombinerInfo &DCI, const SDLoc &DL,
                            SmallVectorImpl<SDNode *> &Created) const;
  SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
                          ISD::CondCode Cond, DAGCombinerInfo &DCI,
                          const SDLoc &DL) const;

  SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                            SDValue CompTargetNode, ISD::CondCode Cond,
                            DAGCombinerInfo &DCI, const SDLoc &DL,
                            SmallVectorImpl<SDNode *> &Created) const;
  SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
                          ISD::CondCode Cond, DAGCombinerInfo &DCI,
                          const SDLoc &DL) const;
};

/// Given an LLVM IR type and return type attributes, compute the return value
/// EVTs and flags, and optionally also the offsets, if the return value is
/// being lowered to memory.
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr,
                   SmallVectorImpl<ISD::OutputArg> &Outs,
                   const TargetLowering &TLI, const DataLayout &DL);

} // end namespace llvm

#endif // LLVM_CODEGEN_TARGETLOWERING_H