//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, please stick to the following naming convention:
//
// _MERGE_OP<n>      The result value is a vector with inactive lanes equal
//                   to source operand OP<n>.
//
// _MERGE_ZERO       The result value is a vector with inactive lanes
//                   actively zeroed.
//
// _MERGE_PASSTHRU   The result value is a vector with inactive lanes equal
//                   to the last source operand, whose only purpose is being
//                   a passthru value.
//
// For other cases where no explicit action is needed to set the inactive
// lanes, or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
// _PRED
//
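// For example (an illustrative sketch of the convention, not a definitive
// operand list): FADD_PRED(pg, op1, op2) leaves its inactive lanes undefined,
// SETCC_MERGE_ZERO(pg, op1, op2, cc) actively zeroes them, and
// FABS_MERGE_PASSTHRU(pg, op1, passthru) takes them from the trailing
// passthru operand.
//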
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Pseudo for an ObjC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
  CALL_RVMARKER,

  CALL_BTI, // Function call followed by a BTI instruction.

  // Function call, authenticating the callee value first:
  // AUTH_CALL chain, callee, auth key #, int disc, addr disc, operands.
  AUTH_CALL,
  // AUTH_TC_RETURN chain, callee, fpdiff, auth key #, int disc, addr disc,
  // operands.
  AUTH_TC_RETURN,

  // Authenticated variant of CALL_RVMARKER.
  AUTH_CALL_RVMARKER,

  COALESCER_BARRIER,

  VG_SAVE,
  VG_RESTORE,

  SMSTART,
  SMSTOP,
  RESTORE_ZA,
  RESTORE_ZT,
  SAVE_ZT,

  // A call with the callee in x16, i.e. "blr x16".
  CALL_ARM64EC_TO_X64,

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  TLSDESC_AUTH_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // To avoid stack clash, allocation is performed by block and each block is
  // probed.
  PROBED_ALLOCA,

  // Predicated instructions where inactive lanes produce undefined results.
  ABDS_PRED,
  ABDU_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAX_PRED,
  FMAXNM_PRED,
  FMIN_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  HADDS_PRED,
  HADDU_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  RHADDS_PRED,
  RHADDU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  SRAD_MERGE_OP1,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTX_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left,right,falsecc,cc,flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,
  DUPLANE128,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,
  URSHR_I_PRED,

  // Vector narrowing shift by immediate (bottom)
  RSHRNB_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Round wide FP to narrow FP with inexact results to odd.
  FCVTXN,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Unsigned sum Long across Vector
  UADDLV,
  SADDLV,

  // Wide adds
  SADDWT,
  SADDWB,
  UADDWT,
  UADDWB,

  // Add Pairwise of two vectors
  ADDP,
  // Add Long Pairwise
  SADDLP,
  UADDLP,

  // udot/sdot/usdot instructions
  UDOT,
  SDOT,
  USDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,

  MRS, // MRS, also sets the flags via a glue.

  SMULL,
  UMULL,

  PMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTEST_ANY,
  PTRUE,

  CTTZ_ELTS,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  REVH_MERGE_PASSTHRU,
  REVW_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type but differing in length.
  REINTERPRET_CAST,

  // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
  LS64_BUILD,
  LS64_EXTRACT,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,
  GLD1Q_MERGE_ZERO,
  GLD1Q_INDEX_MERGE_ZERO,

  // Signed gather loads
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,
  SST1Q_PRED,
  SST1Q_INDEX_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // SME
  RDSVL,
  REVD_MERGE_PASSTHRU,
  ALLOCATE_ZA_BUFFER,
  INIT_TPIDR2OBJ,

  // Needed for __arm_agnostic("sme_za_state")
  GET_SME_SAVE_SIZE,
  ALLOC_SME_SAVE_BUFFER,

  // Asserts that a function argument (i32) is zero-extended to i8 by
  // the caller
  ASSERT_ZEXT_BOOL,

  // 128-bit system register accesses
  // lo64, hi64, chain = MRRS(chain, sysregname)
  MRRS,
  // chain = MSRR(chain, sysregname, lo64, hi64)
  MSRR,

  // Strict (exception-raising) floating point comparison
  FIRST_STRICTFP_OPCODE,
  STRICT_FCMP = FIRST_STRICTFP_OPCODE,
  STRICT_FCMPE,
  LAST_STRICTFP_OPCODE = STRICT_FCMPE,

  // NEON Load/Store with post-increment base updates
  FIRST_MEMORY_OPCODE,
  LD2post = FIRST_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  LDIAPP,
  LDNP,
  STP,
  STILP,
  STNP,
  LAST_MEMORY_OPCODE = STNP,

  // SME ZA loads and stores
  SME_ZA_LDR,
  SME_ZA_STR,
};

} // end namespace AArch64ISD

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
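// For example (a minimal illustrative sketch, not part of this interface):
// given a value read from FPCR, the current rounding mode is
//   Rounding RM = static_cast<Rounding>((FPCR >> RoundingBitsPos) & rmMask);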
// Reserved bits should be preserved when modifying FPCR.
const uint64_t ReservedFPControlBits = 0xfffffffff80040f8;

// Registers used to pass function arguments.
ArrayRef<MCPhysReg> getGPRArgRegs();
ArrayRef<MCPhysReg> getFPRArgRegs();

/// Maximum allowed number of unprobed bytes above SP at an ABI
/// boundary.
const unsigned StackProbeMaxUnprobedStack = 1024;

/// Maximum number of iterations to unroll for a constant size probing loop.
const unsigned StackProbeMaxLoopUnroll = 4;

} // namespace AArch64

class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
  /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
  bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
                           SDValue N1) const override;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      unsigned *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
  /// shuffle mask can be codegen'd directly.
  bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI,
                                            MachineBasicBlock *MBB) const;

  MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
                                  MachineInstr &MI,
                                  MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
                                 MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
                                 unsigned Opcode, bool Op0IsDef) const;
  MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitInitTPIDR2Object(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitAllocateZABuffer(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitAllocateSMESaveBuffer(MachineInstr &MI,
                                               MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitGetSMESaveSize(MachineInstr &MI,
                                        MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool shouldRemoveRedundantExtend(SDValue Op) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool optimizeExtendOrTruncateConversion(
      Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool lowerDeinterleaveIntrinsicToLoad(
      LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const override;

  bool lowerInterleaveIntrinsicToStore(
      StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalAddScalableImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool isMulAddWithConstProfitable(SDValue AddNode,
                                   SDValue ConstNode) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
                                         int64_t MaxOffset) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  bool generateFMAsInMachineCombiner(EVT VT,
                                     CodeGenOptLevel OptLevel) const override;

  /// Return true if the target has native support for
  /// the specified value type and it is 'desirable' to use the type for the
  /// given node type.
  bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
  ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override {
    return false;
  }

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;

  /// Return true if it is profitable to fold a pair of shifts into a mask.
  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                         CombineLevel Level) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                            EVT VT) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool isOpSuitableForLSE128(const Instruction *I) const;
  bool isOpSuitableForRCPC3(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;
  bool
  shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode(const Module &M) const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override;

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override;

  bool isCheapToSpeculateCttz(Type *) const override {
    return true;
  }

  bool isCheapToSpeculateCtlz(Type *) const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    TypeSize TS = VT.getSizeInBits();
    // TODO: We should be able to use bic/bif too for SVE.
    return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                     unsigned ExpansionFactor) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, which is what SXT
    // supports. XVT will be larger than KeptBitsVT.
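    // For example (illustrative): XVT == i64 with KeptBits == 32 yields
    // KeptBitsVT == i32 and the check returns true, whereas KeptBits == 12
    // yields i12, which SXT cannot produce, so the check returns false.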
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  bool shouldExpandCmpUsingSelects(EVT VT) const override;

  bool isComplexDeinterleavingSupported() const override;
  bool isComplexDeinterleavingOperationSupported(
      ComplexDeinterleavingOperation Operation, Type *Ty) const override;

  Value *createComplexDeinterleavingIR(
      IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
      ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
      Value *Accumulator = nullptr) const override;

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  bool supportPtrAuthBundles() const override { return true; }

  bool supportKCFIBundles() const override { return true; }

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                              MachineBasicBlock::instr_iterator &MBBI,
                              const TargetInstrInfo *TII) const override;

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
                                    bool &UseScalable) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
                                     bool UseScalable) const;

  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;

  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
                             bool AllowUnknown = false) const override;

  bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;

  bool
  shouldExpandPartialReductionIntrinsic(const IntrinsicInst *I) const override;

  bool shouldExpandCttzElements(EVT VT) const override;

  bool shouldExpandVectorMatch(EVT VT, unsigned SearchSize) const override;

  /// If a change in streaming mode is required on entry to/return from a
  /// function call it emits and returns the corresponding SMSTART or SMSTOP
  /// node. \p Condition should be one of the enum values from
  /// AArch64SME::ToggleCondition.
  SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
                              SDValue Chain, SDValue InGlue, unsigned Condition,
                              SDValue PStateSM = SDValue()) const;

  bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }

  // Normally SVE is only used for byte size vectors that do not fit within a
  // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64bit and 128bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

  // Follow NEON ABI rules even when using SVE for fixed length vectors.
  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override;
  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;
  unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
                                                CallingConv::ID CC, EVT VT,
                                                EVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                MVT &RegisterVT) const override;

  /// True if stack clash protection is enabled for this function.
  bool hasInlineStackProbe(const MachineFunction &MF) const override;

#ifndef NDEBUG
  void verifyTargetSDNode(const SDNode *N) const override;
#endif

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  llvm::BumpPtrAllocator BumpAlloc;
  llvm::StringSaver Saver{BumpAlloc};

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRType(MVT VT);
  void addQRType(MVT VT);

  bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override;

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                     SDNode *Node) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<CCValAssign> &RVLocs,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal, bool RequiresSMChange) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerVECTOR_COMPRESS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;

  bool
  isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context, const Type *RetTy) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerPtrAuthGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
                              unsigned NewOp) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_HISTOGRAM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorXRINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBuildVectorToSVE(SDValue Op, SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::ConstraintCode::Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    // followed by llvm_unreachable so we'll leave them unimplemented in
    // the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  /// Handle lowering of flag assembly outputs.
  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                      const SDLoc &DL,
                                      const AsmOperandInfo &Constraint,
                                      SelectionDAG &DAG) const override;

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                              SDValue &Offset, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;
  bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
                       bool IsPre, MachineRegisterInfo &MRI) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  bool isTargetCanonicalConstantNode(SDValue Op) const override;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally useable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  // This function does not handle predicate bitcasts.
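  // For example (an illustrative sketch based on the rules above): casting
  // nxv2f32 (unpacked) to nxv4f32 (packed) keeps the f32 element type and is
  // a REINTERPRET_CAST, whereas casting nxv2i64 to nxv2f64 changes the element
  // type and is a plain BITCAST.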
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;

  // Returns the runtime value for PSTATE.SM by generating a call to
  // __arm_sme_state.
  SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
                             EVT VT) const;

  bool preferScalarizeSplat(SDNode *N) const override;

  unsigned getMinimumJumpTableEntries() const override;

  bool softPromoteHalfType() const override { return true; }

  bool shouldScalarizeBinop(SDValue VecOp) const override {
    return VecOp.getOpcode() == ISD::SETCC;
  }
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif