Lines Matching +full:push +full:- +full:ci +full:- +full:container

1 //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation  ----===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
108 #define DEBUG_TYPE "aarch64-lower"
118 "aarch64-elf-ldtls-generation", cl::Hidden,
123 EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
133 EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
138 static cl::opt<bool> EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden,
145 static cl::opt<unsigned> MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden,
153 "aarch64-enable-gisel-sve", cl::Hidden,
349 // Otherwise, it's either a constant discriminator, or a non-blended
351 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
352 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
353 AddrDisc = Disc->getOperand(1);
354 ConstDisc = Disc->getOperand(2);
360 // discriminator value) isn't a 16-bit constant, bail out, and let the
363 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
364 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
369 AddrDisc = DAG->getRegister(AArch64::NoRegister, MVT::i64);
372 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
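
The fragment above splits a pointer-authentication discriminator into its address part and its integer part, bailing out unless the integer part fits in 16 bits. A rough scalar model of the blend being taken apart (an assumption-level sketch based on the usual MOVK-style lowering, where the 16-bit integer discriminator occupies the top 16 bits of the address discriminator; hypothetical helper, not code from the matched file):

#include <cstdint>

// Model of blend(addr, imm): keep the low 48 bits of the address discriminator
// and place the 16-bit integer discriminator in bits [48, 63].
static uint64_t ptrauthBlendModel(uint64_t AddrDisc, uint64_t IntDisc) {
  return (AddrDisc & 0x0000FFFFFFFFFFFFULL) | ((IntDisc & 0xFFFFULL) << 48);
}

This is why a constant discriminator wider than 16 bits cannot be split off; the code then falls back to treating the whole value as an address discriminator.
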
383 // vector to all-one or all-zero.
390 if (Subtarget->hasLS64()) {
396 if (Subtarget->hasFPARMv8()) {
404 if (Subtarget->hasNEON()) {
427 if (Subtarget->isSVEorStreamingSVEAvailable()) {
452 if (Subtarget->useSVEForFixedLengthVectors()) {
463 if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) {
473 computeRegisterProperties(Subtarget->getRegisterInfo());
562 // Lowering for many of the conversions is actually specified by the non-f128
588 if (Subtarget->hasFPARMv8()) {
594 if (Subtarget->hasFPARMv8()) {
612 // Variable-sized objects.
630 // AArch64 lacks both left-rotate and popcount instructions.
648 if (Subtarget->hasCSSC()) {
727 if (Subtarget->hasFullFP16()) {
810 // Round-to-integer operations need custom lowering for fp16, as Promote doesn't work
867 if (!Subtarget->hasFullFP16()) {
874 // AArch64 has implementations of a lot of rounding-like FP operations.
875 // clang-format off
890 if (Subtarget->hasFullFP16())
893 // clang-format on
900 if (Subtarget->hasFullFP16())
913 if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
925 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
974 if (Subtarget->hasLSE128()) {
982 // 128-bit loads and stores can be done without expanding
986 // Aligned 128-bit loads and stores are single-copy atomic according to the
987 // v8.4a spec. LRCPC3 introduces 128-bit STILP/LDIAPP but still requires LSE2.
988 if (Subtarget->hasLSE2()) {
993 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
994 // custom lowering, as there are no un-paired non-temporal stores and
1005 // 256 bit non-temporal loads can be lowered to LDNP. This is done using
1006 // custom lowering, as there are no un-paired non-temporal loads legalization
1030 // Make floating-point constants legal for the large code model, so they don't
1032 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
1037 // AArch64 does not have floating-point extending loads, i1 sign-extending
1038 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
1057 if (Subtarget->hasFPARMv8()) {
1098 // Vector add and sub nodes may conceal a high-half opportunity.
1148 Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32;
1153 Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16;
1157 Subtarget->requiresStrictAlign() ? MaxStoresPerMemmoveOptSize : 16;
1161 Subtarget->requiresStrictAlign() ? MaxLoadsPerMemcmpOptSize : 8;
1176 if (!Subtarget->isTargetWindows())
1192 if (Subtarget->hasSME())
1195 if (Subtarget->isNeonAvailable()) {
1198 // clang-format off
1221 // clang-format on
1230 // AArch64 doesn't have direct vector->f32 conversion instructions for
1235 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
1236 // Or a direct i32 -> f16 vector conversion. Set it to Custom, so the
1237 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
1243 if (Subtarget->hasFullFP16()) {
1283 // Custom handling for some quad-vector types to detect MULL.
1313 if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
1374 if (Subtarget->hasFullFP16())
1383 if (Subtarget->hasFullFP16())
1421 Subtarget->isLittleEndian() ? Legal : Expand);
1438 if (Subtarget->hasSME()) {
1444 if (Subtarget->isSVEorStreamingSVEAvailable()) {
1454 if (Subtarget->isSVEorStreamingSVEAvailable()) {
1517 if (!Subtarget->isLittleEndian())
1520 if (Subtarget->hasSVE2() ||
1521 (Subtarget->hasSME() && Subtarget->isStreaming()))
1598 // SVE supports truncating stores of 64 and 128-bit vectors
1694 if (Subtarget->hasSVEB16B16()) {
1714 if (!Subtarget->hasSVEB16B16()) {
1733 // NEON doesn't support 64-bit vector integer muls, but SVE does.
1739 if (Subtarget->useSVEForFixedLengthVectors()) {
1742 VT, /*OverrideNEON=*/!Subtarget->isNeonAvailable()))
1747 VT, /*OverrideNEON=*/!Subtarget->isNeonAvailable()))
1812 // Handle operations that are only available in non-streaming SVE mode.
1813 if (Subtarget->isSVEAvailable()) {
1843 if (Subtarget->hasSVE2()) {
1852 if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
1859 if (Subtarget->hasSVE()) {
1866 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
1871 // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
1873 if (Subtarget->isTargetWindows()) {
1885 if (Subtarget->isWindowsArm64EC()) {
1927 // But we do support custom-lowering for FCOPYSIGN.
1931 Subtarget->hasFullFP16()))
1980 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
2003 // * The lowering of the non-strict versions involves target-specific ISD
2009 if (Subtarget->isLittleEndian()) {
2017 if (Subtarget->hasD128()) {
2025 // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
2026 if (!Subtarget->hasSVE())
2030 // whilelo instruction for generating fixed-width predicates too.
2045 if (I->getIntrinsicID() != Intrinsic::experimental_vector_partial_reduce_add)
2048 EVT VT = EVT::getEVT(I->getType());
2049 auto Op1 = I->getOperand(1);
2050 EVT Op1VT = EVT::getEVT(Op1->getType());
2059 if (!Subtarget->isSVEorStreamingSVEAvailable())
2063 // also support fixed-width predicates.
2071 // MATCH is SVE2 and only available in non-streaming mode.
2072 if (!Subtarget->hasSVE2() || !Subtarget->isSVEAvailable())
2074 // Furthermore, we can only use it for 8-bit or 16-bit elements.
2117 // Mark floating-point truncating stores/extending loads as having custom
2130 bool PreferSVE = !PreferNEON && Subtarget->isSVEAvailable();
2228 if (Subtarget->isNeonAvailable())
2234 if (Subtarget->isNeonAvailable())
2247 // isIntImmediate - This method tests to see if the node is a constant
2251 Imm = C->getZExtValue();
2257 // isOpcWithIntImmediate - This method tests to see if the node is a specific
2262 return N->getOpcode() == Opc &&
2263 isIntImmediate(N->getOperand(1).getNode(), Imm);
2271 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
2286 // The goal here is to set the non-demanded bits in a way that minimizes
2288 // we set the non-demanded bits to the value of the preceding demanded bits.
2290 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
2296 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
2299 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
2304 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
2309 // We cannot shrink the element size any further if it is 2-bits.
2344 // If the new constant immediate is all-zeros or all-ones, let the target
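
The comments above describe how the non-demanded bits of a logical immediate are filled in: each one takes the value of the nearest demanded bit below it, which minimizes the number of 0/1 transitions and makes the value more likely to encode as a logical immediate. A naive standalone sketch of that bit-copy idea for a single 8-bit element (illustration only; the in-tree code works on full element sizes and uses a rotate to handle wrap-around):

#include <cstdint>

// Propagate each known (demanded) bit upward into the unknown positions
// directly above it until every bit is filled. Bits below the lowest demanded
// bit are simply left at zero in this sketch.
static uint8_t fillNonDemandedBits(uint8_t Imm, uint8_t DemandedBits) {
  uint8_t Filled = Imm & DemandedBits;
  uint8_t Known = DemandedBits;
  for (int I = 0; I < 8; ++I) {
    uint8_t Next = (uint8_t)(Known << 1) & (uint8_t)~Known; // fillable now
    Filled |= (uint8_t)(Filled << 1) & Next;                // copy bit below
    Known |= Next;
  }
  return Filled;
}

For example, with DemandedBits = 0b01100101 and demanded values 1, 0, 0, 1 in bits 6, 5, 2 and 0, the result is 0b11000011.
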
2401 uint64_t Imm = C->getZExtValue();
2405 /// computeKnownBitsForTargetNode - Determine which of the bits specified in
2425 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2426 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2433 ~(Op->getConstantOperandAPInt(1) << Op->getConstantOperandAPInt(2))
2435 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2441 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2442 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2448 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2449 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2455 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2456 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2462 APInt(Known.getBitWidth(), Op->getConstantOperandVal(0)));
2467 if (!Subtarget->isTargetILP32())
2469 // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
2474 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2480 static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
2486 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
2488 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
2506 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - Bound);
2515 // bits larger than the element datatype. 32-bit or larger doesn't need
2521 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
2525 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
2560 // Compares return either 0 or all-ones
2582 // even with +strict-align. Predicated SVE loads/stores (e.g. ld1/st1), used
2583 // for stores that come from IR, only require element-size alignment (even if
2585 // have 16-byte alignment with +strict-align (and fail to lower as we don't
2593 if (Subtarget->requiresStrictAlign())
2597 // Some CPUs are fine with unaligned stores except for 128-bit ones.
2598 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
2608 // them regresses performance on micro-benchmarks and olden/bh.
2618 if (Subtarget->requiresStrictAlign())
2622 // Some CPUs are fine with unaligned stores except for 128-bit ones.
2623 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
2634 // them regresses performance on micro-benchmarks and olden/bh.
2989 // We materialise the F128CSEL pseudo-instruction as some control flow and a
3001 MachineFunction *MF = MBB->getParent();
3002 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3003 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
3005 MachineFunction::iterator It = ++MBB->getIterator();
3013 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
3014 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
3015 MF->insert(It, TrueBB);
3016 MF->insert(It, EndBB);
3018 // Transfer rest of current basic-block to EndBB
3019 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
3020 MBB->end());
3021 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
3023 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
3024 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
3025 MBB->addSuccessor(TrueBB);
3026 MBB->addSuccessor(EndBB);
3029 TrueBB->addSuccessor(EndBB);
3032 TrueBB->addLiveIn(AArch64::NZCV);
3033 EndBB->addLiveIn(AArch64::NZCV);
3036 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
3049 BB->getParent()->getFunction().getPersonalityFn())) &&
3057 MachineFunction &MF = *MBB->getParent();
3059 DebugLoc DL = MBB->findDebugLoc(MBBI);
3067 return NextInst->getParent();
3074 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3075 MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
3090 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3092 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::LDR_ZA));
3108 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3111 MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opcode))
3124 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3125 MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
3156 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3158 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::ZERO_M));
3174 MachineFunction *MF = BB->getParent();
3175 MachineFrameInfo &MFI = MF->getFrameInfo();
3176 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
3177 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
3179 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3181 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRXui))
3185 // Set the reserved bytes (10-15) to zero
3186 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRHHui))
3190 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRWui))
3197 BB->remove_instr(&MI);
3204 MachineFunction *MF = BB->getParent();
3205 MachineFrameInfo &MFI = MF->getFrameInfo();
3206 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
3211 assert(!MF->getSubtarget<AArch64Subtarget>().isTargetWindows() &&
3214 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
3217 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3218 MachineRegisterInfo &MRI = MF->getRegInfo();
3223 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), SP)
3226 // Allocate a lazy-save buffer object of the size given, normally SVL * SVL
3229 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::MSUBXrrr), Dest)
3233 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
3241 BB->remove_instr(&MI);
3249 MachineFunction *MF = BB->getParent();
3250 MachineFrameInfo &MFI = MF->getFrameInfo();
3251 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
3252 assert(!MF->getSubtarget<AArch64Subtarget>().isTargetWindows() &&
3255 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3256 if (FuncInfo->isSMESaveBufferUsed()) {
3260 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::SUBXrx64), AArch64::SP)
3264 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), Dest)
3270 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
3273 BB->remove_instr(&MI);
3281 MachineFunction *MF = BB->getParent();
3282 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
3283 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3284 if (FuncInfo->isSMESaveBufferUsed()) {
3285 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3286 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::BL))
3289 .addRegMask(TRI->getCallPreservedMask(
3292 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
3296 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
3299 BB->remove_instr(&MI);
3307 if (SMEOrigInstr != -1) {
3308 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3310 TII->get(MI.getOpcode()).TSFlags & AArch64::SMEMatrixTypeMask;
3346 // has implicit def. This def is early-clobber as it will be set at
3404 //===----------------------------------------------------------------------===//
3406 //===----------------------------------------------------------------------===//
3408 //===----------------------------------------------------------------------===//
3410 //===----------------------------------------------------------------------===//
3422 /// isZerosVector - Check whether SDNode N is a zero-filled vector.
3425 while (N->getOpcode() == ISD::BITCAST)
3426 N = N->getOperand(0).getNode();
3431 if (N->getOpcode() != AArch64ISD::DUP)
3434 auto Opnd0 = N->getOperand(0);
3438 /// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
3467 /// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
3557 /// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
3583 // All of the compare-mask comparisons are ordered, but we can switch
3606 // the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
3612 // So, finally, the only LLVM-native comparisons that don't mention C or V
3709 /// - We can implement (NEG SETCC) i.e. negating a single comparison by
3711 /// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
3714 /// - Note that we can only ever negate all previously processed results.
3716 /// of two sub-trees (because the negation affects all sub-trees emitted so
3717 /// far, so the 2nd sub-tree we emit would also affect the first).
3719 /// - (OR (SETCC A) (SETCC B)) can be implemented via:
3721 /// - After transforming OR to NEG/AND combinations we may be able to use NEG
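
The bullet points above describe the conjunction/disjunction (CCMP/FCCMP) emission strategy; the OR case is just De Morgan's law applied so that only AND chains, plus a final negation, need to be materialized. A trivial scalar reminder of the identity (illustration only):

// (A || B) is emitted as the negation of (!A && !B), which fits a
// CMP/CCMP chain that can only AND conditions together.
static bool orViaNegatedAnd(bool A, bool B) { return !(!A && !B); }
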
3764 APInt Imm = Const->getAPIntValue();
3765 if (Imm.isNegative() && Imm.sgt(-32)) {
3767 RHS = DAG.getConstant(Imm.abs(), DL, Const->getValueType(0));
3791 /// \param CanNegate Set to true if we can negate the whole sub-tree just by
3794 /// Negate==true on this sub-tree)
3808 unsigned Opcode = Val->getOpcode();
3810 if (Val->getOperand(0).getValueType() == MVT::f128)
3821 SDValue O0 = Val->getOperand(0);
3822 SDValue O1 = Val->getOperand(1);
3841 // the leafs, then this sub-tree as a whole negates naturally.
3843 // If we cannot naturally negate the whole sub-tree, then this must be
3863 /// \p Negate is true if we want this sub-tree being negated just by changing
3869 unsigned Opcode = Val->getOpcode();
3871 SDValue LHS = Val->getOperand(0);
3872 SDValue RHS = Val->getOperand(1);
3873 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
3906 assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
3910 SDValue LHS = Val->getOperand(0);
3917 SDValue RHS = Val->getOperand(1);
3924 // Swap sub-tree that must come first to the right side.
3937 // Swap the sub-tree that we can negate naturally to the left.
3946 // Negate the left sub-tree if possible, otherwise negate the result.
3962 // Emit sub-trees.
3998 uint64_t Mask = MaskCst->getZExtValue();
4014 uint64_t Shift = ShiftCst->getZExtValue();
4030 uint64_t C = RHSC->getZExtValue();
4039 isLegalArithImmed((uint32_t)(C - 1))) ||
4041 isLegalArithImmed(C - 1ULL))) {
4043 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
4050 isLegalArithImmed((uint32_t)(C - 1))) ||
4051 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
4053 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
4094 !isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) {
4117 // -1 constant. For example,
4128 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
4129 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
4130 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
4131 LHS.getNode()->hasNUsesOfValue(1, 0)) {
4132 int16_t ValueofRHS = RHS->getAsZExtVal();
4133 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
4144 if (!Cmp && (RHSC->isZero() || RHSC->isOne())) {
4146 if ((CC == ISD::SETNE) ^ RHSC->isZero())
4194 // Extend to 64-bits, then perform a 64-bit multiply.
4201 // Check that the result fits into a 32-bit integer.
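
The two comment lines above describe the classic widening overflow check for a 32-bit multiply. A minimal scalar illustration of the same idea (a sketch, not the DAG lowering itself):

#include <cstdint>

// Widen both operands to 64 bits, multiply, and report overflow if the
// product does not survive a round trip through 32 bits.
static bool smulOverflows32(int32_t A, int32_t B, int32_t &Res) {
  int64_t Wide = (int64_t)A * (int64_t)B;
  Res = (int32_t)Wide;
  return Wide != (int64_t)Res;
}
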
4241 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
4252 !Subtarget->isNeonAvailable()))
4262 // -->
4268 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
4287 // (xor x, (select_cc a, b, cc, 0, -1) )
4288 // -->
4289 // (csel x, (xor x, -1), cc ...)
4293 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
4299 // FIXME: This could be generalized to non-integer comparisons.
4312 if (CTVal->isAllOnes() && CFVal->isZero()) {
4319 if (CTVal->isZero() && CFVal->isAllOnes()) {
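
The fold sketched in the surrounding comments works because XOR with 0 is the identity and XOR with -1 is bitwise NOT, so the select can be hoisted out of the XOR and emitted as one conditional-select instruction. A scalar model of the equivalence (illustration only):

#include <cstdint>

// Before: X ^ (Cond ? 0 : -1)        After: Cond ? X : ~X  (one CSEL/CSINV)
static int64_t xorSelectBefore(int64_t X, bool Cond) { return X ^ (Cond ? 0 : -1); }
static int64_t xorSelectAfter(int64_t X, bool Cond) { return Cond ? X : ~X; }
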
4438 // The front-end should have filtered out the out-of-range values
4439 assert(Locality <= 3 && "Prefetch locality out-of-range");
4443 Locality = 3 - Locality;
4456 // Converts SETCC (AND X Y) Z ULT -> SETCC (AND X (Y & ~(Z - 1)) 0 EQ when Y is
4457 // a power of 2. This is then lowered to ANDS X (Y & ~(Z - 1)) instead of SUBS
4461 if (CC == ISD::SETULT && LHS.getOpcode() == ISD::AND && LHS->hasOneUse()) {
4465 uint64_t LHSConstValue = LHSConstOp->getZExtValue();
4466 uint64_t RHSConstant = RHSConst->getZExtValue();
4468 uint64_t NewMaskValue = LHSConstValue & ~(RHSConstant - 1);
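
The rewrite above relies on the fact that when the AND mask is a power of two (and the compared constant is non-zero), the masked value is either 0 or the mask itself, so the unsigned comparison collapses into a zero test that a single ANDS can compute. A small standalone statement of the equivalence (illustration only):

#include <cstdint>

// For a power-of-two mask Y and non-zero Z:
//   (X & Y) u< Z        <=>        (X & (Y & ~(Z - 1))) == 0
// The second form needs only ANDS, instead of AND followed by SUBS.
static bool cmpViaSubs(uint64_t X, uint64_t Y, uint64_t Z) { return (X & Y) < Z; }
static bool cmpViaAnds(uint64_t X, uint64_t Y, uint64_t Z) {
  return (X & (Y & ~(Z - 1))) == 0;
}
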
4496 // and the second using native f32->VT instructions.
4505 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
4508 bool IsStrict = Op->isStrictFPOpcode();
4512 // FP16->FP32 extends are legal for v32 and v4f32.
4515 // Split bf16->f64 extends into two fpextends.
4530 // FP16->FP32 extends are legal for v32 and v4f32.
4563 bool IsStrict = Op->isStrictFPOpcode();
4574 auto ImmV = [&](int I) -> SDValue { return DAG.getConstant(I, DL, I32); };
4580 if (Subtarget->hasBF16())
4590 (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable())) {
4601 return DAG.getNode(Op.getOpcode(), DL, VT, NewOps, Op->getFlags());
4627 if (useSVEForFixedLengthVectorVT(SrcVT, !Subtarget->isNeonAvailable()))
4633 !((Subtarget->hasNEON() || Subtarget->hasSME()) &&
4634 Subtarget->hasBF16())) {
4704 bool IsStrict = Op->isStrictFPOpcode();
4715 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()) ||
4716 useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable()))
4722 if ((InVT.getVectorElementType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
4769 // Use a scalar operation for conversions between single-element vectors of
4789 bool IsStrict = Op->isStrictFPOpcode();
4796 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
4822 // AArch64 FP-to-int conversions saturate to the destination element size, so
4827 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
4847 (!Subtarget->hasFullFP16() || DstElementWidth > 16)) ||
4926 // AArch64 FP-to-int conversions saturate to the destination register size, so
4935 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
4941 if ((SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) || SrcVT == MVT::bf16) {
4951 (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
4992 // Round the floating-point value into a floating-point register with the
5006 bool IsStrict = Op->isStrictFPOpcode();
5028 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()) ||
5029 useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable()))
5075 // Use a scalar operation for conversions between single-element vectors of
5096 bool IsStrict = Op->isStrictFPOpcode();
5099 bool IsSigned = Op->getOpcode() == ISD::STRICT_SINT_TO_FP ||
5100 Op->getOpcode() == ISD::SINT_TO_FP;
5131 // We need to be careful about i64 -> bf16.
5154 // double-precision value or it is too big. If it is sufficiently small,
5155 // we should just go u64 -> double -> bf16 in a naive way. Otherwise, we
5156 // ensure that u64 -> double has no rounding error by only using the 52
5217 if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
5225 // Other conversions are legal, unless it's to the completely software-based
5282 "Expected int->fp bitcast!");
5324 // Returns lane if Op extracts from a two-element vector and lane is constant
5329 if (OpNode->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
5332 EVT VT = OpNode->getOperand(0).getValueType();
5333 ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpNode->getOperand(1));
5337 return C->getZExtValue();
5347 for (const SDValue &Elt : N->op_values()) {
5352 if (!isIntN(HalfSize, C->getSExtValue()))
5355 if (!isUIntN(HalfSize, C->getZExtValue()))
5393 return N0->hasOneUse() && N1->hasOneUse() &&
5404 return N0->hasOneUse() && N1->hasOneUse() &&
5413 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
5436 SDValue Chain = Op->getOperand(0);
5437 SDValue RMValue = Op->getOperand(1);
5441 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
5442 // ((arg - 1) & 3) << 22).
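
The two mappings quoted above are inverses of each other modulo 4. A tiny sketch of both directions, using only the field arithmetic (actually reading or changing the mode would of course go through FPCR):

// FPCR RMode -> FLT_ROUNDS value:   0->1, 1->2, 2->3, 3->0
static unsigned rmodeToFltRounds(unsigned RMode) { return (RMode + 1) & 3; }
// FLT_ROUNDS value -> FPCR RMode:   0->3, 1->0, 2->1, 3->2, placed at bit 22
static unsigned fltRoundsToRModeBits(unsigned Arg) { return ((Arg - 1) & 3) << 22; }
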
5480 SDValue Chain = Op->getOperand(0);
5499 SDValue Chain = Op->getOperand(0);
5500 SDValue Mode = Op->getOperand(1);
5514 SDValue Chain = Op->getOperand(0);
5595 bool OverrideNEON = !Subtarget->isNeonAvailable();
5599 // Multiplications are only custom-lowered for 128-bit and 64-bit vectors so
5602 "unexpected type for custom-lowering ISD::MUL");
5617 if (Subtarget->hasSVE())
5634 if (Subtarget->hasSVE())
5656 // isel lowering to take advantage of no-stall back to back s/umul + s/umla.
5657 // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57
5730 "Expected a predicate-to-predicate bitcast");
5752 // case (e.g. when casting from <vscale x 16 x i1> -> <vscale x 2 x i1>) then
5789 // ldr(%tileslice, %ptr, 11) -> ldr [%tileslice, 11], [%ptr, 11]
5793 // ->
5805 // ->
5811 // Case 4: If the vecnum is an add of an immediate, then the non-immediate
5815 // ->
5826 SDValue TileSlice = N->getOperand(2);
5827 SDValue Base = N->getOperand(3);
5828 SDValue VecNum = N->getOperand(4);
5835 ConstAddend = cast<ConstantSDNode>(VecNum.getOperand(1))->getSExtValue();
5838 ConstAddend = ImmNode->getSExtValue();
5843 if (int32_t C = (ConstAddend - ImmAddend)) {
5885 "Expected 8-bit or 16-bit characters.");
5888 // A single container is enough for both operands because ultimately the
5895 // If Op2 is a full 128-bit vector, wrap it trivially in a scalable vector.
5902 // If Op2 is not a full 128-bit vector, we always need to broadcast it.
5966 Op->getOperand(0), // Chain
5972 Op->getOperand(0), // Chain
5987 SDValue Chain = Node->getChain();
5992 auto Alignment = Node->getMemOperand()->getAlign();
5993 bool IsVol = Node->isVolatile();
5994 auto DstPtrInfo = Node->getPointerInfo();
6053 SelectionDAG &DAG) -> SDValue {
6056 // re-use the dag-combiner function with aarch64_neon_{pmull,smull,umull}.
6081 // the non-high version of PMULL instruction. Use v1i64 to represent i64.
6405 const auto *RegInfo = Subtarget->getRegisterInfo();
6406 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
6418 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
6491 // then extracting a fixed-width subvector from the scalable vector.
6529 // an SVE predicate register mask from the fixed-width vector.
6557 // SVE only supports implicit extension of 32-bit indices.
6558 if (!Subtarget->hasSVE() || IndexVT.getVectorElementType() != MVT::i32)
6565 // Scalable vectors with "vscale * 2" or fewer elements sit within a 64-bit
6566 // element container type, which would violate the previous clause.
6572 if (!ExtVT.isScalableVector() && !Subtarget->useSVEForFixedLengthVectors())
6580 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal->getOperand(0))) {
6581 if (!isLoadExtLegalOrCustom(ISD::ZEXTLOAD, ExtVT, Ld->getValueType(0))) {
6582 // Disable extending masked loads for fixed-width for now, since the code
6588 for (auto *U : Ld->getMask()->users())
6620 return AddrModes.find(Key)->second;
6650 SDValue Chain = MGT->getChain();
6651 SDValue PassThru = MGT->getPassThru();
6652 SDValue Mask = MGT->getMask();
6653 SDValue BasePtr = MGT->getBasePtr();
6654 SDValue Index = MGT->getIndex();
6655 SDValue Scale = MGT->getScale();
6657 EVT MemVT = MGT->getMemoryVT();
6658 ISD::LoadExtType ExtType = MGT->getExtensionType();
6659 ISD::MemIndexType IndexType = MGT->getIndexType();
6663 if (!PassThru->isUndef() && !isZerosVector(PassThru.getNode())) {
6666 DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
6667 MGT->getMemOperand(), IndexType, ExtType);
6672 bool IsScaled = MGT->isIndexScaled();
6673 bool IsSigned = MGT->isIndexSigned();
6677 uint64_t ScaleVal = Scale->getAsZExtVal();
6679 assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
6686 return DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
6687 MGT->getMemOperand(), IndexType, ExtType);
6692 assert(Subtarget->useSVEForFixedLengthVectors() &&
6695 // NOTE: Handle floating-point as if integer then bitcast the result.
6722 PassThru = PassThru->isUndef() ? DAG.getUNDEF(ContainerVT)
6729 Ops, MGT->getMemOperand(), IndexType, ExtType);
6749 SDValue Chain = MSC->getChain();
6750 SDValue StoreVal = MSC->getValue();
6751 SDValue Mask = MSC->getMask();
6752 SDValue BasePtr = MSC->getBasePtr();
6753 SDValue Index = MSC->getIndex();
6754 SDValue Scale = MSC->getScale();
6756 EVT MemVT = MSC->getMemoryVT();
6757 ISD::MemIndexType IndexType = MSC->getIndexType();
6758 bool Truncating = MSC->isTruncatingStore();
6760 bool IsScaled = MSC->isIndexScaled();
6761 bool IsSigned = MSC->isIndexSigned();
6765 uint64_t ScaleVal = Scale->getAsZExtVal();
6767 assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
6774 return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
6775 MSC->getMemOperand(), IndexType, Truncating);
6780 assert(Subtarget->useSVEForFixedLengthVectors() &&
6783 // Once bitcast we treat floating-point scatters as if integer.
6817 return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
6818 MSC->getMemOperand(), IndexType, Truncating);
6829 EVT VT = Op->getValueType(0);
6834 SDValue PassThru = LoadNode->getPassThru();
6835 SDValue Mask = LoadNode->getMask();
6837 if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
6841 VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(),
6842 LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(),
6843 LoadNode->getMemOperand(), LoadNode->getAddressingMode(),
6844 LoadNode->getExtensionType());
6858 SDValue Value = ST->getValue();
6879 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
6880 ST->getBasePtr(), ST->getMemOperand());
6892 SDValue Value = StoreNode->getValue();
6895 EVT MemVT = StoreNode->getMemoryVT();
6900 /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
6903 unsigned AS = StoreNode->getAddressSpace();
6904 Align Alignment = StoreNode->getAlign();
6907 StoreNode->getMemOperand()->getFlags(),
6912 if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
6916 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
6917 // the custom lowering, as there are no un-paired non-temporal stores and
6920 if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
6929 StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
6933 StoreNode->getValue(),
6937 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
6938 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
6941 } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
6944 SDValue Value = StoreNode->getValue();
6945 assert(Value->getValueType(0) == MVT::i64x8);
6946 SDValue Chain = StoreNode->getChain();
6947 SDValue Base = StoreNode->getBasePtr();
6954 Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
6955 StoreNode->getOriginalAlign());
6963 /// Lower atomic or volatile 128-bit stores to a single STP instruction.
6967 assert(StoreNode->getMemoryVT() == MVT::i128);
6968 assert(StoreNode->isVolatile() || StoreNode->isAtomic());
6971 StoreNode->getMergedOrdering() == AtomicOrdering::Release;
6972 if (StoreNode->isAtomic())
6973 assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
6974 Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
6975 StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
6976 StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic);
6978 SDValue Value = (StoreNode->getOpcode() == ISD::STORE ||
6979 StoreNode->getOpcode() == ISD::ATOMIC_STORE)
6980 ? StoreNode->getOperand(1)
6981 : StoreNode->getOperand(2);
6989 {StoreNode->getChain(), StoreValue.first, StoreValue.second,
6990 StoreNode->getBasePtr()},
6991 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
7001 if (LoadNode->getMemoryVT() == MVT::i64x8) {
7003 SDValue Base = LoadNode->getBasePtr();
7004 SDValue Chain = LoadNode->getChain();
7010 LoadNode->getPointerInfo(),
7011 LoadNode->getOriginalAlign());
7020 EVT VT = Op->getValueType(0);
7023 if (LoadNode->getMemoryVT() != MVT::v4i8)
7027 if (Subtarget->requiresStrictAlign() && LoadNode->getAlign() < Align(4))
7031 if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
7033 else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
7034 LoadNode->getExtensionType() == ISD::EXTLOAD)
7039 SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
7040 LoadNode->getBasePtr(), MachinePointerInfo());
7068 if (!Subtarget->isSVEAvailable())
7197 VT.getFixedSizeInBits() - ShiftNo->getZExtValue();
7256 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
7267 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
7290 DAG.getConstant(MFI.getObjectSize(FI->getIndex()), dl, MVT::i64);
7503 if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
7504 assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
7546 EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
7561 !Subtarget->isNeonAvailable()))
7635 "WRITE_REGISTER custom lowering is only for 128-bit sysregs");
7660 return !Subtarget->useSVEForFixedLengthVectors();
7685 // NEON-sized vectors can be emulated using SVE instructions.
7687 return Subtarget->isSVEorStreamingSVEAvailable();
7694 if (!Subtarget->useSVEForFixedLengthVectors())
7698 if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
7709 //===----------------------------------------------------------------------===//
7711 //===----------------------------------------------------------------------===//
7714 unsigned Opcode = N->getOpcode();
7719 unsigned IID = N->getConstantOperandVal(0);
7755 // The non-vararg case is handled in the CC function itself.
7768 if (Subtarget->isTargetWindows()) {
7770 if (Subtarget->isWindowsArm64EC())
7776 if (!Subtarget->isTargetDarwin())
7780 return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
7784 if (Subtarget->isWindowsArm64EC())
7790 if (Subtarget->isWindowsArm64EC())
7814 if (Subtarget->isWindowsArm64EC())
7833 Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg());
7835 (isVarArg && Subtarget->isWindowsArm64EC());
7842 FuncInfo->setIsSVECC(true);
7861 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
7865 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
7893 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
7897 // non-compliant manner for larger structs.
7902 // FIXME: This works on big-endian for composite byvals, which are the common
7913 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
7935 FuncInfo->setIsSVECC(true);
7938 FuncInfo->setIsSVECC(true);
7941 FuncInfo->setIsSVECC(true);
7956 // tn: res,ch,glue = CopyFromReg t(n-1), ..
7972 // If this is an 8, 16 or 32-bit value, it is really passed promoted
7982 (VA.getValVT().isScalableVT() || Subtarget->isWindowsArm64EC()) &&
8006 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
8008 BEAlign = 8 - ArgSize;
8046 Subtarget->isWindowsArm64EC()) &&
8067 Subtarget->isWindowsArm64EC()) &&
8073 while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
8085 NumParts--;
8104 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
8108 // i1 arguments are zero-extended to i8 by the caller. Emit a
8112 if (OrigArg->getType()->isIntegerTy(1)) {
8133 FuncInfo->setPStateSMReg(Reg);
8154 if (!Subtarget->isTargetDarwin() || IsWin64) {
8155 // The AAPCS variadic function ABI is identical to the non-variadic
8165 // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
8166 VarArgsOffset = alignTo(VarArgsOffset, Subtarget->isTargetILP32() ? 4 : 8);
8167 FuncInfo->setVarArgsStackOffset(VarArgsOffset);
8168 FuncInfo->setVarArgsStackIndex(
8177 FuncInfo->getForwardedMustTailRegParms();
8197 assert(!FuncInfo->getSRetReturnReg());
8202 FuncInfo->setSRetReturnReg(Reg);
8214 // This is a non-standard ABI so by fiat I say we're allowed to make full
8221 FuncInfo->setArgumentStackToRestore(StackArgSize);
8229 FuncInfo->setBytesInStackArgArea(StackArgSize);
8231 if (Subtarget->hasCustomCallingConv())
8232 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
8237 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
8243 if (!Subtarget->isTargetWindows() && !hasInlineStackProbe(MF)) {
8264 if (!Subtarget->isTargetWindows() && !hasInlineStackProbe(MF)) {
8279 FuncInfo->setSMESaveBufferAddr(BufferPtr);
8288 DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
8310 Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg());
8316 if (Subtarget->isWindowsArm64EC()) {
8317 // In the ARM64EC ABI, only x0-x3 are used to pass arguments to varargs
8323 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
8327 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
8330 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
8335 if (Subtarget->isWindowsArm64EC()) {
8337 // compute its address relative to x4. For a normal AArch64->AArch64
8354 MF, GPRIdx, (i - FirstVariadicGPR) * 8)
8361 FuncInfo->setVarArgsGPRIndex(GPRIdx);
8362 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
8364 if (Subtarget->hasFPARMv8() && !IsWin64) {
8369 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
8387 FuncInfo->setVarArgsFPRIndex(FPRIdx);
8388 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
8396 /// LowerCallResult - Lower the result values of a call into the
8504 bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC, IsVarArg);
8575 // The check for matching callee-saved regs will determine whether it is
8578 MF.getInfo<AArch64FunctionInfo>()->isSVECC())
8583 // When using the Windows calling convention on a non-windows OS, we want
8586 if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
8596 if (i->hasByValAttr())
8599 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
8605 if (i->hasInRegAttr())
8612 // Externally-defined functions with weak linkage should not be
8613 // tail-called on AArch64 when the OS does not support dynamic
8614 // pre-emption of symbols, as the AAELF spec requires normal calls
8617 // situation (as used for tail calls) is implementation-defined, so we
8620 const GlobalValue *GV = G->getGlobal();
8622 if (GV->hasExternalWeakLinkage() &&
8643 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
8644 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8646 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8647 if (Subtarget->hasCustomCallingConv()) {
8648 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
8649 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
8651 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8664 if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
8686 Subtarget->isWindowsArm64EC()) &&
8694 if (CCInfo.getStackSize() > FuncInfo->getBytesInStackArgArea())
8710 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
8718 for (SDNode *U : DAG.getEntryNode().getNode()->users())
8720 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
8721 if (FI->getIndex() < 0) {
8722 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
8724 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
8741 // Check if the value is zero-extended from i1 to i8
8766 MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
8786 if (!Def || !Def->getParent()->isCopy())
8789 const MachineOperand &CopySrc = Def->getParent()->getOperand(1);
8795 if (!CopySrcOp || !CopySrcOp->isReg() || OpSubReg != SubReg ||
8796 MRI.getRegClass(CopySrcOp->getReg()) != RegClass)
8805 // Live-in physreg copies that are glued to SMSTART are applied as
8806 // implicit-def's in the InstrEmitter. Here we remove them, allowing the
8808 // copies to avoid these fake clobbers of actually-preserved GPRs.
8811 for (unsigned I = MI.getNumOperands() - 1; I > 0; --I)
8835 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
8837 TII->get(TargetOpcode::REG_SEQUENCE),
8842 MIB.addImm(AArch64::zsub0 + (I - 1));
8851 // frame-address. If they contain a frame-index to a scalable vector, this
8855 if (MF.getInfo<AArch64FunctionInfo>()->hasStreamingModeChanges() &&
8873 FuncInfo->setHasStreamingModeChanges(true);
8875 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
8876 SDValue RegMask = DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask());
8901 FuncInfo->setSMESaveBufferUsed();
8907 DAG.getCopyFromReg(Chain, DL, Info->getSMESaveBufferAddr(), MVT::i64);
8934 /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
8956 bool IsCFICall = CLI.CB && CLI.CB->isIndirectCall() && CLI.CFIType;
8960 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
8961 !Subtarget->noBTIAtReturnTwice()) {
8962 GuardWithBTI = FuncInfo->branchTargetEnforcement();
9015 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
9032 // arguments to begin at SP+0. Completely unused for non-tail calls.
9036 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
9039 // popped size 16-byte aligned.
9045 FPDiff = NumReusableBytes - NumBytes;
9049 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
9050 FuncInfo->setTailCallReservedStack(-FPDiff);
9052 // The stack pointer must be 16-byte aligned at all times it's used for a
9055 // a 16-byte aligned SP and the delta applied for the tail call should
9065 CalleeAttrs = SMEAttrs(ES->getSymbol());
9068 [&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & {
9071 R << ore::NV("Callee", ES->getSymbol());
9072 else if (CLI.CB && CLI.CB->getCalledFunction())
9073 R << ore::NV("Callee", CLI.CB->getCalledFunction()->getName());
9084 const TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
9153 // PSTATE.ZA before the call if there is no lazy-save active.
9156 "Lazy-save should have PSTATE.SM=1 on entry to the function");
9177 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
9178 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
9188 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
9206 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
9209 // already be zero-extended.
9211 // We cannot just emit a (zext i8 (trunc (assert-zext i8)))
9215 // (ext (zext x)) -> (zext x)
9227 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
9243 assert((isScalable || Subtarget->isWindowsArm64EC()) &&
9250 while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
9273 NumParts--;
9308 // parts of an [N x i32] into an X-register. The extension type will
9316 ->second;
9344 // FIXME: This works on big-endian for composite byvals, which are the
9355 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
9358 BEAlign = 8 - OpSize;
9390 /*CI=*/nullptr, std::nullopt, DstInfo, MachinePointerInfo());
9407 if (IsVarArg && Subtarget->isWindowsArm64EC()) {
9430 if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
9443 // Build a sequence of copy-to-reg nodes chained together with token chain
9457 CalledGlobal = G->getGlobal();
9458 OpFlags = Subtarget->classifyGlobalFunctionReference(CalledGlobal,
9464 const GlobalValue *GV = G->getGlobal();
9469 Subtarget->isTargetMachO()) ||
9470 MF.getFunction().getParent()->getRtLibUseGOT();
9471 const char *Sym = S->getSymbol();
9480 // We don't usually want to end the call-sequence here because we would tidy
9481 // the frame up *after* the call, however in the ABI-changing tail-call case
9522 const uint64_t Key = CLI.PAI->Key;
9529 extractPtrauthBlendDiscriminators(CLI.PAI->Discriminator, &DAG);
9546 // Add a register mask operand representing the call-preserved registers.
9548 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
9550 // For 'this' returns, use the X0-preserving mask if applicable
9551 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
9554 Mask = TRI->getCallPreservedMask(MF, CallConv);
9557 Mask = TRI->getCallPreservedMask(MF, CallConv);
9559 if (Subtarget->hasCustomCallingConv())
9560 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
9562 if (TRI->isAnyArgRegReserved(MF))
9563 TRI->emitReservedArgRegCallError(MF);
9577 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
9589 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
9610 InGlue = Result.getValue(Result->getNumValues() - 1);
9618 if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
9640 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
9642 TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
9679 // a vreg -> vreg copy.
9693 DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
9744 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
9759 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
9770 })->second;
9778 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
9784 Register Reg = FuncInfo->getPStateSMReg();
9812 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
9826 const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&MF);
9862 //===----------------------------------------------------------------------===//
9864 //===----------------------------------------------------------------------===//
9869 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
9870 N->getOffset(), Flag);
9876 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
9882 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
9883 N->getOffset(), Flag);
9889 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
9895 return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag);
9910 ->hasELFSignedGOT())
9960 const GlobalValue *GV = GN->getGlobal();
9961 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
9964 assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
10021 assert(Subtarget->isTargetDarwin() &&
10027 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
10052 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
10053 const uint32_t *Mask = TRI->getTLSCallPreservedMask();
10054 if (Subtarget->hasCustomCallingConv())
10055 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
10067 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
10068 if (DAG.getMachineFunction().getFunction().hasFnAttribute("ptrauth-calls")) {
10082 /// Convert a thread-local variable reference into a sequence of instructions to
10173 /// When accessing thread-local variables under either the general-dynamic or
10174 /// local-dynamic system, we make a "TLS-descriptor" call. The variable will
10175 /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
10188 /// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
10200 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>()->hasELFSignedGOT()
10212 assert(Subtarget->isTargetELF() && "This function expects an ELF target");
10218 TLSModel::Model Model = MFI->hasELFSignedGOT()
10220 : getTargetMachine().getTLSModel(GA->getGlobal());
10241 const GlobalValue *GV = GA->getGlobal();
10251 // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
10257 MFI->incNumLocalDynamicTLSAccesses();
10266 // thread-local area.
10270 // in its thread-storage area.
10301 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
10341 const GlobalValue *GV = GA->getGlobal();
10363 if (Subtarget->isTargetDarwin())
10365 if (Subtarget->isTargetELF())
10367 if (Subtarget->isTargetWindows())
10373 //===----------------------------------------------------------------------===//
10377 // - MOVaddrPAC: similar to MOVaddr, with added PAC.
10381 // - LOADgotPAC: similar to LOADgot, with added PAC.
10384 // section is assumed to be read-only (for example, via relro mechanism). See
10387 // - LOADauthptrstatic: similar to LOADgot, but use a
10395 // provide integrity guarantees on the to-be-signed intermediate values.
10398 // with often similarly-signed pointers, making it a good harvesting target.
10406 assert(TGN->getGlobal()->hasExternalWeakLinkage());
10412 if (TGN->getOffset() != 0)
10414 "unsupported non-zero offset in weak ptrauth global reference");
10417 report_fatal_error("unsupported weak addr-div ptrauth global");
10439 // Blend only works if the integer discriminator is 16-bit wide.
10444 // Choosing between 3 lowering alternatives is target-specific.
10445 if (!Subtarget->isTargetELF() && !Subtarget->isTargetMachO())
10454 const GlobalValue *PtrGV = PtrN->getGlobal();
10458 Subtarget->ClassifyGlobalReference(PtrGV, getTargetMachine());
10461 "unsupported non-GOT op flags on ptrauth global reference");
10464 PtrOffsetC += PtrN->getOffset();
10467 assert(PtrN->getTargetFlags() == 0 &&
10476 // No GOT load needed -> MOVaddrPAC
10478 assert(!PtrGV->hasExternalWeakLinkage() && "extern_weak should use GOT");
10485 // GOT load -> LOADgotPAC
10487 if (!PtrGV->hasExternalWeakLinkage())
10493 // extern_weak ref -> LOADauthptrstatic
10505 cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
10510 Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1};
10512 return {Val, Val.getValueSizeInBits() - 1};
10517 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
10549 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
10572 if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
10576 // out of bounds, a late MI-layer pass rewrites branches.
10592 // out of bounds, a late MI-layer pass rewrites branches.
10615 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
10654 if (!Subtarget->isNeonAvailable() &&
10655 !Subtarget->useSVEForFixedLengthVectors())
10674 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) {
10693 auto SetVecVal = [&](int Idx = -1) {
10725 // 64-bit elements. Instead, materialize all bits set and then negate that.
10753 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
10756 if (!Subtarget->isNeonAvailable())
10772 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
10773 // CNT V0.8B, V0.8B // 8xbyte pop-counts
10774 // ADDV B0, V0.8B // sum 8xbyte pop-counts
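
The three-instruction sequence sketched in the comments above maps directly onto the ACLE NEON intrinsics. A hedged C++ equivalent (assuming an AArch64 target with <arm_neon.h>; not code from the matched file):

#include <arm_neon.h>
#include <cstdint>

// Move the 64-bit integer into a D register, take per-byte population counts,
// then horizontally add the eight byte counts: FMOV + CNT + ADDV.
static unsigned popcount64(uint64_t X) {
  uint8x8_t PerByte = vcnt_u8(vcreate_u8(X));
  return vaddv_u8(PerByte);
}
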
10811 if (Subtarget->hasDotProd() && VT.getScalarSizeInBits() != 16 &&
10848 VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()));
10881 VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
10908 VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
10959 // Skip the one-use zext
10960 if (N->getOpcode() == ISD::ZERO_EXTEND && N->hasOneUse())
10961 N = N->getOperand(0);
10964 if (N->getOpcode() == ISD::XOR) {
10965 WorkList.push_back(std::make_pair(N->getOperand(0), N->getOperand(1)));
10970 // All the non-leaf nodes must be OR.
10971 if (N->getOpcode() != ISD::OR || !N->hasOneUse())
10974 if (isOrXorChain(N->getOperand(0), Num, WorkList) &&
10975 isOrXorChain(N->getOperand(1), Num, WorkList))
10982 SDValue LHS = N->getOperand(0);
10983 SDValue RHS = N->getOperand(1);
10985 EVT VT = N->getValueType(0);
10989 if (N->getOpcode() != ISD::SETCC)
10992 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
10997 LHS->getOpcode() == ISD::OR && LHS->hasOneUse() &&
11021 bool IsStrict = Op->isStrictFPOpcode();
11029 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
11127 ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(3))->get();
11155 if ((LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
11169 // Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform
11170 // into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
11172 if (CC == ISD::SETGT && RHSC && RHSC->isAllOnes() && CTVal && CFVal &&
11173 CTVal->isOne() && CFVal->isAllOnes() &&
11178 DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
11183 // (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1))
11184 // (SELECT_CC setlt, lhs, 0, lhs, 0) -> (AND lhs, (SRA lhs, typesize-1))
11187 RHSC && RHSC->isZero() && CFVal && CFVal->isZero() &&
11192 DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
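
The folds in the comments above all turn a compare-against-a-constant select into shift-and-mask arithmetic, exploiting the fact that an arithmetic right shift by the type width minus one smears the sign bit across the whole register. Scalar models for 32-bit values (illustrations only; they assume arithmetic right shift of negative values, as AArch64 compilers provide):

#include <cstdint>

// x > -1 ? 1 : -1          becomes   (x >> 31) | 1
static int32_t signOrOne(int32_t X) { return (X >> 31) | 1; }
// x > 0 ? x : 0  (max 0)   becomes   x & ~(x >> 31), i.e. BIC with the sign mask
static int32_t clampToNonNegative(int32_t X) { return X & ~(X >> 31); }
// x < 0 ? x : 0  (min 0)   becomes   x & (x >> 31), i.e. AND with the sign mask
static int32_t clampToNonPositive(int32_t X) { return X & (X >> 31); }
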
11204 if (CTVal && CFVal && CTVal->isAllOnes() && CFVal->isZero()) {
11208 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isZero()) {
11229 const int64_t TrueVal = CTVal->getSExtValue();
11230 const int64_t FalseVal = CFVal->getSExtValue();
11239 TrueVal == -FalseVal) {
11242 // If our operands are only 32-bit wide, make sure we use 32-bit
11246 // 64-bit arithmetic).
11247 const uint32_t TrueVal32 = CTVal->getZExtValue();
11248 const uint32_t FalseVal32 = CFVal->getZExtValue();
11258 // 64-bit check whether we can use CSINC.
11291 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
11292 !RHSVal->isZero() && !RHSVal->isAllOnes()) {
11300 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
11302 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
11334 if (RHSVal && RHSVal->isZero()) {
11339 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
11342 CFVal && CFVal->isZero() &&
11374 // -1 -> vl1, -2 -> vl2, etc. The predicate will then be reversed to get the
11384 // Create a predicate where all but the last -IdxVal elements are false.
11404 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
11415 SDValue CCVal = Op->getOperand(0);
11416 SDValue TVal = Op->getOperand(1);
11417 SDValue FVal = Op->getOperand(2);
11435 if (useSVEForFixedLengthVectorVT(Ty, !Subtarget->isNeonAvailable())) {
11450 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
11468 CC = cast<CondCodeSDNode>(CCVal.getOperand(2))->get();
11477 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
11486 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
11501 !Subtarget->isTargetMachO())
11515 int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
11518 AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
11520 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
11523 "aarch64-jump-table-hardening")) {
11525 if (Subtarget->isTargetMachO()) {
11527 report_fatal_error("Unsupported code-model for hardened jump-table");
11530 assert(Subtarget->isTargetELF() &&
11533 report_fatal_error("Unsupported code-model for hardened jump-table");
11556 // Skip over the jump-table BRINDs, where the destination is JumpTableDest32.
11557 if (Dest->isMachineOpcode() &&
11558 Dest->getMachineOpcode() == AArch64::JumpTableDest32)
11563 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(MF.getFunction());
11584 if (Subtarget->isTargetMachO()) {
11598 const BlockAddress *BA = BAN->getBlockAddress();
11601 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(
11602 *BA->getFunction())) {
11606 SDValue TargetBA = DAG.getTargetBlockAddress(BA, BAN->getValueType(0));
11621 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) {
11636 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
11639 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
11651 if (Subtarget->isWindowsArm64EC()) {
11653 // relative to x4. For a normal AArch64->AArch64 call, x4 == sp on entry,
11658 if (FuncInfo->getVarArgsGPRSize() > 0)
11659 StackOffset = -(uint64_t)FuncInfo->getVarArgsGPRSize();
11661 StackOffset = FuncInfo->getVarArgsStackOffset();
11665 FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
11666 ? FuncInfo->getVarArgsGPRIndex()
11667 : FuncInfo->getVarArgsStackIndex(),
11670 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
11681 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
11688 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
11693 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
11700 int GPRSize = FuncInfo->getVarArgsGPRSize();
11707 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
11719 int FPRSize = FuncInfo->getVarArgsFPRSize();
11725 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
11740 DAG.getStore(Chain, DL, DAG.getSignedConstant(-GPRSize, DL, MVT::i32),
11748 DAG.getStore(Chain, DL, DAG.getSignedConstant(-FPRSize, DL, MVT::i32),
11759 if (Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg()))
11761 else if (Subtarget->isTargetDarwin())
11772 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
11774 (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
11776 : Subtarget->isTargetILP32() ? 20 : 32;
11777 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
11778 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
11782 Align(PtrSize), false, false, /*CI=*/nullptr,
11788 assert(Subtarget->isTargetDarwin() &&
11791 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
11797 unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
11811 DAG.getConstant(Align->value() - 1, DL, PtrVT));
11813 DAG.getConstant(-(int64_t)Align->value(), DL, PtrVT));
11867 while (Depth--)
11871 if (Subtarget->isTargetILP32())
11897 const AArch64RegisterInfo *MRI = Subtarget->getRegisterInfo();
11898 unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
11899 if (!Subtarget->isXRegisterReserved(DwarfRegNum) &&
11900 !MRI->isReservedReg(MF, Reg))
11941 // live-in.
11946 // The XPACLRI instruction assembles to a hint-space instruction before
11947 // Armv8.3-A, so it can be safely used on any pre-Armv8.3-A
11948 // architecture. On Armv8.3-A and onwards XPACI is available, so use
11951 if (Subtarget->hasPAuth()) {
11962 /// LowerShiftParts - Lower SHL_PARTS/SRA_PARTS/SRL_PARTS, which returns two
11981 // We can materialize #0.0 as fmov $Rd, XZR for the 64-bit and 32-bit cases,
11982 // and for the 16-bit case when the target has full fp16 support.
11991 IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
11993 IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
11996 (Subtarget->hasFullFP16() && AArch64_AM::getFP16Imm(ImmInt) != -1) ||
12014 unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 4 : 2));
12023 //===----------------------------------------------------------------------===//
12025 //===----------------------------------------------------------------------===//
12031 if ((ST->hasNEON() &&
12035 (ST->hasSVE() &&
12040 // the initial estimate is 2^-8. Thus the number of extra steps to refine
12047 : Log2_64_Ceil(DesiredBits) - Log2_64_Ceil(AccurateBits);
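// For example (illustrative): refining an 8-bit-accurate initial estimate up
// to the 24 bits needed for f32 takes Log2Ceil(24) - Log2Ceil(8) = 5 - 3 = 2
// extra steps, since each refinement step roughly doubles the accurate bits.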
12078 (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
12086 // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
12087 // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
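// (Assuming the usual FRSQRTE/FRSQRTS pairing, one refinement step computes
//  E' = E * FRSQRTS(X * E, E) = E * 0.5 * (3 - X * E^2).)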
12088 for (int i = ExtraSteps; i > 0; --i) {
12115 // Newton reciprocal iteration: E * (2 - X * E)
12116 // AArch64 reciprocal iteration instruction: (2 - M * N)
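// (Assuming the usual FRECPE/FRECPS pairing, one refinement step computes
//  E' = E * FRECPS(X, E) = E * (2 - X * E).)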
12117 for (int i = ExtraSteps; i > 0; --i) {
12130 //===----------------------------------------------------------------------===//
12132 //===----------------------------------------------------------------------===//
12138 // r - A general register
12139 // w - An FP/SIMD register of some size in the range v0-v31
12140 // x - An FP/SIMD register of some size in the range v0-v15
12141 // I - Constant that can be used with an ADD instruction
12142 // J - Constant that can be used with a SUB instruction
12143 // K - Constant that can be used with a 32-bit logical instruction
12144 // L - Constant that can be used with a 64-bit logical instruction
12145 // M - Constant that can be used as a 32-bit MOV immediate
12146 // N - Constant that can be used as a 64-bit MOV immediate
12147 // Q - A memory reference with base register and no offset
12148 // S - A symbolic address
12149 // Y - Floating point constant zero
12150 // Z - Integer constant zero
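// A hypothetical snippet using a couple of these constraints from C:
//   asm("add %w0, %w1, %2" : "=r"(Res) : "r"(A), "I"(255));
// Here 'r' selects general registers (printed as w0/w1 via the %w modifier
// described just below) and 'I' requires an ADD-encodable immediate.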
12152 // Note that general register operands will be output using their 64-bit x
12154 // is prefixed by the %w modifier. Floating-point and SIMD register operands
12165 if (!Subtarget->hasFPARMv8())
12188 // not what we want. The code here pre-empts this by matching the register
12196 Constraint = Constraint.substr(2, Constraint.size() - 3);
12267 // https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Flag-Output-Operands
12332 /// getConstraintType - Given a constraint letter, return the type of
12382 Type *type = CallOperandVal->getType();
12391 if (type->isFloatingPointTy() || type->isVectorTy())
12414 if (Subtarget->hasLS64() && VT.getSizeInBits() == 512)
12420 if (!Subtarget->hasFPARMv8())
12441 // only take 128-bit registers so just use that regclass.
12443 if (!Subtarget->hasFPARMv8())
12451 if (!Subtarget->hasFPARMv8())
12489 tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
12491 bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
12493 // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
12494 // By default we'll emit v0-v31 for this unless there's a modifier where
12507 if (Res.second && !Subtarget->hasFPARMv8() &&
12518 if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
12524 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
12571 uint64_t CVal = C->getZExtValue();
12577 // instruction [or vice versa], in other words -1 to -4095 with optional
12584 uint64_t NVal = -C->getSExtValue();
12586 CVal = C->getSExtValue();
12594 // distinguish between bit patterns that are valid 32-bit or 64-bit
12608 // also match 32 or 64-bit immediates that can be loaded either using a
12609 // *single* MOVZ or MOVN , such as 32-bit 0x12340000, 0x00001234, 0xffffedca
12610 // (M) or 64-bit 0x1234000000000000 (N) etc.
12654 // All assembler immediates are 64-bit integers.
12667 //===----------------------------------------------------------------------===//
12669 //===----------------------------------------------------------------------===//
12671 /// WidenVector - Given a value in the V64 register class, produce the
12684 /// getExtFactor - Determine the adjustment factor for the position when
12734 MaskSource = MaskSource->getOperand(0);
12750 !cast<ConstantSDNode>(MaskIdx)->getConstantIntValue()->equalsInt(i))
12756 MaskSourceVec = MaskSource->getOperand(0);
12759 } else if (MaskSourceVec != MaskSource->getOperand(0)) {
12767 // of elements in the source, or we would have an out-of-bounds access.
12830 "various elements of other fixed-width vectors, provided "
12843 Source->MinElt = std::min(Source->MinElt, EltNo);
12844 Source->MaxElt = std::max(Source->MaxElt, EltNo);
12858 Mask.push_back(-1);
12963 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
12974 Src.WindowBase = -NumSrcElts;
13000 Src.WindowBase = -Src.MinElt;
13030 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
13038 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
13052 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
13053 ExtractBase += NumElts * (Src - Sources.begin());
13189 SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
13206 // NumEltsPerBlock with some values possibly replaced by undef-s.
13208 // Find first non-undef element
13211 "Shuffle with all-undefs must have been caught by previous cases, "
13219 size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
13224 size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
13243 // Look for the first non-undef element.
13254 return Elt != ExpectedElt++ && Elt != -1;
13262 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
13263 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
13269 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
13270 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
13276 Imm -= NumElts;
13281 /// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
13300 /// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
13319 /// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
13341 int LastLHSMismatch = -1, LastRHSMismatch = -1;
13344 if (M[i] == -1) {
13361 if (NumLHSMatch == NumInputElements - 1) {
13365 } else if (NumRHSMatch == NumInputElements - 1) {
13399 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
13422 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
13423 /// the specified operations to build the shuffle. ID is the perfect-shuffle
13432 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
13433 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
13463 auto getPFIDLane = [](unsigned ID, int Elt) -> int {
13465 Elt = 3 - Elt;
13468 Elt--;
13470 return (ID % 9 == 8) ? -1 : ID % 9;
13487 if (MaskElt == -1)
13488 MaskElt = (getPFIDLane(ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
13490 ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
13504 ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
13536 // vrev <4 x i16> -> REV32
13541 // vrev <4 x i8> -> REV16
13563 SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
13569 unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
13605 // out of range values with 0s. We do need to make sure that any out-of-range
13606 // values are really out-of-range for a v16i8 vector.
13620 Offset = Offset < IndexLen ? Offset + IndexLen : Offset - IndexLen;
13694 // Can't handle cases where vector size is not 128-bit
13702 // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
13704 // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
13705 // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
13718 // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
13723 // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
13725 Lane -= Idx * VT.getVectorNumElements() / 2;
13728 // Widen the operand to 128-bit register with undef.
13751 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
13753 // If combining adjacent elements, like two i16's -> i32, two i32's -> i64 ...
13780 SDValue Tbl1 = Op->getOperand(0);
13781 SDValue Tbl2 = Op->getOperand(1);
13787 if (Tbl1->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13788 Tbl1->getOperand(0) != Tbl2ID ||
13789 Tbl2->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13790 Tbl2->getOperand(0) != Tbl2ID)
13793 if (Tbl1->getValueType(0) != MVT::v16i8 ||
13794 Tbl2->getValueType(0) != MVT::v16i8)
13797 SDValue Mask1 = Tbl1->getOperand(3);
13798 SDValue Mask2 = Tbl2->getOperand(3);
13802 TBLMaskParts[I] = Mask1->getOperand(ShuffleMask[I]);
13805 dyn_cast<ConstantSDNode>(Mask2->getOperand(ShuffleMask[I] - 16));
13808 TBLMaskParts[I] = DAG.getConstant(C->getSExtValue() + 32, dl, MVT::i32);
13817 {ID, Tbl1->getOperand(1), Tbl1->getOperand(2),
13818 Tbl2->getOperand(1), Tbl2->getOperand(2), TBLMask});
13821 // Baseline legalization for ZERO_EXTEND_VECTOR_INREG will blend-in zeros,
13823 // so custom-lower it as ZIP1-with-zeros.
13834 // FIXME: support multi-step zipping?
13849 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
13852 // Convert shuffles that are directly supported on NEON to target-specific
13856 ArrayRef<int> ShuffleMask = SVN->getMask();
13868 if (SVN->isSplat()) {
13869 int Lane = SVN->getSplatIndex();
13871 if (Lane == -1)
13877 // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
13931 } else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
13978 SrcLane -= NumElts;
13997 // the PerfectShuffle-generated table to synthesize it from other shuffles.
14039 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
14074 // Current lowering only supports the SVE-ACLE types.
14083 if (CIdx && (CIdx->getZExtValue() <= 3)) {
14084 SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
14085 return DAG.getNode(AArch64ISD::DUPLANE128, DL, VT, Op.getOperand(1), CI);
14114 EVT VT = BVN->getValueType(0);
14118 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14134 // Try 64-bit splatted SIMD immediate.
14155 // Try 32-bit splatted SIMD immediate.
14208 // Try 16-bit splatted SIMD immediate.
14253 // Try 32-bit splatted SIMD immediate with shifted ones.
14284 // Try 8-bit splatted SIMD immediate.
14344 ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
14347 EVT VT = Bvec->getValueType(0);
14350 if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
14352 ConstVal = FirstElt->getZExtValue();
14386 // If we're compiling for a specific vector-length, we can check if the
14406 // - for the SLI case: C1 == ~(Ones(ElemSizeInBits) << C2)
14407 // - for the SRI case: C1 == ~(Ones(ElemSizeInBits) >> C2)
14410 EVT VT = N->getValueType(0);
14420 SDValue FirstOp = N->getOperand(0);
14422 SDValue SecondOp = N->getOperand(1);
14463 C2 = C2node->getZExtValue();
14478 C1AsAPInt = ~(C1nodeImm->getAPIntValue() << C1nodeShift->getAPIntValue());
14501 LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
14502 LLVM_DEBUG(N->dump(&DAG));
14504 LLVM_DEBUG(ResultSLI->dump(&DAG));
14513 !Subtarget->isNeonAvailable()))
14553 // We can always fall back to a non-immediate OR.
14570 for (SDValue Lane : Op->ops()) {
14572 // operands already. Otherwise, if Op is a floating-point splat
14577 CstLane->getAPIntValue().trunc(EltTy.getSizeInBits()).getZExtValue(),
14579 } else if (Lane.getNode()->isUndef()) {
14631 // FNegate each sub-element of the constant
14656 (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
14670 if (auto SeqInfo = BVN->isConstantSequence()) {
14671 SDValue Start = DAG.getConstant(SeqInfo->first, DL, ContainerVT);
14672 SDValue Steps = DAG.getStepVector(DL, ContainerVT, SeqInfo->second);
14679 NumElems <= 1 || BVN->isConstant())
14689 NumElems - count_if(Op->op_values(), IsExtractElt) > 4)
14692 // Lower (pow2) BUILD_VECTORS that are <= 128-bit to a sequence of ZIP1s.
14695 Op->op_values(), [&, Undef = DAG.getUNDEF(ContainerVT)](SDValue Op) {
14726 bool OverrideNEON = !Subtarget->isNeonAvailable() ||
14727 cast<BuildVectorSDNode>(Op)->isConstantSequence();
14733 // Though this might return a non-BUILD_VECTOR (e.g. CONCAT_VECTORS); if so,
14743 if (BVN->isConstant()) {
14744 if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
14747 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
14751 if (ConstantFPSDNode *Const = BVN->getConstantFPSplatNode())
14752 if (Const->isZero() && !Const->isNegative())
14763 // 3) if only one constant value is used (w/ some non-constant lanes),
14765 // in the non-constant lanes.
14767 // select the values we'll be overwriting for the non-constant
14825 // ------------------------------------------------------------------
14840 // SCALAR_TO_VECTOR, except for when we have a single-element constant vector
14857 if (!isa<ConstantSDNode>(N->getOperand(1))) {
14862 SDValue N0 = N->getOperand(0);
14880 uint64_t Val = N->getConstantOperandVal(1);
14885 if (Val - 1 == 2 * i) {
14910 // Use DUP for non-constant splats. For f32 constant splats, reduce to
14917 dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
14927 dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
14940 EltTy == MVT::f64) && "Unsupported floating-point vector type");
14957 // If we need to insert a small number of different non-constant elements and
14963 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
14967 // start by splatting that value, then replace the non-constant lanes. This
14976 ConstantValueAPInt = C->getAPIntValue().zextOrTrunc(BitSize);
14985 // Now insert the non-constant lanes.
15112 // worse. For a vector with one or two non-undef values, that's
15132 // vector element types. After type-legalization the inserted value is
15163 !Subtarget->isNeonAvailable()))
15171 unsigned NumOperands = Op->getNumOperands();
15179 SmallVector<SDValue> ConcatOps(Op->ops());
15202 !Subtarget->isNeonAvailable()))
15223 // Check for non-constant or out of range lane.
15224 ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
15225 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
15250 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
15253 // Check for non-constant or out of range lane.
15254 ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
15255 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
15303 // If this is extracting the upper 64-bits of a 128-bit vector, we match
15305 if (Idx * InVT.getScalarSizeInBits() == 64 && Subtarget->isNeonAvailable())
15310 useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable())) {
15318 SDValue Container = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PackedVT,
15321 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Container, Idx);
15369 DAG.getVectorIdxConstant(Idx - (NumElts / 2), DL));
15444 !isa<ConstantSDNode>(Op->getOperand(0)))
15447 SplatVal = Op->getConstantOperandVal(0);
15456 if (isPowerOf2_64(-SplatVal)) {
15457 SplatVal = -SplatVal;
15481 DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, VT, Pg, Op->getOperand(0),
15492 // SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
15517 if (!Subtarget->isNeonAvailable())
15524 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
15561 /// getVShiftImm - Check if this is a valid build_vector for the immediate
15572 if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
15580 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
15589 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
15592 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
15621 !Subtarget->isNeonAvailable()))
15634 if (Shift->getOpcode() != ISD::SRL)
15641 dyn_cast_or_null<ConstantSDNode>(DAG.getSplatValue(Shift->getOperand(1)));
15645 ShiftValue = ShiftOp1->getZExtValue();
15649 SDValue Add = Shift->getOperand(0);
15650 if (Add->getOpcode() != ISD::ADD || !Add->hasOneUse())
15656 uint64_t ExtraBits = VT.getScalarSizeInBits() - ResVT.getScalarSizeInBits();
15657 if (ShiftValue > ExtraBits && !Add->getFlags().hasNoUnsignedWrap())
15661 dyn_cast_or_null<ConstantSDNode>(DAG.getSplatValue(Add->getOperand(1)));
15664 uint64_t AddValue = AddOp1->getZExtValue();
15665 if (AddValue != 1ULL << (ShiftValue - 1))
15668 RShOperand = Add->getOperand(0);
15685 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
15698 (Subtarget->hasSVE2() ||
15699 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
15709 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) {
15720 DAG.getConstant(Cnt, DL, MVT::i32), Op->getFlags());
15754 bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef,
15859 !Subtarget->isNeonAvailable()))
15862 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
15876 // Lower isnan(x) | isnan(never-nan) to x != x.
15877 // Lower !isnan(x) & !isnan(never-nan) to x == x.
15920 bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs();
15971 "Expected power-of-2 length vector");
16072 bool OverrideNEON = !Subtarget->isNeonAvailable() ||
16081 SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) {
16148 // LSE has an atomic load-clear instruction, but not a load-and.
16155 return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(),
16157 AN->getMemOperand());
16170 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
16171 EVT VT = Node->getValueType(0);
16174 "no-stack-arg-probe")) {
16180 DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
16189 SDValue Callee = DAG.getTargetExternalSymbol(Subtarget->getChkStkName(),
16192 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
16193 const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
16194 if (Subtarget->hasCustomCallingConv())
16195 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
16206 // from X15 here doesn't work at -O0, since it thinks that X15 is undefined
16217 DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
16235 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
16237 EVT VT = Node->getValueType(0);
16245 DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
16258 if (Subtarget->isTargetWindows())
16268 if (Subtarget->hasSVE2())
16290 AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
16293 const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
16298 assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
16302 Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
16304 Info.ptrVal = CI.getArgOperand(CI.arg_size() - 1);
16311 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
16334 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
16335 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
16352 unsigned NumElts = StructTy->getNumElements();
16353 Type *VecTy = StructTy->getElementType(0);
16355 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), EleVT, NumElts);
16356 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
16372 Type *ArgTy = Arg->getType();
16373 if (!ArgTy->isVectorTy())
16377 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
16378 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
16391 Type *VecTy = I.getArgOperand(0)->getType();
16395 Type *ArgTy = Arg->getType();
16396 if (!ArgTy->isVectorTy())
16401 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), EleVT, NumElts);
16402 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
16450 Type *ElTy = cast<VectorType>(I.getType())->getElementType();
16461 cast<VectorType>(I.getArgOperand(0)->getType())->getElementType();
16463 Info.memVT = MVT::getVT(I.getOperand(0)->getType());
16474 Info.memVT = MVT::getVT(Val->getType());
16505 const SDValue &Base = Mem->getBasePtr();
16510 // It's unknown whether a scalable vector has a power-of-2 bitwidth.
16511 if (Mem->getMemoryVT().isScalableVector())
16516 uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8;
16527 if ((VT == MVT::i64 || VT == MVT::i32) && Extend->use_size()) {
16540 // Truncations from 64-bit GPR to 32-bit GPR are free.
16542 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
16544 uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedValue();
16545 uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedValue();
16560 if (I->getOpcode() != Instruction::FMul)
16563 if (!I->hasOneUse())
16566 Instruction *User = I->user_back();
16568 if (!(User->getOpcode() == Instruction::FSub ||
16569 User->getOpcode() == Instruction::FAdd))
16573 const Function *F = I->getFunction();
16574 const DataLayout &DL = F->getDataLayout();
16575 Type *Ty = User->getOperand(0)->getType();
16583 // All 32-bit GPR operations implicitly zero the high-half of the corresponding
16584 // 64-bit GPR.
16586 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
16588 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
16589 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
16609 // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
16620 if (Ext->getType()->isVectorTy())
16623 for (const Use &U : Ext->uses()) {
16631 switch (Instr->getOpcode()) {
16633 if (!isa<ConstantInt>(Instr->getOperand(1)))
16638 auto &DL = Ext->getDataLayout();
16639 std::advance(GTI, U.getOperandNo()-1);
16642 // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
16644 // log2(sizeof(IdxTy)) - log2(8).
16645 if (IdxTy->isScalableTy())
16648 llvm::countr_zero(DL.getTypeStoreSizeInBits(IdxTy).getFixedValue()) -
16659 if (Instr->getType() == Ext->getOperand(0)->getType())
16689 for (unsigned I = IsLittleEndian ? 0 : Factor - 1; I < MaskLen; I += Factor)
16699 auto *SrcTy = cast<FixedVectorType>(Op->getType());
16700 unsigned NumElts = SrcTy->getNumElements();
16701 auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
16702 auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
16720 auto *SrcTy = cast<FixedVectorType>(Op->getType());
16721 auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
16722 auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
16725 if (!createTblShuffleMask(SrcWidth, DstWidth, SrcTy->getNumElements(),
16738 int NumElements = cast<FixedVectorType>(TI->getType())->getNumElements();
16739 auto *SrcTy = cast<FixedVectorType>(TI->getOperand(0)->getType());
16740 auto *DstTy = cast<FixedVectorType>(TI->getType());
16741 assert(SrcTy->getElementType()->isIntegerTy() &&
16742 "Non-integer type source vector element is not supported");
16743 assert(DstTy->getElementType()->isIntegerTy(8) &&
16746 cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
16748 cast<IntegerType>(DstTy->getElementType())->getBitWidth();
16760 // 0,8,16,..Y*8th bytes for the little-endian format
16766 : Itr * TruncFactor + (TruncFactor - 1)));
16789 Builder.CreateShuffleVector(TI->getOperand(0), ShuffleLanes), VecTy));
16848 TI->replaceAllUsesWith(FinalResult);
16849 TI->eraseFromParent();
16856 if (!EnableExtToTBL || Subtarget->useSVEForFixedLengthVectors())
16863 Function *F = I->getParent()->getParent();
16864 if (!L || L->getHeader() != I->getParent() || F->hasMinSize() ||
16865 F->hasOptSize())
16868 auto *SrcTy = dyn_cast<FixedVectorType>(I->getOperand(0)->getType());
16869 auto *DstTy = dyn_cast<FixedVectorType>(I->getType());
16877 if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) {
16878 auto DstWidth = DstTy->getElementType()->getScalarSizeInBits();
16884 // If the ZExt can be lowered to a single ZExt to the next power-of-2 and
16886 auto SrcWidth = SrcTy->getElementType()->getScalarSizeInBits();
16887 if (TTI.getCastInstrCost(I->getOpcode(), DstTy, TruncDstType,
16890 if (SrcWidth * 2 >= TruncDstType->getElementType()->getScalarSizeInBits())
16899 if (SrcWidth * 4 <= DstWidth && I->hasOneUser()) {
16900 auto *SingleUser = cast<Instruction>(*I->user_begin());
16905 if (DstTy->getScalarSizeInBits() >= 64)
16910 Builder, ZExt->getOperand(0), cast<FixedVectorType>(ZExt->getType()),
16911 DstTy, Subtarget->isLittleEndian());
16914 ZExt->replaceAllUsesWith(Result);
16915 ZExt->eraseFromParent();
16920 if (UIToFP && ((SrcTy->getElementType()->isIntegerTy(8) &&
16921 DstTy->getElementType()->isFloatTy()) ||
16922 (SrcTy->getElementType()->isIntegerTy(16) &&
16923 DstTy->getElementType()->isDoubleTy()))) {
16926 Builder, I->getOperand(0), FixedVectorType::getInteger(DstTy),
16927 FixedVectorType::getInteger(DstTy), Subtarget->isLittleEndian());
16930 I->replaceAllUsesWith(UI);
16931 I->eraseFromParent();
16936 if (SIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
16937 DstTy->getElementType()->isFloatTy()) {
16939 auto *Shuffle = createTblShuffleForSExt(Builder, I->getOperand(0),
16941 Subtarget->isLittleEndian());
16946 I->replaceAllUsesWith(SI);
16947 I->eraseFromParent();
16955 (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
16956 SrcTy->getElementType()->isFloatTy() &&
16957 DstTy->getElementType()->isIntegerTy(8)) {
16959 auto *WideConv = Builder.CreateFPToUI(FPToUI->getOperand(0),
16962 I->replaceAllUsesWith(TruncI);
16963 I->eraseFromParent();
16964 createTblForTrunc(cast<TruncInst>(TruncI), Subtarget->isLittleEndian());
16970 // per lane of the input that is represented using 1,2,3 or 4 128-bit table
16973 if (TI && DstTy->getElementType()->isIntegerTy(8) &&
16974 ((SrcTy->getElementType()->isIntegerTy(32) ||
16975 SrcTy->getElementType()->isIntegerTy(64)) &&
16976 (SrcTy->getNumElements() == 16 || SrcTy->getNumElements() == 8))) {
16977 createTblForTrunc(TI, Subtarget->isLittleEndian());
17000 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
17001 unsigned MinElts = VecTy->getElementCount().getKnownMinValue();
17003 VecSize = std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
17009 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
17017 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
17018 auto EC = VecTy->getElementCount();
17023 if (isa<FixedVectorType>(VecTy) && !Subtarget->isNeonAvailable() &&
17024 (!Subtarget->useSVEForFixedLengthVectors() ||
17029 !Subtarget->isSVEorStreamingSVEAvailable())
17046 if (Subtarget->useSVEForFixedLengthVectors()) {
17048 std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
17051 (!Subtarget->isNeonAvailable() || VecSize > 128))) {
17059 return Subtarget->isNeonAvailable() && (VecSize == 64 || VecSize % 128 == 0);
17063 if (VTy->getElementType() == Type::getDoubleTy(VTy->getContext()))
17064 return ScalableVectorType::get(VTy->getElementType(), 2);
17066 if (VTy->getElementType() == Type::getFloatTy(VTy->getContext()))
17067 return ScalableVectorType::get(VTy->getElementType(), 4);
17069 if (VTy->getElementType() == Type::getBFloatTy(VTy->getContext()))
17070 return ScalableVectorType::get(VTy->getElementType(), 8);
17072 if (VTy->getElementType() == Type::getHalfTy(VTy->getContext()))
17073 return ScalableVectorType::get(VTy->getElementType(), 8);
17075 if (VTy->getElementType() == Type::getInt64Ty(VTy->getContext()))
17076 return ScalableVectorType::get(VTy->getElementType(), 2);
17078 if (VTy->getElementType() == Type::getInt32Ty(VTy->getContext()))
17079 return ScalableVectorType::get(VTy->getElementType(), 4);
17081 if (VTy->getElementType() == Type::getInt16Ty(VTy->getContext()))
17082 return ScalableVectorType::get(VTy->getElementType(), 8);
17084 if (VTy->getElementType() == Type::getInt8Ty(VTy->getContext()))
17085 return ScalableVectorType::get(VTy->getElementType(), 16);
17101 return Intrinsic::getOrInsertDeclaration(M, SVELoads[Factor - 2], {LDVTy});
17103 return Intrinsic::getOrInsertDeclaration(M, NEONLoads[Factor - 2],
17118 return Intrinsic::getOrInsertDeclaration(M, SVEStores[Factor - 2], {STVTy});
17120 return Intrinsic::getOrInsertDeclaration(M, NEONStores[Factor - 2],
17144 const DataLayout &DL = LI->getDataLayout();
17146 VectorType *VTy = Shuffles[0]->getType();
17159 return SI->hasOneUse() && match(SI->user_back(), m_UIToFP(m_Value())) &&
17160 SI->getType()->getScalarSizeInBits() * 4 ==
17161 SI->user_back()->getType()->getScalarSizeInBits();
17171 Type *EltTy = FVTy->getElementType();
17172 if (EltTy->isPointerTy())
17174 FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
17176 // If we're going to generate more than one load, reset the sub-vector type
17178 FVTy = FixedVectorType::get(FVTy->getElementType(),
17179 FVTy->getNumElements() / NumLoads);
17187 Value *BaseAddr = LI->getPointerOperand();
17189 Type *PtrTy = LI->getPointerOperandType();
17190 Type *PredTy = VectorType::get(Type::getInt1Ty(LDVTy->getContext()),
17191 LDVTy->getElementCount());
17193 Function *LdNFunc = getStructuredLoadFunction(LI->getModule(), Factor,
17196 // Holds sub-vectors extracted from the load intrinsic return values. The
17197 // sub-vectors are associated with the shufflevector instructions they will
17204 getSVEPredPatternFromNumElements(FVTy->getNumElements());
17205 if (Subtarget->getMinSVEVectorSizeInBits() ==
17206 Subtarget->getMaxSVEVectorSizeInBits() &&
17207 Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy))
17211 ConstantInt::get(Type::getInt32Ty(LDVTy->getContext()), *PgPattern);
17221 BaseAddr = Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr,
17222 FVTy->getNumElements() * Factor);
17230 // Extract and store the sub-vectors returned by the load intrinsic.
17240 ConstantInt::get(Type::getInt64Ty(VTy->getContext()), 0));
17243 if (EltTy->isPointerTy())
17245 SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
17246 FVTy->getNumElements()));
17252 // Replace uses of the shufflevector instructions with the sub-vectors
17254 // associated with more than one sub-vector, those sub-vectors will be
17260 SVI->replaceAllUsesWith(WideVec);
17272 Ptr->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
17275 if (It->isDebugOrPseudoInst())
17277 if (MaxLookupDist-- == 0)
17281 SI->getPointerOperand()->stripAndAccumulateInBoundsConstantOffsets(
17284 (OffsetA.sextOrTrunc(IdxWidth) - OffsetB.sextOrTrunc(IdxWidth))
17326 auto *VecTy = cast<FixedVectorType>(SVI->getType());
17327 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
17329 unsigned LaneLen = VecTy->getNumElements() / Factor;
17330 Type *EltTy = VecTy->getElementType();
17333 const DataLayout &DL = SI->getDataLayout();
17344 Value *Op0 = SVI->getOperand(0);
17345 Value *Op1 = SVI->getOperand(1);
17350 if (EltTy->isPointerTy()) {
17353 cast<FixedVectorType>(Op0->getType())->getNumElements();
17364 // and sub-vector type to something legal.
17366 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
17372 Value *BaseAddr = SI->getPointerOperand();
17374 auto Mask = SVI->getShuffleMask();
17377 // If mask is `poison`, `Mask` may be a vector of -1s.
17384 // that points to BaseAddr+16 or BaseAddr-16 then it can be better left as a
17386 if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 &&
17388 hasNearbyPairedStore(SI->getIterator(), SI->getParent()->end(), BaseAddr,
17390 hasNearbyPairedStore(SI->getReverseIterator(), SI->getParent()->rend(),
17394 Type *PtrTy = SI->getPointerOperandType();
17395 Type *PredTy = VectorType::get(Type::getInt1Ty(STVTy->getContext()),
17396 STVTy->getElementCount());
17398 Function *StNFunc = getStructuredStoreFunction(SI->getModule(), Factor,
17404 getSVEPredPatternFromNumElements(SubVecTy->getNumElements());
17405 if (Subtarget->getMinSVEVectorSizeInBits() ==
17406 Subtarget->getMaxSVEVectorSizeInBits() &&
17407 Subtarget->getMinSVEVectorSizeInBits() ==
17412 ConstantInt::get(Type::getInt32Ty(STVTy->getContext()), *PgPattern);
17433 StartMask = Mask[IdxJ] - j;
17449 ConstantInt::get(Type::getInt64Ty(STVTy->getContext()), 0));
17460 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
17477 VectorType *VTy = cast<VectorType>(DeinterleavedValues[0]->getType());
17479 const DataLayout &DL = LI->getModule()->getDataLayout();
17485 // the code from lowerInterleavedLoad to obtain the correct container type.
17486 if (UseScalable && !VTy->isScalableTy())
17491 VectorType::get(VTy->getElementType(),
17492 VTy->getElementCount().divideCoefficientBy(NumLoads));
17494 Type *PtrTy = LI->getPointerOperandType();
17495 Function *LdNFunc = getStructuredLoadFunction(LI->getModule(), Factor,
17502 Builder.CreateVectorSplat(LdTy->getElementCount(), Builder.getTrue());
17504 Value *BaseAddr = LI->getPointerOperand();
17518 Builder.getInt64(I * LdTy->getElementCount().getKnownMinValue());
17523 LLVM_DEBUG(dbgs() << "LdN4 res: "; LdN->dump());
17527 DeinterleavedValues[J]->replaceAllUsesWith(ExtractedLdValues[J]);
17537 DeinterleavedValues[I]->replaceAllUsesWith(NewExtract);
17551 VectorType *VTy = cast<VectorType>(InterleavedValues[0]->getType());
17552 const DataLayout &DL = SI->getModule()->getDataLayout();
17559 // the code from lowerInterleavedStore to obtain the correct container type.
17560 if (UseScalable && !VTy->isScalableTy())
17566 VectorType::get(VTy->getElementType(),
17567 VTy->getElementCount().divideCoefficientBy(NumStores));
17569 Type *PtrTy = SI->getPointerOperandType();
17570 Function *StNFunc = getStructuredStoreFunction(SI->getModule(), Factor,
17575 Value *BaseAddr = SI->getPointerOperand();
17580 Builder.CreateVectorSplat(StTy->getElementCount(), Builder.getTrue());
17594 Builder.getInt64(I * StTy->getElementCount().getKnownMinValue());
17600 StoreOperands[StoreOperands.size() - 1] = Address;
17610 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
17611 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
17612 // Only use AdvSIMD to implement memsets of 32 bytes and above. It would have
17640 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
17641 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
17642 // Only use AdvSIMD to implement memsets of 32 bytes and above. It would have
17667 // 12-bit optionally shifted immediates are legal for adds.
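// E.g. #0xfff and #0xfff000 (0xfff shifted left by 12) are legal add
// immediates, whereas #0x1001 is not, since it needs bits from both halves.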
17685 if (!Subtarget->hasSVE2())
17716 // (mul (add x, c1), c2) -> (add (mul x, c2), c2*c1) in DAGCombine,
17729 const int64_t C1 = C1Node->getSExtValue();
17730 const APInt C1C2 = C1Node->getAPIntValue() * C2Node->getAPIntValue();
17750 /// isLegalAddressingMode - Return true if the addressing mode represented
17757 // reg + 9-bit signed offset
17758 // reg + SIZE_IN_BYTES * 12-bit unsigned offset
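// E.g. for an 8-byte access this permits [reg, #-256] .. [reg, #255] via the
// signed form and [reg, #0] .. [reg, #32760] (multiples of 8) via the scaled
// unsigned form.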
17789 if (Ty->isScalableTy()) {
17791 // See if we have a foldable vscale-based offset, for vector types which
17802 DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
17810 // No scalable offsets allowed for non-scalable types.
17817 if (Ty->isSized()) {
17824 return Subtarget->getInstrInfo()->isLegalAddressingMode(NumBytes, AM.BaseOffs,
17856 return Subtarget->hasFullFP16();
17869 switch (Ty->getScalarType()->getTypeID()) {
17886 // LR is a callee-save register, but we must treat it as clobbered by any call
17888 // as implicit-defs for stackmaps and patchpoints.
17903 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
17904 N->getOpcode() == ISD::SRL) &&
17907 SDValue ShiftLHS = N->getOperand(0);
17908 EVT VT = N->getValueType(0);
17910 if (!ShiftLHS->hasOneUse())
17914 !ShiftLHS.getOperand(0)->hasOneUse())
17927 if (N->getOpcode() == ISD::SHL)
17928 if (auto *SHLC = dyn_cast<ConstantSDNode>(N->getOperand(1)))
17929 return SRLC->getZExtValue() == SHLC->getZExtValue();
17940 assert(N->getOpcode() == ISD::XOR &&
17941 (N->getOperand(0).getOpcode() == ISD::SHL ||
17942 N->getOperand(0).getOpcode() == ISD::SRL) &&
17946 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
17947 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
17950 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
17951 unsigned ShiftAmt = ShiftC->getZExtValue();
17952 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17953 if (N->getOperand(0).getOpcode() == ISD::SHL)
17954 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
17955 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
17964 assert(((N->getOpcode() == ISD::SHL &&
17965 N->getOperand(0).getOpcode() == ISD::SRL) ||
17966 (N->getOpcode() == ISD::SRL &&
17967 N->getOperand(0).getOpcode() == ISD::SHL)) &&
17968 "Expected shift-shift mask");
17970 if (!N->getOperand(0)->hasOneUse())
17974 EVT VT = N->getValueType(0);
17975 if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
17976 auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
17977 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17978 return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
17983 if (N->getOpcode() == ISD::SHL && N->hasOneUse()) {
17984 if (auto C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
17985 unsigned ShlAmt = C2->getZExtValue();
17986 if (auto ShouldADD = *N->user_begin();
17987 ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
17988 if (auto ShouldLOAD = dyn_cast<LoadSDNode>(*ShouldADD->user_begin())) {
17989 unsigned ByteVT = ShouldLOAD->getMemoryVT().getSizeInBits() / 8;
17991 isIndexedLoadLegal(ISD::PRE_INC, ShouldLOAD->getMemoryVT()))
18008 assert(Ty->isIntegerTy());
18010 unsigned BitSize = Ty->getPrimitiveSizeInBits();
18021 Val &= (1LL << 32) - 1;
18037 /// xor (sra X, elt_size(X)-1), -1
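/// (The sra yields an all-ones lane for negative elements and all-zeros
/// otherwise, so xor-ing with -1 produces the "element is non-negative" mask.)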
18042 EVT VT = N->getValueType(0);
18043 if (!Subtarget->hasNEON() || !VT.isVector())
18048 SDValue Shift = N->getOperand(0);
18049 SDValue Ones = N->getOperand(1);
18057 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
18081 if (N->getValueType(0) != MVT::i32)
18084 SDValue VecReduceOp0 = N->getOperand(0);
18087 if (Opcode != ISD::ABS || VecReduceOp0->getValueType(0) != MVT::v16i32)
18092 if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
18093 ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
18096 SDValue SUB = ABS->getOperand(0);
18097 unsigned Opcode0 = SUB->getOperand(0).getOpcode();
18098 unsigned Opcode1 = SUB->getOperand(1).getOpcode();
18100 if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
18101 SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
18113 SDValue EXT0 = SUB->getOperand(0);
18114 SDValue EXT1 = SUB->getOperand(1);
18116 if (EXT0->getOperand(0)->getValueType(0) != MVT::v16i8 ||
18117 EXT1->getOperand(0)->getValueType(0) != MVT::v16i8)
18125 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
18128 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
18136 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
18139 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
18161 if (!ST->isNeonAvailable())
18164 if (!ST->hasDotProd())
18167 SDValue Op0 = N->getOperand(0);
18168 if (N->getValueType(0) != MVT::i32 || Op0.getValueType().isScalableVT() ||
18194 if (!ST->hasMatMulInt8())
18215 // For non-mla reductions B can be set to 1. For MLA we take the operand of
18237 return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
18260 DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), ConcatSDot16);
18276 DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
18277 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), VecReduceAdd16,
18332 // We can convert a UADDV(add(zext(64-bit source), zext(64-bit source))) into
18333 // UADDLV(concat), where the concat represents the 64-bit zext sources.
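// E.g. (assuming v8i8 inputs A and B):
//   uaddv(add(zext A to v8i16, zext B to v8i16))
//     --> uaddlv(concat_vectors(A, B) : v16i8)
// since summing the two zero-extended halves element-wise and then reducing
// equals one widening add-across-lanes over all sixteen bytes.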
18335 // Look for add(zext(64-bit source), zext(64-bit source)), returning
18349 // Check zext VTs are the same and 64-bit length.
18373 SDValue A = N->getOperand(0);
18376 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), R);
18397 if (isIntDivCheap(N->getValueType(0), Attr))
18400 EVT VT = N->getValueType(0);
18405 (VT.isFixedLengthVector() && Subtarget->useSVEForFixedLengthVectors()))
18413 // If the divisor is 2 or -2, the default expansion is better. It will add
18414 // (N->getValueType(0) >> (BitWidth - 1)) to it before shifting right.
18416 Divisor == APInt(Divisor.getBitWidth(), -2, /*isSigned*/ true))
18427 if (isIntDivCheap(N->getValueType(0), Attr))
18430 EVT VT = N->getValueType(0);
18434 if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
18447 SDValue N0 = N->getOperand(0);
18448 SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
18492 /// Calculates what the pre-extend type is, based on the extension
18496 /// pre-extend type is pulled directly from the operand, while other extend
18515 return TypeNode->getVT();
18523 uint32_t Mask = Constant->getZExtValue();
18550 SDValue Extend = BV->getOperand(0);
18565 // Restrict valid pre-extend data type
18573 for (SDValue Op : drop_begin(BV->ops())) {
18601 for (SDValue Op : BV->ops())
18612 cast<ShuffleVectorSDNode>(BV)->getMask());
18624 EVT VT = Mul->getValueType(0);
18628 SDValue Op0 = performBuildShuffleExtendCombine(Mul->getOperand(0), DAG);
18629 SDValue Op1 = performBuildShuffleExtendCombine(Mul->getOperand(1), DAG);
18636 return DAG.getNode(Mul->getOpcode(), DL, VT, Op0 ? Op0 : Mul->getOperand(0),
18637 Op1 ? Op1 : Mul->getOperand(1));
18640 // Combine v4i32 Mul(And(Srl(X, 15), 0x10001), 0xffff) -> v8i16 CMLTz
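// (The srl/and pair leaves each i16 half's sign bit at bit 0 / bit 16 of its
//  i32 lane, and multiplying by 0xffff fans each of those bits out into a full
//  0xffff / 0x0000 half, which is exactly "compare less than zero" on v8i16.)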
18643 EVT VT = N->getValueType(0);
18647 if (N->getOperand(0).getOpcode() != ISD::AND ||
18648 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
18651 SDValue And = N->getOperand(0);
18655 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
18662 V3 != (HalfSize - 1))
18680 EVT VT = N->getValueType(0);
18682 (N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
18683 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND) ||
18684 (N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
18685 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND) ||
18686 N->getOperand(0).getOperand(0).getValueType() !=
18687 N->getOperand(1).getOperand(0).getValueType())
18690 if (N->getOpcode() == ISD::MUL &&
18691 N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode())
18694 SDValue N0 = N->getOperand(0).getOperand(0);
18695 SDValue N1 = N->getOperand(1).getOperand(0);
18706 SDValue NewN0 = DAG.getNode(N->getOperand(0).getOpcode(), DL, HalfVT, N0);
18707 SDValue NewN1 = DAG.getNode(N->getOperand(1).getOpcode(), DL, HalfVT, N1);
18708 SDValue NewOp = DAG.getNode(N->getOpcode(), DL, HalfVT, NewN0, NewN1);
18709 return DAG.getNode(N->getOpcode() == ISD::MUL ? N->getOperand(0).getOpcode()
18730 // Canonicalize X*(Y+1) -> X*Y+X and (X+1)*Y -> X*Y+Y,
18732 // Similarly, X*(1-Y) -> X - X*Y and (1-Y)*X -> X - Y*X.
18734 EVT VT = N->getValueType(0);
18735 SDValue N0 = N->getOperand(0);
18736 SDValue N1 = N->getOperand(1);
18740 auto IsAddSubWith1 = [&](SDValue V) -> bool {
18741 AddSubOpc = V->getOpcode();
18742 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
18743 SDValue Opnd = V->getOperand(1);
18744 MulOper = V->getOperand(0);
18748 return C->isOne();
18768 const APInt &ConstValue = C->getAPIntValue();
18773 (N0->getOpcode() == ISD::TRUNCATE &&
18774 (IsSVECntIntrinsic(N0->getOperand(0)))))
18781 // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
18782 // 64-bit is 5 cycles, so this is always a win.
18794 if (N0->hasOneUse() && (isSignExtended(N0, DAG) ||
18799 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ADD ||
18800 N->user_begin()->getOpcode() == ISD::SUB))
18835 // C = 45 is equal to (1+4)*(1+8); we don't decompose it into (1+2)*(16-1) as
18836 // the (2^N - 1) can't be executed via a single instruction.
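// (With the (1+4)*(1+8) split, an illustrative expansion is
//    tmp = add(x, shl(x, 2))     ; 5 * x
//    res = add(tmp, shl(tmp, 3)) ; 45 * x
//  i.e. two cheap add-with-shifted-operand operations.)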
18843 APInt NVMinus1 = N - 1;
18853 // C = 11 is equal to (1+4)*2+1; we don't decompose it into (1+2)*4-1 as
18854 // the (2^N - 1) can't be executed via a single instruction.
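// (Arithmetically, 11 * x = x + ((x + (x << 2)) << 1): build 5 * x, shift to
//  get 10 * x, then add x once more.)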
18856 APInt CVMinus1 = C - 1;
18860 APInt SCVMinus1 = CVMinus1.ashr(TrailingZeroes) - 1;
18870 // Can the const C be decomposed into (1 - (1 - 2^M) * 2^N), eg:
18871 // C = 29 is equal to 1 - (1 - 2^3) * 2^2.
18873 APInt CVMinus1 = C - 1;
18889 // (mul x, 2^N - 1) => (sub (shl x, N), x)
18890 // (mul x, (2^(N-M) - 1) * 2^M) => (sub (shl x, N), (shl x, M))
18895 // (mul x, 1 - (1 - 2^M) * 2^N))
18896 // => MV = sub (x - (shl x, M)); sub (x - (shl MV, N))
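// E.g. for C = 29 (M = 3, N = 2): MV = x - (x << 3) = -7 * x, and then
// x - (MV << 2) = x + 28 * x = 29 * x.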
18897 APInt SCVMinus1 = ShiftedConstValue - 1;
18911 if (Subtarget->hasALULSLFast() &&
18913 APInt CVMMinus1 = CVM - 1;
18914 APInt CVNMinus1 = CVN - 1;
18923 if (Subtarget->hasALULSLFast() &&
18934 if (Subtarget->hasALULSLFast() &&
18945 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
18946 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
18947 // (mul x, -(2^(N-M) - 1) * 2^M) => (sub (shl x, M), (shl x, N))
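// E.g. (mul x, -7) becomes (sub x, (shl x, 3)) = x - 8 * x = -7 * x.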
18948 APInt SCVPlus1 = -ShiftedConstValue + 1;
18949 APInt CVNegPlus1 = -ConstValue + 1;
18950 APInt CVNegMinus1 = -ConstValue - 1;
18968 // Take advantage of vector comparisons producing 0 or -1 in each lane to
18972 // UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
18979 EVT VT = N->getValueType(0);
18980 if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
18981 N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
18982 VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
18986 // make the transformation for non-constant splats as well, but it's unclear
18990 dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
18992 if (!BV->isConstant())
18997 EVT IntVT = BV->getValueType(0);
19000 SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
19004 N->getOperand(0)->getOperand(0), MaskConst);
19012 /// Tries to replace scalar FP <-> INT conversions with SVE in streaming
19018 if (N->isStrictFPOpcode())
19024 if (!Subtarget->isSVEorStreamingSVEAvailable() ||
19025 (!Subtarget->isStreaming() && !Subtarget->isStreamingCompatible()))
19032 SDValue SrcVal = N->getOperand(0);
19034 EVT DestTy = N->getValueType(0);
19057 SDValue Convert = DAG.getNode(N->getOpcode(), DL, DestVecTy, Vec);
19073 EVT VT = N->getValueType(0);
19078 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
19081 // If the result of an integer load is only used by an integer-to-float
19083 // This eliminates an "integer-to-vector-move" UOP and improves throughput.
19084 SDValue N0 = N->getOperand(0);
19085 if (Subtarget->isNeonAvailable() && ISD::isNormalLoad(N0.getNode()) &&
19088 !cast<LoadSDNode>(N0)->isVolatile()) {
19090 SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
19091 LN0->getPointerInfo(), LN0->getAlign(),
19092 LN0->getMemOperand()->getFlags());
19099 (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
19106 /// Fold a floating-point multiply by power of two into floating-point to
19107 /// fixed-point conversion.
19115 if (!Subtarget->isNeonAvailable())
19118 if (!N->getValueType(0).isSimple())
19121 SDValue Op = N->getOperand(0);
19128 SDValue ConstVec = Op->getOperand(1);
19135 (FloatBits != 16 || !Subtarget->hasFullFP16()))
19138 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
19143 // Avoid conversions where iN is larger than the float (e.g., float -> i64).
19150 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
19151 if (C == -1 || C == 0 || C > Bits)
19158 if (N->getOpcode() == ISD::FP_TO_SINT_SAT ||
19159 N->getOpcode() == ISD::FP_TO_UINT_SAT) {
19160 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
19166 bool IsSigned = (N->getOpcode() == ISD::FP_TO_SINT ||
19167 N->getOpcode() == ISD::FP_TO_SINT_SAT);
19173 Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
19176 FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
19183 EVT VT = N->getValueType(0);
19198 SDValue N0 = N->getOperand(0);
19202 SDValue N1 = N->getOperand(1);
19206 // InstCombine does (not (neg a)) => (add a -1).
19207 // Try: (or (and (neg a) b) (and (add a -1) c)) => (bsl (neg a) b c)
19209 for (int i = 1; i >= 0; --i) {
19210 for (int j = 1; j >= 0; --j) {
19211 SDValue O0 = N0->getOperand(i);
19212 SDValue O1 = N1->getOperand(j);
19219 SubSibling = N0->getOperand(1 - i);
19220 AddSibling = N1->getOperand(1 - j);
19224 AddSibling = N0->getOperand(1 - i);
19225 SubSibling = N1->getOperand(1 - j);
19247 uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
19248 for (int i = 1; i >= 0; --i)
19249 for (int j = 1; j >= 0; --j) {
19252 if (ISD::isConstantSplatVector(N0->getOperand(i).getNode(), Val1) &&
19253 ISD::isConstantSplatVector(N1->getOperand(j).getNode(), Val2) &&
19255 return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i),
19256 N0->getOperand(1 - i), N1->getOperand(1 - j));
19258 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
19259 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
19265 ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
19266 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
19268 CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
19274 return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i),
19275 N0->getOperand(1 - i), N1->getOperand(1 - j));
19292 EVT VT = N->getValueType(0);
19293 SDValue CSel0 = N->getOperand(0);
19294 SDValue CSel1 = N->getOperand(1);
19300 if (!CSel0->hasOneUse() || !CSel1->hasOneUse())
19313 if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
19328 if (N->getOpcode() == ISD::AND) {
19341 if (Op1 && Op1->getAPIntValue().isNegative() &&
19342 Op1->getAPIntValue().sgt(-32)) {
19344 // if the Op1 is a constant in the range [-31, -1], we
19347 DAG.getConstant(Op1->getAPIntValue().abs(), DL, Op1->getValueType(0));
19363 EVT VT = N->getValueType(0);
19397 if (N->getOpcode() == AArch64ISD::DUP || N->getOpcode() == ISD::SPLAT_VECTOR)
19398 if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0)))
19399 return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
19406 SDValue Op = N->getOperand(0);
19409 Op = Op->getOperand(0);
19418 SDValue Src = N->getOperand(0);
19419 unsigned Opc = Src->getOpcode();
19423 SDValue UnpkOp = Src->getOperand(0);
19424 SDValue Dup = N->getOperand(1);
19430 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
19434 uint64_t ExtVal = C->getZExtValue();
19436 auto MaskAndTypeMatch = [ExtVal](EVT VT) -> bool {
19442 // If the mask is fully covered by the unpack, we don't need to push
19444 EVT EltTy = UnpkOp->getValueType(0).getVectorElementType();
19448 // If this is 'and (uunpklo/hi (extload MemTy -> ExtTy)), mask', then check
19449 // to see if the mask is all-ones of size MemTy.
19451 if (MaskedLoadOp && (MaskedLoadOp->getExtensionType() == ISD::ZEXTLOAD ||
19452 MaskedLoadOp->getExtensionType() == ISD::EXTLOAD)) {
19453 EVT EltTy = MaskedLoadOp->getMemoryVT().getVectorElementType();
19459 APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits());
19463 Dup = DAG.getNode(ISD::SPLAT_VECTOR, DL, UnpkOp->getValueType(0),
19467 UnpkOp->getValueType(0), UnpkOp, Dup);
19469 return DAG.getNode(Opc, DL, N->getValueType(0), And);
19477 if (isAllActivePredicate(DAG, N->getOperand(0)))
19478 return N->getOperand(1);
19479 if (isAllActivePredicate(DAG, N->getOperand(1)))
19480 return N->getOperand(0);
19485 SDValue Mask = N->getOperand(1);
19492 // SVE load instructions perform an implicit zero-extend, which makes them
19498 MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
19515 MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
19534 SDValue SetCC = N->getOperand(0);
19535 EVT VT = N->getValueType(0);
19541 for (auto U : N->users())
19542 if (U->getOpcode() == ISD::SELECT)
19545 // Check if the operand is a SETCC node with floating-point comparison
19570 SDValue LHS = N->getOperand(0);
19571 SDValue RHS = N->getOperand(1);
19572 EVT VT = N->getValueType(0);
19633 SDValue LHS = N->getOperand(0);
19634 SDValue RHS = N->getOperand(1);
19635 EVT VT = N->getValueType(0);
19638 if (!N->getFlags().hasAllowReassociation())
19641 // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
19653 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(1), B, N->getFlags()),
19655 VCMLA->setFlags(A->getFlags());
19705 assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
19707 if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
19710 SDValue N0 = N->getOperand(0);
19714 !isNullConstant(N->getOperand(1)))
19718 // flag-setting operation.
19725 return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::FIRST_ACTIVE);
19728 // Materialize : Idx = (add (mul vscale, NumEls), -1)
19735 assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
19737 if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
19740 SDValue N0 = N->getOperand(0);
19746 // Idx == (add (mul vscale, NumEls), -1)
19747 SDValue Idx = N->getOperand(1);
19759 // Extracts of lane EC-1 for SVE can be expressed as PTEST(Op, LAST) ? 1 : 0
19762 return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::LAST_ACTIVE);
19768 assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
19775 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
19777 EVT VT = N->getValueType(0);
19779 bool IsStrict = N0->isStrictFPOpcode();
19781 // extract(dup x) -> x
19790 // ->
19795 if (isNullConstant(N1) && hasPairwiseAdd(N0->getOpcode(), VT, FullFP16) &&
19798 SDValue N00 = N0->getOperand(IsStrict ? 1 : 0);
19799 SDValue N01 = N0->getOperand(IsStrict ? 2 : 1);
19810 if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
19811 Other == Shuffle->getOperand(0)) {
19817 return DAG.getNode(N0->getOpcode(), DL, VT, Extract1, Extract2);
19823 SDValue Ret = DAG.getNode(N0->getOpcode(), DL,
19825 {N0->getOperand(0), Extract1, Extract2});
19839 EVT VT = N->getValueType(0);
19840 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
19841 unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
19846 if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
19848 SDValue N00 = N0->getOperand(0);
19849 SDValue N10 = N1->getOperand(0);
19857 // ->
19861 // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
19863 // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
19892 N00->getOperand(1) == N10->getOperand(1)) {
19893 SDValue N000 = N00->getOperand(0);
19894 SDValue N100 = N10->getOperand(0);
19895 uint64_t N001ConstVal = N00->getConstantOperandVal(1),
19896 N101ConstVal = N10->getConstantOperandVal(1),
19897 NScalarSize = N->getValueType(0).getScalarSizeInBits();
19904 DAG.getConstant(N001ConstVal - NScalarSize, dl, MVT::i32);
19911 if (N->getOperand(0).getValueType() == MVT::v4i8 ||
19912 N->getOperand(0).getValueType() == MVT::v2i16 ||
19913 N->getOperand(0).getValueType() == MVT::v2i8) {
19914 EVT SrcVT = N->getOperand(0).getValueType();
19918 if (N->getNumOperands() % 2 == 0 &&
19919 all_of(N->op_values(), [SrcVT](SDValue V) {
19925 return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() &&
19926 LD->getExtensionType() == ISD::NON_EXTLOAD;
19929 EVT NVT = EVT::getVectorVT(*DAG.getContext(), FVT, N->getNumOperands());
19932 for (unsigned i = 0; i < N->getNumOperands(); i++) {
19933 SDValue V = N->getOperand(i);
19938 SDValue NewLoad = DAG.getLoad(FVT, dl, LD->getChain(),
19939 LD->getBasePtr(), LD->getMemOperand());
19944 return DAG.getBitcast(N->getValueType(0),
19954 // ->
19957 if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
19958 N1Opc == ISD::TRUNCATE && N->isOnlyUserOf(N0.getNode()) &&
19959 N->isOnlyUserOf(N1.getNode())) {
19961 return V->getOpcode() == ISD::XOR &&
19964 SDValue N00 = N0->getOperand(0);
19965 SDValue N10 = N1->getOperand(0);
19966 if (isBitwiseVectorNegate(N00) && N0->isOnlyUserOf(N00.getNode()) &&
19967 isBitwiseVectorNegate(N10) && N1->isOnlyUserOf(N10.getNode())) {
19972 N00->getOperand(0)),
19974 N10->getOperand(0))),
19984 // Optimise concat_vectors of two identical binops with a 128-bit destination
19986 // concat(uhadd(a,b), uhadd(c, d)) -> uhadd(concat(a, c), concat(b, d))
19987 if (N->getNumOperands() == 2 && N0Opc == N1Opc && VT.is128BitVector() &&
19988 DAG.getTargetLoweringInfo().isBinOp(N0Opc) && N0->hasOneUse() &&
19989 N1->hasOneUse()) {
19990 SDValue N00 = N0->getOperand(0);
19991 SDValue N01 = N0->getOperand(1);
19992 SDValue N10 = N1->getOperand(0);
19993 SDValue N11 = N1->getOperand(1);
20023 if (Imm != 1ULL << (ShtAmt - 1))
20028 // concat(rshrn(x), rshrn(y)) -> rshrn(concat(x, y))
20029 if (N->getNumOperands() == 2 && IsRSHRN(N0) &&
20041 DAG.getConstant(1ULL << (N0.getConstantOperandVal(1) - 1), dl, BVT));
20048 if (N->getNumOperands() == 2 && N0Opc == AArch64ISD::ZIP1 &&
20061 if (N->getNumOperands() == 2 && N0 == N1 && VT.getVectorNumElements() == 2) {
20067 // Canonicalise concat_vectors so that the right-hand vector has as few
20068 // bit-casts as possible before its real operation. The primary matching
20070 // which depend on the operation being performed on this right-hand vector.
20076 if (N->getNumOperands() != 2 || N1Opc != ISD::BITCAST)
20078 SDValue RHS = N1->getOperand(0);
20085 dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
20101 EVT VT = N->getValueType(0);
20105 SDValue V = N->getOperand(0);
20108 // blocks this combine because the non-const case requires custom lowering.
20110 // ty1 extract_vector(ty2 splat(const))) -> ty1 splat(const)
20122 SDValue Vec = N->getOperand(0);
20123 SDValue SubVec = N->getOperand(1);
20124 uint64_t IdxVal = N->getConstantOperandVal(2);
20144 // Fold insert_subvector -> concat_vectors
20145 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
20146 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
20173 // register allocator to avoid cross-class register copies that aren't
20177 SDValue Op1 = N->getOperand(1);
20182 SDValue IID = N->getOperand(0);
20183 SDValue Shift = N->getOperand(2);
20186 EVT ResTy = N->getValueType(0);
20208 // AArch64 high-vector "long" operations are formed by performing the non-high
20217 // (dupv64 scalar) --> (extract_high (dup128 scalar))
20221 // It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
20257 N = DAG.getNode(N->getOpcode(), DL, NewVT, N->ops());
20313 SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
20319 // - csel 1, 0, cc
20320 // - csel 0, 1, !cc
20341 if (!TValue->isOne()) {
20347 return TValue->isOne() && FValue->isZero();
20355 isSetCC(Op->getOperand(0), Info));
20360 // -->
20365 assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
20366 SDValue LHS = Op->getOperand(0);
20367 SDValue RHS = Op->getOperand(1);
20386 ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
20387 : InfoAndKind.Info.Generic.Opnd0->getValueType();
20405 EVT VT = Op->getValueType(0);
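// Illustrative sketch, not part of AArch64ISelLowering.cpp: a standalone scalar
// model of the identity the setcc+add folding above relies on. A materialised
// setcc is csel(1, 0, cc), and x + csel(1, 0, cc) equals csel(x, x + 1, !cc),
// which is what a CSINC computes. All names below are hypothetical.
#include <cassert>
#include <cstdint>
static int64_t csel(int64_t T, int64_t F, bool CC) { return CC ? T : F; }
int main() {
  for (bool CC : {false, true})
    for (int64_t X : {-5, 0, 41})
      assert(X + csel(1, 0, CC) == csel(X, X + 1, !CC)); // csinc x, x, !cc
}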
20410 // ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
20412 EVT VT = N->getValueType(0);
20414 if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
20417 SDValue LHS = N->getOperand(0);
20418 SDValue RHS = N->getOperand(1);
20423 auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
20424 auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
20425 if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero())
20428 SDValue Op1 = LHS->getOperand(0);
20429 SDValue Op2 = RHS->getOperand(0);
20439 EVT ValVT = Val1->getValueType(0);
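// Illustrative sketch, not part of AArch64ISelLowering.cpp: the
// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b) rewrite rests on unsigned,
// wrapping sums commuting with lanewise addition: sum(a) + sum(b) == sum(a + b).
#include <array>
#include <cassert>
#include <cstdint>
#include <numeric>
int main() {
  std::array<uint32_t, 4> A{1, 2, 3, 0xffffffffu}, B{5, 6, 7, 8}, C{};
  for (size_t I = 0; I < A.size(); ++I)
    C[I] = A[I] + B[I]; // lanewise ADD, wraps exactly like the vector op
  uint32_t SumA = std::accumulate(A.begin(), A.end(), 0u);
  uint32_t SumB = std::accumulate(B.begin(), B.end(), 0u);
  uint32_t SumC = std::accumulate(C.begin(), C.end(), 0u);
  assert(SumA + SumB == SumC); // ADD(UADDV a, UADDV b) == UADDV(ADD a, b)
}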
20449 /// CSNEG(c, -1, cc) + b => CSINC(b+c, b, cc)
20451 EVT VT = N->getValueType(0);
20452 if (!VT.isScalarInteger() || N->getOpcode() != ISD::ADD)
20455 SDValue LHS = N->getOperand(0);
20456 SDValue RHS = N->getOperand(1);
20482 (CTVal->isOne() || CFVal->isOne())) &&
20484 (CTVal->isOne() || CFVal->isAllOnes())))
20488 if (LHS.getOpcode() == AArch64ISD::CSEL && CTVal->isOne() &&
20489 !CFVal->isOne()) {
20495 // Switch CSNEG(1, c, cc) to CSNEG(-c, -1, !cc)
20496 if (LHS.getOpcode() == AArch64ISD::CSNEG && CTVal->isOne() &&
20497 !CFVal->isAllOnes()) {
20498 APInt C = -1 * CFVal->getAPIntValue();
20506 APInt ADDC = CTVal->getAPIntValue();
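// Illustrative sketch, not part of AArch64ISelLowering.cpp: scalar check of the
// identities behind this fold. CSNEG(c, -1, cc) selects c or -(-1) == 1 and
// CSEL(c, 1, cc) selects c or 1, so adding b gives cc ? b + c : b + 1, which is
// exactly CSINC(b + c, b, cc).
#include <cassert>
#include <cstdint>
static int64_t csel(int64_t T, int64_t F, bool CC) { return CC ? T : F; }
static int64_t csneg(int64_t T, int64_t F, bool CC) { return CC ? T : -F; }
static int64_t csinc(int64_t T, int64_t F, bool CC) { return CC ? T : F + 1; }
int main() {
  for (bool CC : {false, true})
    for (int64_t B : {-7, 0, 42})
      for (int64_t C : {-3, 0, 5}) {
        assert(csneg(C, -1, CC) + B == csinc(B + C, B, CC));
        assert(csel(C, 1, CC) + B == csinc(B + C, B, CC));
      }
}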
20511 assert(((LHS.getOpcode() == AArch64ISD::CSEL && CFVal->isOne()) ||
20512 (LHS.getOpcode() == AArch64ISD::CSNEG && CFVal->isAllOnes())) &&
20522 // ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
20524 EVT VT = N->getValueType(0);
20525 if (N->getOpcode() != ISD::ADD)
20528 SDValue Dot = N->getOperand(0);
20529 SDValue A = N->getOperand(1);
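// Illustrative sketch, not part of AArch64ISelLowering.cpp: per-lane model of
// why the zero accumulator can be replaced by A. A dot-product lane adds four
// byte products into its 32-bit accumulator, so UDOT(0, x, y) + A == UDOT(A, x, y).
#include <cassert>
#include <cstdint>
static uint32_t udotLane(uint32_t Acc, const uint8_t X[4], const uint8_t Y[4]) {
  for (int J = 0; J < 4; ++J)
    Acc += uint32_t(X[J]) * uint32_t(Y[J]);
  return Acc;
}
int main() {
  const uint8_t X[4] = {1, 2, 3, 250}, Y[4] = {7, 0, 255, 9};
  for (uint32_t A : {0u, 5u, 0xffffff00u})
    assert(udotLane(0, X, Y) + A == udotLane(A, X, Y));
}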
20558 // (neg (csel X, Y)) -> (csel (neg X), (neg Y))
20567 SDValue CSel = N->getOperand(1);
20568 if (CSel.getOpcode() != AArch64ISD::CSEL || !CSel->hasOneUse())
20589 // which act on the high-half of their inputs. They are normally matched by
20594 // -> uaddl2 vD, vN, vM
20605 MVT VT = N->getSimpleValueType(0);
20607 if (N->getOpcode() == ISD::ADD)
20613 SDValue LHS = N->getOperand(0);
20614 SDValue RHS = N->getOperand(1);
20638 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
20643 !Op.getNode()->hasAnyUseOfValue(0);
20667 SDValue CmpOp = Op->getOperand(2);
20679 SDValue CsetOp = CmpOp->getOperand(IsAdd ? 0 : 1);
20684 return DAG.getNode(Op->getOpcode(), SDLoc(Op), Op->getVTList(),
20685 Op->getOperand(0), Op->getOperand(1),
20691 SDValue LHS = N->getOperand(0);
20692 SDValue RHS = N->getOperand(1);
20693 SDValue Cond = N->getOperand(2);
20698 EVT VT = N->getValueType(0);
20710 EVT VT = N->getValueType(0);
20714 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1),
20715 Elt2 = N->getOperand(2), Elt3 = N->getOperand(3);
20716 if (Elt0->getOpcode() == ISD::FP_ROUND &&
20717 Elt1->getOpcode() == ISD::FP_ROUND &&
20718 isa<ConstantSDNode>(Elt0->getOperand(1)) &&
20719 isa<ConstantSDNode>(Elt1->getOperand(1)) &&
20720 Elt0->getConstantOperandVal(1) == Elt1->getConstantOperandVal(1) &&
20721 Elt0->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20722 Elt1->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20724 isa<ConstantSDNode>(Elt0->getOperand(0)->getOperand(1)) &&
20725 isa<ConstantSDNode>(Elt1->getOperand(0)->getOperand(1)) &&
20726 Elt0->getOperand(0)->getOperand(0) ==
20727 Elt1->getOperand(0)->getOperand(0) &&
20728 Elt0->getOperand(0)->getConstantOperandVal(1) == 0 &&
20729 Elt1->getOperand(0)->getConstantOperandVal(1) == 1) {
20730 SDValue LowLanesSrcVec = Elt0->getOperand(0)->getOperand(0);
20733 if (Elt2->getOpcode() == ISD::UNDEF &&
20734 Elt3->getOpcode() == ISD::UNDEF) {
20736 } else if (Elt2->getOpcode() == ISD::FP_ROUND &&
20737 Elt3->getOpcode() == ISD::FP_ROUND &&
20738 isa<ConstantSDNode>(Elt2->getOperand(1)) &&
20739 isa<ConstantSDNode>(Elt3->getOperand(1)) &&
20740 Elt2->getConstantOperandVal(1) ==
20741 Elt3->getConstantOperandVal(1) &&
20742 Elt2->getOperand(0)->getOpcode() ==
20744 Elt3->getOperand(0)->getOpcode() ==
20747 isa<ConstantSDNode>(Elt2->getOperand(0)->getOperand(1)) &&
20748 isa<ConstantSDNode>(Elt3->getOperand(0)->getOperand(1)) &&
20749 Elt2->getOperand(0)->getOperand(0) ==
20750 Elt3->getOperand(0)->getOperand(0) &&
20751 Elt2->getOperand(0)->getConstantOperandVal(1) == 0 &&
20752 Elt3->getOperand(0)->getConstantOperandVal(1) == 1) {
20753 SDValue HighLanesSrcVec = Elt2->getOperand(0)->getOperand(0);
20763 Elt0->getOperand(1));
20770 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1);
20771 if (Elt0->getOpcode() == ISD::FP_EXTEND &&
20772 Elt1->getOpcode() == ISD::FP_EXTEND &&
20773 Elt0->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20774 Elt1->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20775 Elt0->getOperand(0)->getOperand(0) ==
20776 Elt1->getOperand(0)->getOperand(0) &&
20778 isa<ConstantSDNode>(Elt0->getOperand(0)->getOperand(1)) &&
20779 isa<ConstantSDNode>(Elt1->getOperand(0)->getOperand(1)) &&
20780 Elt0->getOperand(0)->getConstantOperandVal(1) + 1 ==
20781 Elt1->getOperand(0)->getConstantOperandVal(1) &&
20784 Elt0->getOperand(0)->getConstantOperandVal(1) %
20787 SDValue SrcVec = Elt0->getOperand(0)->getOperand(0);
20792 SDValue SubvectorIdx = Elt0->getOperand(0)->getOperand(1);
20802 // extract subvector where the inner vector is any-extended to the
20813 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1);
20814 // Reminder, EXTRACT_VECTOR_ELT has the effect of any-extending to its VT.
20815 if (Elt0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20816 Elt1->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20818 isa<ConstantSDNode>(Elt0->getOperand(1)) &&
20819 isa<ConstantSDNode>(Elt1->getOperand(1)) &&
20821 Elt0->getOperand(0) == Elt1->getOperand(0) &&
20823 Elt0->getConstantOperandVal(1) + 1 == Elt1->getConstantOperandVal(1) &&
20826 Elt0->getConstantOperandVal(1) % VT.getVectorMinNumElements() == 0) {
20827 SDValue VecToExtend = Elt0->getOperand(0);
20832 SDValue SubvectorIdx = DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL);
20845 EVT VT = N->getValueType(0);
20846 SDValue N0 = N->getOperand(0);
20857 // i32 (trunc (extract Vi64, idx)) -> i32 (extract (nvcast Vi32), idx*2))
20877 cast<ConstantSDNode>(ExtractIndexNode)->getZExtValue();
20894 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
20903 uint64_t AndMask = CSD->getZExtValue();
20912 // (N - Y) + Z --> (Z - Y) + N
20935 EVT VT = N->getValueType(0);
20946 if (N->getOpcode() != ISD::ADD)
20951 EVT VT = N->getValueType(0);
20956 SDValue LHS = N->getOperand(0);
20957 SDValue RHS = N->getOperand(1);
20971   // with LSL (shift > 4). For the rest of the processors, this is a no-op for
20984 if (N->getOpcode() != ISD::SUB)
20987 SDValue Add = N->getOperand(1);
20988 SDValue X = N->getOperand(0);
21006 EVT VT = N->getValueType(0);
21028 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::SUB)
21031 if (!N->getValueType(0).isFixedLengthVector())
21034 auto performOpt = [&DAG, &N](SDValue Op0, SDValue Op1) -> SDValue {
21038 if (!cast<ConstantSDNode>(Op1->getOperand(1))->isZero())
21041 SDValue MulValue = Op1->getOperand(0);
21054 DAG.getNode(N->getOpcode(), SDLoc(N), ScalableVT, {ScaledOp, MulValue});
21055 return convertFromScalableVector(DAG, N->getValueType(0), NewValue);
21058 if (SDValue res = performOpt(N->getOperand(0), N->getOperand(1)))
21060 else if (N->getOpcode() == ISD::ADD)
21061 return performOpt(N->getOperand(1), N->getOperand(0));
21069 EVT VT = N->getValueType(0);
21071 DAG.getTargetLoweringInfo().isOperationExpand(N->getOpcode(), MVT::v1i64))
21073 SDValue Op0 = N->getOperand(0);
21074 SDValue Op1 = N->getOperand(1);
21098 DAG.getNode(N->getOpcode(), DL, MVT::v1i64, Op0, Op1),
21104 if (!BV->hasOneUse())
21107 if (!Ld || !Ld->isSimple())
21115 if (!Ld || !Ld->isSimple() || !BV.getOperand(Op).hasOneUse())
21146 if (SV1->getMaskElt(I) != I ||
21147 SV1->getMaskElt(I + NumSubElts) != I + NumSubElts ||
21148 SV1->getMaskElt(I + NumSubElts * 2) != I + NumSubElts * 2 ||
21149 SV1->getMaskElt(I + NumSubElts * 3) != I + NumElts)
21152 if (SV2->getMaskElt(I) != I ||
21153 SV2->getMaskElt(I + NumSubElts) != I + NumSubElts ||
21154 SV2->getMaskElt(I + NumSubElts * 2) != I + NumElts)
21157 auto *Ld0 = dyn_cast<LoadSDNode>(SV2->getOperand(0).getOperand(0));
21158 auto *Ld1 = dyn_cast<LoadSDNode>(SV2->getOperand(0).getOperand(1));
21159 auto *Ld2 = dyn_cast<LoadSDNode>(SV2->getOperand(1).getOperand(0));
21161 if (!Ld0 || !Ld1 || !Ld2 || !Ld3 || !Ld0->isSimple() || !Ld1->isSimple() ||
21162 !Ld2->isSimple() || !Ld3->isSimple())
21187 unsigned Size = get<0>(L)->getValueType(0).getSizeInBits();
21188 return Size == get<1>(L)->getValueType(0).getSizeInBits() &&
21229 EVT VT = N->getValueType(0);
21235 SDValue Other = N->getOperand(0);
21236 SDValue Shift = N->getOperand(1);
21237 if (Shift.getOpcode() != ISD::SHL && N->getOpcode() != ISD::SUB)
21286 SDValue Load = DAG.getLoad(DLoadVT, SDLoc(L0), L0->getChain(),
21287 L0->getBasePtr(), L0->getPointerInfo(),
21288 L0->getOriginalAlign());
21297 for (const auto &[O0, O1] : zip(Op0->op_values(), Op1->op_values()))
21343 return DAG.getNode(N->getOpcode(), DL, VT, Ext0, NShift);
21374 // Massage DAGs which we can use the high-half "long" operations on into
21377 // (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
21387 SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
21388 SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
21394 // just as well use the non-high version) so look for a corresponding extract
21408 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);
21410 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
21411 N->getOperand(0), LHS, RHS);
21415 MVT ElemTy = N->getSimpleValueType(0).getScalarType();
21419 if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
21423 if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
21429 } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
21430 ShiftAmount = CVN->getSExtValue();
21436 return N->getOperand(1);
21471 ShiftAmount = -ShiftAmount;
21478 EVT VT = N->getValueType(0);
21479 SDValue Op = N->getOperand(1);
21486 if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
21488 DAG.getConstant(-ShiftAmount, dl, MVT::i32));
21489 if (N->getValueType(0) == MVT::i64)
21496 if (N->getValueType(0) == MVT::i64)
21509 SDValue AndN = N->getOperand(2);
21514 if (!CMask || CMask->getZExtValue() != Mask)
21518 N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
21524 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
21526 N->getOperand(1).getSimpleValueType(),
21527 N->getOperand(1)),
21533 SDValue Op1 = N->getOperand(1);
21534 SDValue Op2 = N->getOperand(2);
21540 SDValue StepVector = DAG.getStepVector(DL, N->getValueType(0));
21541 SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2);
21542 SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step);
21543 SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1);
21544 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base);
21549 SDValue Scalar = N->getOperand(3);
21555 SDValue Passthru = N->getOperand(1);
21556 SDValue Pred = N->getOperand(2);
21557 return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0),
21564 EVT VT = N->getValueType(0);
21568 // Current lowering only supports the SVE-ACLE types.
21578 SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
21579 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2));
21580 SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3),
21593 SDValue Comparator = N->getOperand(3);
21597 EVT VT = N->getValueType(0);
21598 EVT CmpVT = N->getOperand(2).getValueType();
21599 SDValue Pred = N->getOperand(1);
21616 int64_t ImmVal = CN->getSExtValue();
21617 if (ImmVal >= -16 && ImmVal <= 15)
21630 uint64_t ImmVal = CN->getZExtValue();
21645 N->getOperand(2), Splat, DAG.getCondCode(CC));
21693 SDValue Pred = N->getOperand(1);
21694 SDValue VecToReduce = N->getOperand(2);
21698 EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
21704 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
21712 SDValue Pred = N->getOperand(1);
21713 SDValue VecToReduce = N->getOperand(2);
21721 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
21729 SDValue Pred = N->getOperand(1);
21730 SDValue InitVal = N->getOperand(2);
21731 SDValue VecToReduce = N->getOperand(3);
21744 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
21754 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
21755 assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
21756 SDValue Pg = N->getOperand(1);
21757 SDValue Op1 = N->getOperand(SwapOperands ? 3 : 2);
21758 SDValue Op2 = N->getOperand(SwapOperands ? 2 : 3);
21763 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op1, Op2);
21765 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg, Op1, Op2);
21778 if (!Subtarget->hasSVE2p1())
21781 if (!N->hasNUsesOfValue(2, 0))
21784 const uint64_t HalfSize = N->getValueType(0).getVectorMinNumElements() / 2;
21788 auto It = N->user_begin();
21792 if (Lo->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
21793 Hi->getOpcode() != ISD::EXTRACT_SUBVECTOR)
21796 uint64_t OffLo = Lo->getConstantOperandVal(1);
21797 uint64_t OffHi = Hi->getConstantOperandVal(1);
21807 EVT HalfVec = Lo->getValueType(0);
21808 if (HalfVec != Hi->getValueType(0) ||
21816 SDValue Idx = N->getOperand(1);
21817 SDValue TC = N->getOperand(2);
21824 {Lo->getValueType(0), Hi->getValueType(0)}, {ID, Idx, TC});
21836 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
21841 bool Scalable = N->getValueType(0).isScalableVector();
21842 if (Scalable && !Subtarget->isSVEorStreamingSVEAvailable())
21844 if (!Scalable && (!Subtarget->isNeonAvailable() || !Subtarget->hasDotProd()))
21849 SDValue Op2 = N->getOperand(2);
21850 unsigned Op2Opcode = Op2->getOpcode();
21855 MulOpLHS = Op2->getOperand(0);
21858 SDValue ExtMulOpLHS = Op2->getOperand(0);
21859 SDValue ExtMulOpRHS = Op2->getOperand(1);
21861 unsigned ExtMulOpLHSOpcode = ExtMulOpLHS->getOpcode();
21862 unsigned ExtMulOpRHSOpcode = ExtMulOpRHS->getOpcode();
21870 MulOpLHS = ExtMulOpLHS->getOperand(0);
21871 MulOpRHS = ExtMulOpRHS->getOperand(0);
21878 SDValue Acc = N->getOperand(1);
21879 EVT ReducedVT = N->getValueType(0);
21895 if (!Subtarget->hasMatMulInt8())
21898 bool Scalable = N->getValueType(0).isScalableVT();
21931 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
21936 if (!Subtarget->hasSVE2() && !Subtarget->isStreamingSVEAvailable())
21941 if (!ISD::isExtOpcode(N->getOperand(2).getOpcode()))
21943 SDValue Acc = N->getOperand(1);
21944 SDValue Ext = N->getOperand(2);
21950 SDValue ExtOp = Ext->getOperand(0);
21979 return DAG.getPartialReduceAdd(SDLoc(N), N->getValueType(0),
21980 N->getOperand(1), N->getOperand(2));
21998 return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
21999 N->getOperand(1), N->getOperand(2));
22001 return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
22002 N->getOperand(1), N->getOperand(2));
22004 return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
22005 N->getOperand(1), N->getOperand(2));
22007 return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
22008 N->getOperand(1), N->getOperand(2));
22010 return DAG.getNode(AArch64ISD::SMULL, SDLoc(N), N->getValueType(0),
22011 N->getOperand(1), N->getOperand(2));
22013 return DAG.getNode(AArch64ISD::UMULL, SDLoc(N), N->getValueType(0),
22014 N->getOperand(1), N->getOperand(2));
22016 return DAG.getNode(AArch64ISD::PMULL, SDLoc(N), N->getValueType(0),
22017 N->getOperand(1), N->getOperand(2));
22029 return DAG.getNode(ISD::ABDS, SDLoc(N), N->getValueType(0),
22030 N->getOperand(1), N->getOperand(2));
22032 return DAG.getNode(ISD::ABDU, SDLoc(N), N->getValueType(0),
22033 N->getOperand(1), N->getOperand(2));
22042 if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
22067 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
22068 N->getOperand(1));
22072 return DAG.getNode(AArch64ISD::MUL_PRED, SDLoc(N), N->getValueType(0),
22073 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22075 return DAG.getNode(AArch64ISD::MULHS_PRED, SDLoc(N), N->getValueType(0),
22076 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22078 return DAG.getNode(AArch64ISD::MULHU_PRED, SDLoc(N), N->getValueType(0),
22079 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22081 return DAG.getNode(AArch64ISD::SMIN_PRED, SDLoc(N), N->getValueType(0),
22082 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22084 return DAG.getNode(AArch64ISD::UMIN_PRED, SDLoc(N), N->getValueType(0),
22085 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22087 return DAG.getNode(AArch64ISD::SMAX_PRED, SDLoc(N), N->getValueType(0),
22088 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22090 return DAG.getNode(AArch64ISD::UMAX_PRED, SDLoc(N), N->getValueType(0),
22091 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22093 return DAG.getNode(AArch64ISD::SHL_PRED, SDLoc(N), N->getValueType(0),
22094 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22096 return DAG.getNode(AArch64ISD::SRL_PRED, SDLoc(N), N->getValueType(0),
22097 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22099 return DAG.getNode(AArch64ISD::SRA_PRED, SDLoc(N), N->getValueType(0),
22100 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22102 return DAG.getNode(AArch64ISD::FADD_PRED, SDLoc(N), N->getValueType(0),
22103 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22105 return DAG.getNode(AArch64ISD::FDIV_PRED, SDLoc(N), N->getValueType(0),
22106 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22108 return DAG.getNode(AArch64ISD::FMAX_PRED, SDLoc(N), N->getValueType(0),
22109 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22111 return DAG.getNode(AArch64ISD::FMAXNM_PRED, SDLoc(N), N->getValueType(0),
22112 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22114 return DAG.getNode(AArch64ISD::FMA_PRED, SDLoc(N), N->getValueType(0),
22115 N->getOperand(1), N->getOperand(3), N->getOperand(4),
22116 N->getOperand(2));
22118 return DAG.getNode(AArch64ISD::FMIN_PRED, SDLoc(N), N->getValueType(0),
22119 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22121 return DAG.getNode(AArch64ISD::FMINNM_PRED, SDLoc(N), N->getValueType(0),
22122 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22124 return DAG.getNode(AArch64ISD::FMUL_PRED, SDLoc(N), N->getValueType(0),
22125 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22127 return DAG.getNode(AArch64ISD::FSUB_PRED, SDLoc(N), N->getValueType(0),
22128 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22130 return DAG.getNode(ISD::ADD, SDLoc(N), N->getValueType(0), N->getOperand(2),
22131 N->getOperand(3));
22133 return DAG.getNode(ISD::SUB, SDLoc(N), N->getValueType(0), N->getOperand(2),
22134 N->getOperand(3));
22138 return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0), N->getOperand(2),
22139 N->getOperand(3));
22141 return DAG.getNode(AArch64ISD::BIC, SDLoc(N), N->getValueType(0),
22142 N->getOperand(2), N->getOperand(3));
22144 return DAG.getNode(AArch64ISD::SADDWB, SDLoc(N), N->getValueType(0),
22145 N->getOperand(1), N->getOperand(2));
22147 return DAG.getNode(AArch64ISD::SADDWT, SDLoc(N), N->getValueType(0),
22148 N->getOperand(1), N->getOperand(2));
22150 return DAG.getNode(AArch64ISD::UADDWB, SDLoc(N), N->getValueType(0),
22151 N->getOperand(1), N->getOperand(2));
22153 return DAG.getNode(AArch64ISD::UADDWT, SDLoc(N), N->getValueType(0),
22154 N->getOperand(1), N->getOperand(2));
22156 return DAG.getNode(ISD::XOR, SDLoc(N), N->getValueType(0), N->getOperand(2),
22157 N->getOperand(3));
22159 return DAG.getNode(ISD::OR, SDLoc(N), N->getValueType(0), N->getOperand(2),
22160 N->getOperand(3));
22162 return DAG.getNode(ISD::ABDS, SDLoc(N), N->getValueType(0),
22163 N->getOperand(2), N->getOperand(3));
22165 return DAG.getNode(ISD::ABDU, SDLoc(N), N->getValueType(0),
22166 N->getOperand(2), N->getOperand(3));
22168 return DAG.getNode(AArch64ISD::SDIV_PRED, SDLoc(N), N->getValueType(0),
22169 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22171 return DAG.getNode(AArch64ISD::UDIV_PRED, SDLoc(N), N->getValueType(0),
22172 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22176 return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
22177 N->getOperand(2), N->getOperand(3));
22181 return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
22182 N->getOperand(2), N->getOperand(3));
22184 return DAG.getNode(ISD::SADDSAT, SDLoc(N), N->getValueType(0),
22185 N->getOperand(1), N->getOperand(2));
22187 return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
22188 N->getOperand(1), N->getOperand(2));
22190 return DAG.getNode(ISD::UADDSAT, SDLoc(N), N->getValueType(0),
22191 N->getOperand(1), N->getOperand(2));
22193 return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
22194 N->getOperand(1), N->getOperand(2));
22196 return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0),
22197 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22199 if (!N->getOperand(2).getValueType().isFloatingPoint())
22201 N->getValueType(0), N->getOperand(1), N->getOperand(2),
22202 N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
22205 if (!N->getOperand(2).getValueType().isFloatingPoint())
22207 N->getValueType(0), N->getOperand(1), N->getOperand(2),
22208 N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
22213 N->getValueType(0), N->getOperand(1), N->getOperand(2),
22214 N->getOperand(3), DAG.getCondCode(ISD::SETGE));
22219 N->getValueType(0), N->getOperand(1), N->getOperand(2),
22220 N->getOperand(3), DAG.getCondCode(ISD::SETGT));
22225 N->getValueType(0), N->getOperand(1), N->getOperand(2),
22226 N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
22231 N->getValueType(0), N->getOperand(1), N->getOperand(2),
22232 N->getOperand(3), DAG.getCondCode(ISD::SETNE));
22236 N->getValueType(0), N->getOperand(1), N->getOperand(2),
22237 N->getOperand(3), DAG.getCondCode(ISD::SETUO));
22252 return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
22253 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22275 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
22278 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
22281 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
22290 unsigned OC = N->getOpcode();
22304 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
22305 N->getOperand(0)->getOpcode() == ISD::SETCC);
22306 const SDValue SetCC = N->getOperand(0);
22310 if (!CCOp0->getValueType(0).isInteger() ||
22311 !CCOp1->getValueType(0).isInteger())
22315 cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get();
22323 DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp0);
22325 DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp1);
22328 SDLoc(SetCC), N->getValueType(0), Ext1, Ext2,
22329 cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get());
22335 // Convert zext(extract(shuffle a, b, [0,4,8,12])) -> and(uzp1(a, b), 255)
22340 EVT VT = N->getValueType(0);
22342 N->getOpcode() != ISD::ZERO_EXTEND ||
22343 N->getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
22346 unsigned ExtOffset = N->getOperand(0).getConstantOperandVal(1);
22350 EVT InVT = N->getOperand(0).getOperand(0).getValueType();
22351 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0).getOperand(0));
22359 Shuffle->getMask().slice(ExtOffset, VT.getVectorNumElements()), 4, Idx);
22366 Shuffle->getOperand(1).isUndef() &&
22368 Shuffle->getMask().slice(ExtOffset + VT.getVectorNumElements() / 2,
22375 Shuffle->getOperand(IsUndefDeInterleave ? 1 : 0));
22377 Shuffle->getOperand(IsUndefDeInterleave ? 0 : 1));
22385 DAG.getConstant((1 << InVT.getScalarSizeInBits()) - 1, DL, VT));
22396 EVT VT = N->getValueType(0);
22397 if (N->getOpcode() != ISD::ZERO_EXTEND ||
22401 SDValue Op = N->getOperand(0);
22402 unsigned ExtOffset = (unsigned)-1;
22428 if (ExtOffset == (unsigned)-1) {
22459 if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
22460 N->getOperand(0).getValueType().is64BitVector() &&
22461 (N->getOperand(0).getOpcode() == ISD::ABDU ||
22462 N->getOperand(0).getOpcode() == ISD::ABDS)) {
22463 SDNode *ABDNode = N->getOperand(0).getNode();
22469 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
22477 if (N->getValueType(0).isFixedLengthVector() &&
22478 N->getOpcode() == ISD::SIGN_EXTEND &&
22479 N->getOperand(0)->getOpcode() == ISD::SETCC)
22490 SDValue Bswap = N->getOperand(0);
22491 if (N->getOpcode() == ISD::ANY_EXTEND && Bswap.getOpcode() == ISD::BSWAP &&
22493 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64)) {
22495 SDValue NewAnyExtend = DAG.getNode(ISD::ANY_EXTEND, DL, N->getValueType(0),
22496 Bswap->getOperand(0));
22497 return DAG.getNode(AArch64ISD::REV16, SDLoc(N), N->getValueType(0),
22521 OrigAlignment, St.getMemOperand()->getFlags());
22524 if (BasePtr->getOpcode() == ISD::ADD &&
22525 isa<ConstantSDNode>(BasePtr->getOperand(1))) {
22526 BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
22527 BasePtr = BasePtr->getOperand(0);
22531 while (--NumVecElts) {
22538 St.getMemOperand()->getFlags());
22551 llvm_unreachable("No known SVE container for this MVT type");
22576 EVT VT = N->getValueType(0);
22586 SDValue Ops[] = { N->getOperand(0), // Chain
22587 N->getOperand(2), // Pg
22588 N->getOperand(3), // Base
22602 EVT VT = N->getValueType(0);
22603 EVT PtrTy = N->getOperand(3).getValueType();
22611 SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
22612 MINode->getOperand(3), DAG.getUNDEF(PtrTy),
22613 MINode->getOperand(2), PassThru,
22614 MINode->getMemoryVT(), MINode->getMemOperand(),
22631 EVT VT = N->getValueType(0);
22637 SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
22649 SDValue Data = N->getOperand(2);
22663 SDValue Ops[] = { N->getOperand(0), // Chain
22665 N->getOperand(4), // Base
22666 N->getOperand(3), // Pg
22670 return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
22676 SDValue Data = N->getOperand(2);
22678 EVT PtrTy = N->getOperand(4).getValueType();
22684 return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
22685 DAG.getUNDEF(PtrTy), MINode->getOperand(3),
22686 MINode->getMemoryVT(), MINode->getMemOperand(),
22693 /// if the zero constant is not re-used, since one instruction and one register
22739 int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
22740 if (Offset < -512 || Offset > 504)
22794 std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
22811 uint64_t IndexVal = CIndex->getZExtValue();
22830 if (S->isVolatile() || S->isIndexed())
22833 SDValue StVal = S->getValue();
22849 if (!Subtarget->isMisaligned128StoreSlow())
22852 // Don't split at -Oz.
22857 // those up regresses performance on micro-benchmarks and olden/bh.
22866 if (VT.getSizeInBits() != 128 || S->getAlign() >= Align(16) ||
22867 S->getAlign() <= Align(2))
22885 SDValue BasePtr = S->getBasePtr();
22887 DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
22888 S->getAlign(), S->getMemOperand()->getFlags());
22892 S->getPointerInfo(), S->getAlign(),
22893 S->getMemOperand()->getFlags());
22897   assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexpected Opcode!");
22899 // splice(pg, op1, undef) -> op1
22900 if (N->getOperand(2).isUndef())
22901 return N->getOperand(1);
22908 assert((N->getOpcode() == AArch64ISD::UUNPKHI ||
22909 N->getOpcode() == AArch64ISD::UUNPKLO) &&
22912 // uunpklo/hi undef -> undef
22913 if (N->getOperand(0).isUndef())
22914 return DAG.getUNDEF(N->getValueType(0));
22919 if (N->getOperand(0).getOpcode() == ISD::MLOAD &&
22920 N->getOpcode() == AArch64ISD::UUNPKLO) {
22921 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N->getOperand(0));
22922 SDValue Mask = MLD->getMask();
22925 if (MLD->isUnindexed() && MLD->getExtensionType() != ISD::SEXTLOAD &&
22926 SDValue(MLD, 0).hasOneUse() && Mask->getOpcode() == AArch64ISD::PTRUE &&
22927 (MLD->getPassThru()->isUndef() ||
22928 isZerosVector(MLD->getPassThru().getNode()))) {
22929 unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
22930 unsigned PgPattern = Mask->getConstantOperandVal(0);
22931 EVT VT = N->getValueType(0);
22941 VT, DL, MLD->getChain(), MLD->getBasePtr(), MLD->getOffset(), Mask,
22942 PassThru, MLD->getMemoryVT(), MLD->getMemOperand(),
22943 MLD->getAddressingMode(), ISD::ZEXTLOAD);
22956 if (N->getOpcode() != AArch64ISD::UZP1)
22958 SDValue Op0 = N->getOperand(0);
22959 EVT SrcVT = Op0->getValueType(0);
22960 EVT DstVT = N->getValueType(0);
22968 // uzp1(rshrnb(uunpklo(X),C), rshrnb(uunpkhi(X), C)) -> urshr(X, C)
22970 assert(N->getOpcode() == AArch64ISD::UZP1 && "Only UZP1 expected.");
22971 SDValue Op0 = N->getOperand(0);
22972 SDValue Op1 = N->getOperand(1);
22973 EVT ResVT = N->getValueType(0);
23001 // t1 = nxv8i16 add(X, 1 << (ShiftValue - 1))
23011 EVT VT = Srl->getValueType(0);
23012 if (!VT.isScalableVector() || !Subtarget->hasSVE2())
23051 SDValue Op0 = N->getOperand(0);
23052 SDValue Op1 = N->getOperand(1);
23053 EVT ResVT = N->getValueType(0);
23055 // uzp(extract_lo(x), extract_hi(x)) -> extract_lo(uzp x, x)
23067 SDValue Uzp = DAG.getNode(N->getOpcode(), DL, WidenedResVT, SourceVec,
23075 if (N->getOpcode() == AArch64ISD::UZP2)
23078 // uzp1(x, undef) -> concat(truncate(x), undef)
23146 // uzp1(bitcast(x), bitcast(y)) -> uzp1(x, y)
23165 // truncating uzp1(x, y) -> xtn(concat (x, y))
23178 // uzp1(xtn x, xtn y) -> xtn(uzp1 (x, y))
23231 unsigned Opc = N->getOpcode();
23249 SDValue Chain = N->getOperand(0);
23250 SDValue Pg = N->getOperand(1);
23251 SDValue Base = N->getOperand(2);
23252 SDValue Offset = N->getOperand(3);
23253 SDValue Ty = N->getOperand(4);
23255 EVT ResVT = N->getValueType(0);
23267 EVT ExtFromEVT = ExtFrom->getVT().getVectorElementType();
23269 // If the predicate for the sign- or zero-extended offset is the
23270 // same as the predicate used for this load and the sign-/zero-extension
23271 // was from a 32-bits...
23292 assert(N->getOpcode() == AArch64ISD::VASHR ||
23293 N->getOpcode() == AArch64ISD::VLSHR);
23295 SDValue Op = N->getOperand(0);
23298 unsigned ShiftImm = N->getConstantOperandVal(1);
23302 if (N->getOpcode() == AArch64ISD::VASHR &&
23304 N->getOperand(1) == Op.getOperand(1))
23309 if (N->getFlags().hasExact())
23322 // sunpklo(sext(pred)) -> sext(extract_low_half(pred))
23325 if (N->getOperand(0).getOpcode() == ISD::SIGN_EXTEND &&
23326 N->getOperand(0)->getOperand(0)->getValueType(0).getScalarType() ==
23328 SDValue CC = N->getOperand(0)->getOperand(0);
23329 auto VT = CC->getValueType(0).getHalfNumVectorElementsVT(*DAG.getContext());
23332 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), N->getValueType(0), Unpk);
23338 /// Target-specific DAG combine function for post-increment LD1 (lane) and
23339 /// post-increment LD1R.
23347 EVT VT = N->getValueType(0);
23354 LoadSDNode *LD = dyn_cast<LoadSDNode>(N->getOperand(LoadIdx).getNode());
23358 // If the Generic combiner already helped form a pre- or post-indexed load,
23360 if (LD->isIndexed())
23366 Lane = N->getOperand(2);
23368 if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
23373 EVT MemVT = LoadSDN->getMemoryVT();
23380 for (SDUse &U : LD->uses()) {
23390 if (N->hasOneUse()) {
23391 unsigned UseOpc = N->user_begin()->getOpcode();
23396 SDValue Addr = LD->getOperand(1);
23397 SDValue Vector = N->getOperand(0);
23399 for (SDUse &Use : Addr->uses()) {
23401 if (User->getOpcode() != ISD::ADD || Use.getResNo() != Addr.getResNo())
23405 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
23407 uint32_t IncVal = CInc->getZExtValue();
23427 Ops.push_back(LD->getOperand(0)); // Chain
23440 LoadSDN->getMemOperand());
23474 assert((N->getOpcode() == ISD::STORE || N->getOpcode() == ISD::MSTORE) &&
23478 if (!Store->isTruncatingStore() || Store->isIndexed())
23480 SDValue Ext = Store->getValue();
23485 SDValue Orig = Ext->getOperand(0);
23486 if (Store->getMemoryVT() != Orig.getValueType())
23488 return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
23489 Store->getBasePtr(), Store->getMemOperand());
23512 EVT MemVT = LD->getMemoryVT();
23514 LD->getOriginalAlign() >= 4)
23519 SDValue Chain = LD->getChain();
23520 SDValue BasePtr = LD->getBasePtr();
23521 MachineMemOperand *MMO = LD->getMemOperand();
23522 assert(LD->getOffset().isUndef() && "undef offset expected");
23551   // nontemporal loads larger than 256 bits for odd types so LDNPQ 256-bit
23557 if (Subtarget->supportsAddressTopByteIgnored())
23558 performTBISimplification(N->getOperand(1), DCI, DAG);
23561 if (LD->isVolatile() || !Subtarget->isLittleEndian())
23567 if (!LD->isNonTemporal())
23570 EVT MemVT = LD->getMemoryVT();
23577 SDValue Chain = LD->getChain();
23578 SDValue BasePtr = LD->getBasePtr();
23579 SDNodeFlags Flags = LD->getFlags();
23582   // Replace any non-temporal load over 256 bits with a series of 256-bit loads
23583   // and a scalar/vector load of less than 256 bits. This way we can utilize 256-bit
23589   // Create all 256-bit loads starting from offset 0 and up to (Num256Loads - 1) * 32.
23594 Align NewAlign = commonAlignment(LD->getAlign(), PtrOffset);
23596 NewVT, DL, Chain, NewPtr, LD->getPointerInfo().getWithOffset(PtrOffset),
23597 NewAlign, LD->getMemOperand()->getFlags(), LD->getAAInfo());
23604 // 256-bit loads and inserting the remaining load to it. We extract the
23607 unsigned PtrOffset = (MemVT.getSizeInBits() - BitsRemaining) / 8;
23613 Align NewAlign = commonAlignment(LD->getAlign(), PtrOffset);
23616 LD->getPointerInfo().getWithOffset(PtrOffset), NewAlign,
23617 LD->getMemOperand()->getFlags(), LD->getAAInfo());
23653 for (SDValue Operand : Op->op_values()) {
23690 unsigned BitsPerElement = std::max(64 / NumElts, 8u); // >= 64-bit vector
23710   // create 8x 16-bit values, and then perform the vector reduce.
23730 unsigned MaxBitMask = 1u << (VecVT.getVectorNumElements() - 1);
23745 if (!Store->isTruncatingStore())
23749 SDValue VecOp = Store->getValue();
23751 EVT MemVT = Store->getMemoryVT();
23770 return DAG.getStore(Store->getChain(), DL, ExtendedBits, Store->getBasePtr(),
23771 Store->getMemOperand());
23783 SDValue Value = ST->getValue();
23786 if (ST->isVolatile() || !Subtarget->isLittleEndian() ||
23791 assert(ST->getOffset().isUndef() && "undef offset expected");
23795 Value->getOperand(0).getValueType().getVectorElementType(), 4);
23799 {UndefVector, Value->getOperand(0), DAG.getVectorIdxConstant(0, DL)});
23805 SDValue Chain = ST->getChain();
23806 MachineMemOperand *MMO = ST->getMemOperand();
23811 SDValue Ptr2 = DAG.getMemBasePlusOffset(ST->getBasePtr(), Offset2, DL);
23817 SDValue Ptr1 = DAG.getMemBasePlusOffset(ST->getBasePtr(), Offset1, DL);
23822 Chain = DAG.getStore(Chain, DL, E0, ST->getBasePtr(),
23832 SDValue Chain = ST->getChain();
23833 SDValue Value = ST->getValue();
23834 SDValue Ptr = ST->getBasePtr();
23850 Value.getNode()->hasOneUse() && ST->isUnindexed() &&
23851 Subtarget->useSVEForFixedLengthVectors() &&
23853 ValueVT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits() &&
23856 ST->getMemoryVT(), ST->getMemOperand());
23861 if (Subtarget->supportsAddressTopByteIgnored() &&
23862 performTBISimplification(N->getOperand(2), DCI, DAG))
23871 if (ST->isTruncatingStore()) {
23872 EVT StoreVT = ST->getMemoryVT();
23876 trySimplifySrlAddToRshrnb(ST->getOperand(1), DAG, Subtarget)) {
23877 return DAG.getTruncStore(ST->getChain(), ST, Rshrnb, ST->getBasePtr(),
23878 StoreVT, ST->getMemOperand());
23890 SDValue Value = MST->getValue();
23891 SDValue Mask = MST->getMask();
23897 if (Value.getOpcode() == AArch64ISD::UZP1 && Value->hasOneUse() &&
23898 MST->isUnindexed() && Mask->getOpcode() == AArch64ISD::PTRUE &&
23907 unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
23908 unsigned PgPattern = Mask->getConstantOperandVal(0);
23916 return DAG.getMaskedStore(MST->getChain(), DL, Value.getOperand(0),
23917 MST->getBasePtr(), MST->getOffset(), Mask,
23918 MST->getMemoryVT(), MST->getMemOperand(),
23919 MST->getAddressingMode(),
23926 if (MST->isTruncatingStore()) {
23927 EVT ValueVT = Value->getValueType(0);
23928 EVT MemVT = MST->getMemoryVT();
23932 return DAG.getMaskedStore(MST->getChain(), DL, Rshrnb, MST->getBasePtr(),
23933 MST->getOffset(), MST->getMask(),
23934 MST->getMemoryVT(), MST->getMemOperand(),
23935 MST->getAddressingMode(), true);
23953 // ->
23968 // ->
23999 while (foldIndexIntoBase(BasePtr, Index, N->getScale(), SDLoc(N), DAG))
24009 EVT DataVT = N->getOperand(1).getValueType();
24011 // will later be re-extended to 64 bits in legalization
24014 if (ISD::isVectorShrinkable(Index.getNode(), 32, N->isIndexSigned())) {
24024 Stride = cast<ConstantSDNode>(Index.getOperand(0))->getSExtValue();
24034 Stride = Step << Shift->getZExtValue();
24070 SDValue Chain = MGS->getChain();
24071 SDValue Scale = MGS->getScale();
24072 SDValue Index = MGS->getIndex();
24073 SDValue Mask = MGS->getMask();
24074 SDValue BasePtr = MGS->getBasePtr();
24075 ISD::MemIndexType IndexType = MGS->getIndexType();
24083 SDValue PassThru = MGT->getPassThru();
24086 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
24087 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
24090 SDValue Data = MSC->getValue();
24092 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
24093 DL, Ops, MSC->getMemOperand(), IndexType,
24094 MSC->isTruncatingStore());
24097 SDValue Ops[] = {Chain, HG->getInc(), Mask, BasePtr,
24098 Index, Scale, HG->getIntID()};
24099 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), HG->getMemoryVT(),
24100 DL, Ops, HG->getMemOperand(), IndexType);
24103 /// Target-specific DAG combine function for NEON load/store intrinsics
24111 unsigned AddrOpIdx = N->getNumOperands() - 1;
24112 SDValue Addr = N->getOperand(AddrOpIdx);
24115 for (SDUse &Use : Addr->uses()) {
24117 if (User->getOpcode() != ISD::ADD || Use.getResNo() != Addr.getResNo())
24137 unsigned IntNo = N->getConstantOperandVal(1);
24186 VecTy = N->getOperand(2).getValueType();
24188 VecTy = N->getValueType(0);
24191 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
24193 uint32_t IncVal = CInc->getZExtValue();
24202 Ops.push_back(N->getOperand(0)); // Incoming chain
24206 Ops.push_back(N->getOperand(i));
24222 MemInt->getMemoryVT(),
24223 MemInt->getMemOperand());
24244 switch(V.getNode()->getOpcode()) {
24249 if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
24250 || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
24251 ExtType = LoadNode->getExtensionType();
24257 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
24258 if ((TypeNode->getVT() == MVT::i8 && width == 8)
24259 || (TypeNode->getVT() == MVT::i16 && width == 16)) {
24266 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
24267 if ((TypeNode->getVT() == MVT::i8 && width == 8)
24268 || (TypeNode->getVT() == MVT::i16 && width == 16)) {
24276 return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
24277 1LL << (width - 1);
24288 //  [ASCII DAG diagram: only its box and arrow outlines matched in this listing; labels omitted]
24319 // extension (8,15), 8 patterns unique to sign extensions (-8,-1), and 8
24349 // symbolic values and well known constants (0, 1, -1, MaxUInt) we can
24359 AddConstant -= (1 << (width-1));
24365 (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
24380 (AddConstant <= 0 && CompConstant >= -1 &&
24420   // (X & C) >u Mask --> (X & (C & ~Mask)) != 0
24421   // (X & C) <u Pow2 --> (X & (C & ~(Pow2 - 1))) == 0
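// Illustrative sketch, not part of AArch64ISelLowering.cpp: exhaustive 8-bit
// check of the two rewrites above, with Mask a contiguous low-bit mask
// (Mask == Pow2 - 1), which is the shape for which the identity holds.
#include <cassert>
int main() {
  for (unsigned Pow2 = 1; Pow2 <= 128; Pow2 <<= 1) {
    unsigned Mask = Pow2 - 1;
    for (unsigned X = 0; X < 256; ++X)
      for (unsigned C = 0; C < 256; ++C) {
        assert(((X & C) > Mask) == ((X & (C & ~Mask)) != 0)); // >u form
        assert(((X & C) < Pow2) == ((X & (C & ~Mask)) == 0)); // <u form
      }
  }
}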
24426 ConstantSDNode *SubsC = dyn_cast<ConstantSDNode>(SubsNode->getOperand(1));
24430 APInt SubsAP = SubsC->getAPIntValue();
24440 ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(AndNode->getOperand(1));
24444 APInt MaskAP = CC == AArch64CC::HI ? SubsAP : (SubsAP - 1);
24447 APInt AndSMask = (~MaskAP) & AndC->getAPIntValue();
24449 AArch64ISD::ANDS, DL, SubsNode->getVTList(), AndNode->getOperand(0),
24450 DAG.getConstant(AndSMask, DL, SubsC->getValueType(0)));
24453 N->getOperand(CCIndex)->getValueType(0));
24463 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), AArch64_CC,
24465 return DAG.getNode(N->getOpcode(), N, N->getVTList(), Ops);
24473 unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
24474 SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
24475 unsigned CondOpcode = SubsNode->getOpcode();
24477 if (CondOpcode != AArch64ISD::SUBS || SubsNode->hasAnyUseOfValue(0) ||
24478 !SubsNode->hasOneUse())
24484 SDNode *AndNode = SubsNode->getOperand(0).getNode();
24487 if (AndNode->getOpcode() != ISD::AND)
24494 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
24495 uint32_t CNV = CN->getZExtValue();
24505 SDValue AddValue = AndNode->getOperand(0);
24512 SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
24513 SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
24514 SDValue SubsInputValue = SubsNode->getOperand(1);
24531 cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
24532 cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
24537 SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
24538 SubsNode->getValueType(1));
24539 SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
24560 SDValue Chain = N->getOperand(0);
24561 SDValue Dest = N->getOperand(1);
24562 SDValue CCVal = N->getOperand(2);
24563 SDValue Cmp = N->getOperand(3);
24566 unsigned CC = CCVal->getAsZExtVal();
24576 if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
24611 unsigned CC = N->getConstantOperandVal(2);
24612 SDValue SUBS = N->getOperand(3);
24616 Zero = N->getOperand(0);
24617 CTTZ = N->getOperand(1);
24619 Zero = N->getOperand(1);
24620 CTTZ = N->getOperand(0);
24646 DAG.getConstant(BitWidth - 1, SDLoc(N), CTTZ.getValueType());
24659 SDValue L = Op->getOperand(0);
24660 SDValue R = Op->getOperand(1);
24662 static_cast<AArch64CC::CondCode>(Op->getConstantOperandVal(2));
24664 SDValue OpCmp = Op->getOperand(3);
24676 SDValue X = CmpLHS->getOperand(0);
24677 SDValue Y = CmpLHS->getOperand(1);
24687 if (CX->getAPIntValue() == CY->getAPIntValue())
24691 static_cast<AArch64CC::CondCode>(CmpLHS->getConstantOperandVal(2));
24692 SDValue Cond = CmpLHS->getOperand(3);
24705 EVT VT = Op->getValueType(0);
24713 // (CSEL (ADD (ADD x y) -c) f LO (SUBS x c)) to
24717 SDValue SubsNode = N->getOperand(3);
24723 EVT VT = N->getValueType(0);
24732 DAG.getConstant(-CmpOpConst->getAPIntValue(), SDLoc(CmpOpConst),
24733 CmpOpConst->getValueType(0));
24734 SubsOp = DAG.getConstant(CmpOpConst->getAPIntValue(), SDLoc(CmpOpConst),
24735 CmpOpConst->getValueType(0));
24763 SDValue TReassocOp = GetReassociationOp(N->getOperand(0), ExpectedOp);
24764 SDValue FReassocOp = GetReassociationOp(N->getOperand(1), ExpectedOp);
24773 return N->getOperand(OpNum);
24774 SDValue Res = DAG.getNode(ISD::ADD, SDLoc(N->getOperand(OpNum)), VT,
24776 DAG.ReplaceAllUsesWith(N->getOperand(OpNum), Res);
24783 DAG.getConstant(NewCC, SDLoc(N->getOperand(2)), MVT_CC),
24787 auto CC = static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
24796 // swapped. Due to canonicalization, this only helps for non-constant
24804 if ((CC == AArch64CC::EQ || CC == AArch64CC::NE) && !CmpOpConst->isZero())
24815 auto ExpectedOp = DAG.getConstant(-NewCmpConst, SDLoc(CmpOpConst),
24816 CmpOpConst->getValueType(0));
24818 CmpOpConst->getValueType(0));
24824 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxValue(),
24825 CmpOpConst->getAPIntValue() + 1, AArch64CC::LO);
24828 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxValue(),
24829 CmpOpConst->getAPIntValue() + 1, AArch64CC::HS);
24831 return CheckedFold(!CmpOpConst->getAPIntValue().isZero(),
24832 CmpOpConst->getAPIntValue() - 1, AArch64CC::LS);
24834 return CheckedFold(!CmpOpConst->getAPIntValue().isZero(),
24835 CmpOpConst->getAPIntValue() - 1, AArch64CC::HI);
24837 return CheckedFold(!CmpOpConst->getAPIntValue().isMinSignedValue(),
24838 CmpOpConst->getAPIntValue() - 1, AArch64CC::LE);
24840 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxSignedValue(),
24841 CmpOpConst->getAPIntValue() + 1, AArch64CC::LT);
24843 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxSignedValue(),
24844 CmpOpConst->getAPIntValue() + 1, AArch64CC::GE);
24846 return CheckedFold(!CmpOpConst->getAPIntValue().isMinSignedValue(),
24847 CmpOpConst->getAPIntValue() - 1, AArch64CC::GT);
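// Illustrative sketch, not part of AArch64ISelLowering.cpp: the +1/-1 constant
// adjustments above are the usual adjacent-constant comparison identities; the
// isZero/isMaxValue/isMinSignedValue/isMaxSignedValue guards rule out the
// wrapping cases. Exhaustive 8-bit check:
#include <cassert>
#include <cstdint>
int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C = 0; C < 256; ++C) {
      if (C != 255) {
        assert((X <= C) == (X < C + 1));
        assert((X > C) == (X >= C + 1));
      }
      if (C != 0) {
        assert((X < C) == (X <= C - 1));
        assert((X >= C) == (X > C - 1));
      }
      int8_t SX = static_cast<int8_t>(X), SC = static_cast<int8_t>(C);
      if (SC != INT8_MAX)
        assert((SX > SC) == (SX >= SC + 1));
      if (SC != INT8_MIN)
        assert((SX >= SC) == (SX > SC - 1));
    }
}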
24857 // CSEL x, x, cc -> x
24858 if (N->getOperand(0) == N->getOperand(1))
24859 return N->getOperand(0);
24869 // CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
24870 // CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
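// Illustrative sketch, not part of AArch64ISelLowering.cpp: the CSEL can be
// dropped because the AArch64 cttz lowering (rbit + clz) yields the bit width
// for a zero input, and AND-ing with bitwidth-1 maps that back to 0.
// std::countr_zero (C++20 <bit>) has the same zero-input behaviour.
#include <bit>
#include <cassert>
#include <cstdint>
int main() {
  for (uint32_t X : {0u, 1u, 8u, 0x80000000u, 0xdeadbeefu}) {
    unsigned CSel = (X == 0) ? 0u : unsigned(std::countr_zero(X)); // CSEL 0, cttz(X), eq(X, 0)
    assert(CSel == (unsigned(std::countr_zero(X)) & 31u));         // AND cttz(X), 31
  }
}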
24874 // CSEL a, b, cc, SUBS(x, y) -> CSEL a, b, swapped(cc), SUBS(y, x)
24876 SDValue Cond = N->getOperand(3);
24878 Cond.hasOneUse() && Cond->hasNUsesOfValue(0, 0) &&
24879 DAG.doesNodeExist(ISD::SUB, N->getVTList(),
24881 !DAG.doesNodeExist(ISD::SUB, N->getVTList(),
24885 static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
24889 SDValue Sub = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(),
24891 return DAG.getNode(AArch64ISD::CSEL, DL, N->getVTList(), N->getOperand(0),
24892 N->getOperand(1),
24901 // Try to re-use an already extended operand of a vector SetCC feeding a
24905 EVT Op0MVT = Op->getOperand(0).getValueType();
24906 if (!Op0MVT.isVector() || Op->use_empty())
24911 SDNode *FirstUse = *Op->user_begin();
24912 if (FirstUse->getOpcode() != ISD::VSELECT)
24914 EVT UseMVT = FirstUse->getValueType(0);
24917 if (any_of(Op->users(), [&UseMVT](const SDNode *N) {
24918 return N->getOpcode() != ISD::VSELECT || N->getValueType(0) != UseMVT;
24923 if (!ISD::isConstantSplatVector(Op->getOperand(1).getNode(), V))
24929 ISD::CondCode CC = cast<CondCodeSDNode>(Op->getOperand(2))->get();
24931 // split the SET_CC and re-use the extended version of the operand.
24933 Op->getOperand(0));
24935 Op->getOperand(0));
24938 Op1ExtV = DAG.getNode(ISD::SIGN_EXTEND, DL, UseMVT, Op->getOperand(1));
24941 Op1ExtV = DAG.getNode(ISD::ZERO_EXTEND, DL, UseMVT, Op->getOperand(1));
24946 Op0ExtV, Op1ExtV, Op->getOperand(2));
24952 SDValue Vec = N->getOperand(0);
24958 return getVectorBitwiseReduce(N->getOpcode(), Vec, N->getValueType(0), DL,
24968 assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
24969 SDValue LHS = N->getOperand(0);
24970 SDValue RHS = N->getOperand(1);
24971 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
24973 EVT VT = N->getValueType(0);
24980 LHS->getOpcode() == AArch64ISD::CSEL &&
24981 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
24982 LHS->hasOneUse()) {
24996 // setcc (srl x, imm), 0, ne ==> setcc (and x, (-1 << imm)), 0, ne
24998 LHS->getOpcode() == ISD::SRL && isa<ConstantSDNode>(LHS->getOperand(1)) &&
24999 LHS->getConstantOperandVal(1) < VT.getScalarSizeInBits() &&
25000 LHS->hasOneUse()) {
25001 EVT TstVT = LHS->getValueType(0);
25004 uint64_t TstImm = -1ULL << LHS->getConstantOperandVal(1);
25005 SDValue TST = DAG.getNode(ISD::AND, DL, TstVT, LHS->getOperand(0),
25007 return DAG.getNode(ISD::SETCC, DL, VT, TST, RHS, N->getOperand(2));
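// Illustrative sketch, not part of AArch64ISelLowering.cpp: the srl-to-and
// rewrite above is the identity ((x >> imm) != 0) == ((x & (-1 << imm)) != 0),
// i.e. "is any bit at or above position imm set". The model uses ~0u for the
// mask so every shift stays well-defined.
#include <cassert>
#include <cstdint>
int main() {
  for (unsigned Imm = 1; Imm < 32; ++Imm)
    for (uint32_t X : {0u, 1u, 0x80u, 0xffffu, 0x80000000u, 0xdeadbeefu})
      assert(((X >> Imm) != 0) == ((X & (~0u << Imm)) != 0));
}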
25013 // setcc (iN (bitcast (vNi1 X))), -1, (eq|ne)
25014 // ==> setcc (iN (sext (i1 (vecreduce_and (vNi1 X))))), -1, (eq|ne)
25018 LHS->getOpcode() == ISD::BITCAST) {
25019 EVT ToVT = LHS->getValueType(0);
25020 EVT FromVT = LHS->getOperand(0).getValueType();
25025 DL, MVT::i1, LHS->getOperand(0));
25039 // Replace a flag-setting operator (eg ANDS) with the generic version
25045 SDValue LHS = N->getOperand(0);
25046 SDValue RHS = N->getOperand(1);
25047 EVT VT = N->getValueType(0);
25050 if (!N->hasAnyUseOfValue(1)) {
25051 SDValue Res = DCI.DAG.getNode(GenericOpcode, DL, VT, N->ops());
25056 // Combine identical generic nodes into this node, re-using the result.
25068 SDValue Pred = N->getOperand(0);
25069 SDValue LHS = N->getOperand(1);
25070 SDValue RHS = N->getOperand(2);
25071 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
25074 LHS->getOpcode() != ISD::SIGN_EXTEND)
25077 SDValue Extract = LHS->getOperand(0);
25078 if (Extract->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
25079 Extract->getValueType(0) != N->getValueType(0) ||
25080 Extract->getConstantOperandVal(1) != 0)
25083 SDValue InnerSetCC = Extract->getOperand(0);
25084 if (InnerSetCC->getOpcode() != AArch64ISD::SETCC_MERGE_ZERO)
25095 Pred->getConstantOperandVal(0) >= AArch64SVEPredPattern::vl1 &&
25096 Pred->getConstantOperandVal(0) <= AArch64SVEPredPattern::vl256)
25104 assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
25108 SDValue Pred = N->getOperand(0);
25109 SDValue LHS = N->getOperand(1);
25110 SDValue RHS = N->getOperand(2);
25111 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
25117 LHS->getOpcode() == ISD::SIGN_EXTEND &&
25118 LHS->getOperand(0)->getValueType(0) == N->getValueType(0)) {
25122 if (LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
25123 LHS->getOperand(0)->getOperand(0) == Pred)
25124 return LHS->getOperand(0);
25128 // -> nxvNi1 ...
25130 return LHS->getOperand(0);
25134 // -> nxvNi1 and(pred, ...)
25138 return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0),
25139 LHS->getOperand(0), Pred);
25152 if (!Op->hasOneUse())
25155 // We don't handle undef/constant-fold cases below, as they should have
25159 // (tbz (trunc x), b) -> (tbz x, b)
25161 if (Op->getOpcode() == ISD::TRUNCATE &&
25162 Bit < Op->getValueType(0).getSizeInBits()) {
25163 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
25166 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
25167 if (Op->getOpcode() == ISD::ANY_EXTEND &&
25168 Bit < Op->getOperand(0).getValueSizeInBits()) {
25169 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
25172 if (Op->getNumOperands() != 2)
25175 auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
25179 switch (Op->getOpcode()) {
25183 // (tbz (and x, m), b) -> (tbz x, b)
25185 if ((C->getZExtValue() >> Bit) & 1)
25186 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
25189 // (tbz (shl x, c), b) -> (tbz x, b-c)
25191 if (C->getZExtValue() <= Bit &&
25192 (Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
25193 Bit = Bit - C->getZExtValue();
25194 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
25198 // (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
25200 Bit = Bit + C->getZExtValue();
25201 if (Bit >= Op->getValueType(0).getSizeInBits())
25202 Bit = Op->getValueType(0).getSizeInBits() - 1;
25203 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
25205 // (tbz (srl x, c), b) -> (tbz x, b+c)
25207 if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
25208 Bit = Bit + C->getZExtValue();
25209 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
25213 // (tbz (xor x, -1), b) -> (tbnz x, b)
25215 if ((C->getZExtValue() >> Bit) & 1)
25217 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
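// Illustrative sketch, not part of AArch64ISelLowering.cpp: the shift folds in
// getTestBitOperand only move the tested bit index. Bit b of (x << c) is bit
// b-c of x (for c <= b), and bit b of a logical right shift (x >> c) is bit
// b+c of x (for b+c < width).
#include <cassert>
#include <cstdint>
static bool bit(uint64_t V, unsigned B) { return (V >> B) & 1; }
int main() {
  for (uint64_t X : {0ull, 1ull, 0x8000000000000000ull, 0xdeadbeefcafef00dull})
    for (unsigned C = 0; C < 8; ++C)
      for (unsigned B = C; B < 56; ++B) {
        assert(bit(X << C, B) == bit(X, B - C)); // (tbz (shl x, c), b) -> (tbz x, b-c)
        assert(bit(X >> C, B) == bit(X, B + C)); // (tbz (srl x, c), b) -> (tbz x, b+c)
      }
}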
25221 // Optimize a single-bit zero/non-zero test and branch.
25225 unsigned Bit = N->getConstantOperandVal(2);
25227 SDValue TestSrc = N->getOperand(1);
25233 unsigned NewOpc = N->getOpcode();
25244 return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
25245 DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
25254 auto SelectA = N->getOperand(1);
25255 auto SelectB = N->getOperand(2);
25256 auto NTy = N->getValueType(0);
25260 SDValue SetCC = N->getOperand(0);
25275 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
25286 // vselect (v1i1 setcc) ->
25295 SDValue N0 = N->getOperand(0);
25299 return N->getOperand(1);
25302 return N->getOperand(2);
25304 // Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
25305   // into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
25307 SDValue SetCC = N->getOperand(0);
25313 SDNode *SplatLHS = N->getOperand(1).getNode();
25314 SDNode *SplatRHS = N->getOperand(2).getNode();
25316 if (CmpLHS.getValueType() == N->getOperand(1).getValueType() &&
25326 NumElts, DAG.getConstant(VT.getScalarSizeInBits() - 1, SDLoc(N),
25331 auto Or = DAG.getNode(ISD::OR, SDLoc(N), VT, Shift, N->getOperand(1));
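// Illustrative sketch, not part of AArch64ISelLowering.cpp: the sign-select
// pattern above is the identity (x > -1 ? 1 : -1) == ((x >> (N-1)) | 1), using
// an arithmetic right shift (guaranteed for signed types since C++20).
#include <cassert>
#include <cstdint>
int main() {
  for (int32_t X : {INT32_MIN, -42, -1, 0, 1, 42, INT32_MAX})
    assert((X > -1 ? 1 : -1) == ((X >> 31) | 1));
}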
25343 EVT ResVT = N->getValueType(0);
25349 SDValue IfTrue = N->getOperand(1);
25350 SDValue IfFalse = N->getOperand(2);
25353 cast<CondCodeSDNode>(N0.getOperand(2))->get());
25359 /// the compare-mask instructions rather than going via NZCV, even if LHS and
25365 SDValue N0 = N->getOperand(0);
25366 EVT ResVT = N->getValueType(0);
25378 "Scalar-SETCC feeding SELECT has unexpected result type!");
25381   // largest real NEON comparison is 64 bits per lane, which means the result is
25382   // at most 32 bits and an illegal vector. Just bail out for now.
25424 return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
25429 EVT VT = N->getValueType(0);
25435 SmallVector<SDValue> Ops(N->ops());
25436 if (SDNode *LN = DCI.DAG.getNodeIfExists(N->getOpcode(),
25443 if (N->getOpcode() == AArch64ISD::DUP) {
25452 SDValue EXTRACT_VEC_ELT = N->getOperand(0);
25470 if (N->getValueType(0) == N->getOperand(0).getValueType())
25471 return N->getOperand(0);
25472 if (N->getOperand(0).getOpcode() == AArch64ISD::NVCAST)
25473 return DAG.getNode(AArch64ISD::NVCAST, SDLoc(N), N->getValueType(0),
25474 N->getOperand(0).getOperand(0));
25481 // globaladdr as (globaladdr + constant) - constant.
25486 if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
25490 uint64_t MinOffset = -1ull;
25491 for (SDNode *N : GN->users()) {
25492 if (N->getOpcode() != ISD::ADD)
25494 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
25496 C = dyn_cast<ConstantSDNode>(N->getOperand(1));
25499 MinOffset = std::min(MinOffset, C->getZExtValue());
25501 uint64_t Offset = MinOffset + GN->getOffset();
25505 // (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
25506 if (Offset <= uint64_t(GN->getOffset()))
25521 const GlobalValue *GV = GN->getGlobal();
25522 Type *T = GV->getValueType();
25523 if (!T->isSized() ||
25524 Offset > GV->getDataLayout().getTypeAllocSize(T))
25535 SDValue BR = N->getOperand(0);
25536 if (!Subtarget->hasCSSC() || BR.getOpcode() != ISD::BITREVERSE ||
25588 OffsetConst->getZExtValue(), ScalarSizeInBytes);
25594 const SDValue Src = N->getOperand(2);
25595 const EVT SrcVT = Src->getValueType(0);
25617 SDValue Base = N->getOperand(4);
25620 SDValue Offset = N->getOperand(5);
25623 // applies to non-temporal scatters because there's no instruction that takes
25635 // In the case of non-temporal gather loads there's only one SVE instruction
25636 // per data-size: "scalar + vector", i.e.
25648 // immediates outside that range and non-immediate scalar offsets use SST1 or
25679 // Keep the original type of the input data to store - this is needed to be
25694 SDValue Ops[] = {N->getOperand(0), // Chain
25696 N->getOperand(3), // Pg
25707 const EVT RetVT = N->getValueType(0);
25719 SDValue Base = N->getOperand(3);
25722 SDValue Offset = N->getOperand(4);
25725 // offsets. This applies to non-temporal and quadword gathers, which do not
25737 // In the case of non-temporal gather loads and quadword gather loads there's
25751 // immediates outside that range and non-immediate scalar offsets use
25784 // Keep the original output value type around - this is needed to be able to
25792 SDValue Ops[] = {N->getOperand(0), // Chain
25793 N->getOperand(2), // Pg
25814 SDValue Src = N->getOperand(0);
25815 unsigned Opc = Src->getOpcode();
25817 // Sign extend of an unsigned unpack -> signed unpack
25823 // Push the sign extend to the operand of the unpack
25827 // ->
25829 // ->
25831 SDValue ExtOp = Src->getOperand(0);
25832 auto VT = cast<VTSDNode>(N->getOperand(1))->getVT();
25844 return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
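// Illustrative sketch (not part of this file; helper names are invented): a
// per-lane model of the unpack rewrite above for an i8 lane widened to i16,
// assuming two's-complement narrowing conversions.
#include <cassert>
#include <cstdint>
static int16_t sextInRegOfUUnpk(uint8_t Lane) {
  uint16_t Widened = Lane;                    // uunpklo: zero-extend the lane
  return static_cast<int8_t>(Widened & 0xFF); // sext_inreg from i8
}
static int16_t sUnpk(uint8_t Lane) {
  return static_cast<int8_t>(Lane);           // sunpklo: sign-extend the lane
}
static void checkUnpackRewrite() {
  for (unsigned V = 0; V != 256; ++V)
    assert(sextInRegOfUUnpk(uint8_t(V)) == sUnpk(uint8_t(V)));
}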
25919 EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
25920 EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();
25925 EVT DstVT = N->getValueType(0);
25929 for (unsigned I = 0; I < Src->getNumOperands(); ++I)
25930 Ops.push_back(Src->getOperand(I));
25941 /// offset vector is an unpacked 32-bit scalable vector. The other cases (Offset
25945 SDValue Offset = N->getOperand(OffsetPos);
25951 // Extend the unpacked offset vector to 64-bit lanes.
25954 SmallVector<SDValue, 5> Ops(N->ops());
25955 // Replace the offset operand with the 64-bit one.
25958 return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
25970 if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes))
25974 SmallVector<SDValue, 5> Ops(N->ops());
25982 return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
26011 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!");
26012 SDValue InsertVec = N->getOperand(0);
26013 SDValue InsertElt = N->getOperand(1);
26014 SDValue InsertIdx = N->getOperand(2);
26033 // If we get here we are effectively trying to zero lanes 1-N of a vector.
26036 if (N->getValueType(0) != ExtractVec.getValueType())
26057 SDValue N0 = N->getOperand(0);
26058 EVT VT = N->getValueType(0);
26061 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
26069 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
26073 N0.hasOneUse() && Subtarget->useSVEForFixedLengthVectors() &&
26075 VT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits()) {
26078 LN0->getChain(), LN0->getBasePtr(),
26079 N0.getValueType(), LN0->getMemOperand());
26094 EVT VT = N->getValueType(0);
26097 if (!VT.isScalableVector() || Subtarget->hasSVE2() || Subtarget->hasSME())
26102 SDValue Mask = N->getOperand(0);
26103 SDValue In1 = N->getOperand(1);
26104 SDValue In2 = N->getOperand(2);
26113 EVT VT = N->getValueType(0);
26115 SDValue Insert = N->getOperand(0);
26123 uint64_t IdxDupLane = N->getConstantOperandVal(1);
26141 DAG.getUNDEF(NewSubvecVT), Subvec, Insert->getOperand(2));
26143 NewInsert, N->getOperand(1));
26154 SDValue LHS = N->getOperand(0);
26155 SDValue RHS = N->getOperand(1);
26183 // You can see the regressions on test/CodeGen/AArch64/aarch64-smull.ll
26206 if (ExtractHighSrcVec->use_size() != 2)
26210 for (SDNode *User : ExtractHighSrcVec.getNode()->users()) {
26214 if (User->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
26215 !isNullConstant(User->getOperand(1))) {
26223 if (!ExtractLow || !ExtractLow->hasOneUse())
26228 SDNode *ExtractLowUser = *ExtractLow.getNode()->user_begin();
26229 if (ExtractLowUser->getOpcode() != N->getOpcode()) {
26232 if (ExtractLowUser->getOperand(0) == ExtractLow) {
26233 if (ExtractLowUser->getOperand(1).getOpcode() == ISD::TRUNCATE)
26234 TruncLow = ExtractLowUser->getOperand(1);
26238 if (ExtractLowUser->getOperand(0).getOpcode() == ISD::TRUNCATE)
26239 TruncLow = ExtractLowUser->getOperand(0);
26248 // You can see the regressions on test/CodeGen/AArch64/aarch64-smull.ll
26311 EVT VT = N->getValueType(0);
26315 SDValue ZEXT = N->getOperand(0);
26347 /// (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2))
26356 SDValue Op0 = N->getOperand(0);
26360 SDValue C1 = Op0->getOperand(1);
26361 SDValue C2 = N->getOperand(1);
26366 if (N->hasOneUse()) {
26367 unsigned UseOpc = N->user_begin()->getOpcode();
26374 EVT VT = N->getValueType(0);
26377 // DAGCombiner will simplify (and (op x...), (op y...)) -> (op (and x, y))
26383 SDValue X = Op0->getOperand(0);
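// Illustrative sketch (not part of this file; helper names are invented): the
// bitwise identity behind the (shl (and X, C1), C2) rewrite above, shown on
// 32-bit unsigned values with C2 < 32.
#include <cassert>
#include <cstdint>
static uint32_t shlOfAnd(uint32_t X, uint32_t C1, unsigned C2) {
  return (X & C1) << C2;
}
static uint32_t andOfShl(uint32_t X, uint32_t C1, unsigned C2) {
  return (X << C2) & (C1 << C2);
}
static void checkShlOfAnd() {
  assert(shlOfAnd(0xDEADBEEFu, 0x00FF00FFu, 8) ==
         andOfShl(0xDEADBEEFu, 0x00FF00FFu, 8));
}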
26391 switch (N->getOpcode()) {
26424 APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits());
26426 APInt::getAllOnes(N->getValueType(0).getVectorNumElements());
26549 switch (N->getConstantOperandVal(1)) {
26700 unsigned IntrinsicID = N->getConstantOperandVal(1);
26707 N->getOperand(0), DAG.getConstant(Register, DL, MVT::i32));
26717 DAG.getVTList(MVT::Other), N->getOperand(0),
26718 N->getOperand(2), N->getOperand(3));
26721 DAG.getVTList(MVT::Other), N->getOperand(0),
26722 N->getOperand(2), N->getOperand(3));
26740 // we can't perform a tail-call. In particular, we need to check for
26745 if (N->getNumValues() != 1)
26747 if (!N->hasNUsesOfValue(1, 0))
26751 SDNode *Copy = *N->user_begin();
26752 if (Copy->getOpcode() == ISD::CopyToReg) {
26755 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
26758 TCChain = Copy->getOperand(0);
26759 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
26763 for (SDNode *Node : Copy->users()) {
26764 if (Node->getOpcode() != AArch64ISD::RET_GLUE)
26780 bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
26781 return CI->isTailCall();
26788 if (!CstOffset || CstOffset->isZero())
26794 return isInt<9>(CstOffset->getSExtValue());
26801 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
26804 // Non-null if there is exactly one user of the loaded value (ignoring chain).
26806 for (SDUse &U : N->uses()) {
26812 ValOnlyUser = nullptr; // Multiple non-chain uses, bail out.
26823 if (ValOnlyUser && ValOnlyUser->getValueType(0).isScalableVector() &&
26824 (ValOnlyUser->getOpcode() == ISD::SPLAT_VECTOR ||
26825 (ValOnlyUser->getOpcode() == AArch64ISD::DUP_MERGE_PASSTHRU &&
26826 IsUndefOrZero(ValOnlyUser->getOperand(2)))))
26829 Base = Op->getOperand(0);
26832 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
26833 int64_t RHSC = RHS->getSExtValue();
26834 if (Op->getOpcode() == ISD::SUB)
26835 RHSC = -(uint64_t)RHSC;
26838 // Always emit pre-inc/post-inc addressing mode. Use negated constant offset
26840 Offset = DAG.getConstant(RHSC, SDLoc(N), RHS->getValueType(0));
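// Illustrative sketch (not part of this file; the helper name is invented):
// one reason negations like the one above go through uint64_t is that negating
// INT64_MIN as a signed value is undefined behaviour, while unsigned negation
// wraps; converting back matches two's complement on AArch64 in practice.
#include <cstdint>
static int64_t negateOffset(int64_t V) {
  return static_cast<int64_t>(-static_cast<uint64_t>(V));
}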
26853 VT = LD->getMemoryVT();
26854 Ptr = LD->getBasePtr();
26856 VT = ST->getMemoryVT();
26857 Ptr = ST->getBasePtr();
26873 VT = LD->getMemoryVT();
26874 Ptr = LD->getBasePtr();
26876 VT = ST->getMemoryVT();
26877 Ptr = ST->getBasePtr();
26883 // Post-indexing updates the base, so it's not a valid transform
26895 SDValue Op = N->getOperand(0);
26896 EVT VT = N->getValueType(0);
26923 SDValue Op = N->getOperand(0);
26924 EVT VT = N->getValueType(0);
26937 SDValue Op = N->getOperand(0);
26938 EVT VT = N->getValueType(0);
26958 "Expected fp->int bitcast!");
26989 EVT VT = N->getValueType(0);
26992 !N->getFlags().hasAllowReassociation()) ||
26993 (VT.getScalarType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
26997 SDValue X = N->getOperand(0);
26998 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(1));
27000 Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
27001 X = N->getOperand(1);
27006 if (Shuf->getOperand(0) != X || !Shuf->getOperand(1)->isUndef())
27010 ArrayRef<int> Mask = Shuf->getMask();
27012 if (Mask[I] != (I % 2 == 0 ? I + 1 : I - 1))
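// Illustrative sketch (not part of this file; the helper name is invented):
// the mask test above accepts exactly the "swap adjacent lanes" permutation,
// e.g. {1,0,3,2,5,4,7,6}: lane I maps to I+1 for even I and to I-1 for odd I.
#include <cstddef>
#include <vector>
static bool isAdjacentLaneSwapMask(const std::vector<int> &Mask) {
  for (size_t I = 0, E = Mask.size(); I != E; ++I)
    if (Mask[I] != static_cast<int>(I % 2 == 0 ? I + 1 : I - 1))
      return false;
  return true;
}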
27041 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
27050 SDValue In = N->getOperand(0);
27058 EVT VT = N->getValueType(0);
27067 auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
27071 unsigned Index = CIndex->getZExtValue();
27078 SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
27101 assert(N->getValueType(0) == MVT::i128 &&
27104 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
27105 if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
27106 // LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
27109 createGPRPairNode(DAG, N->getOperand(2)), // Compare value
27110 createGPRPairNode(DAG, N->getOperand(3)), // Store value
27111 N->getOperand(1), // Ptr
27112 N->getOperand(0), // Chain in
27116 switch (MemOp->getMergedOrdering()) {
27152 switch (MemOp->getMergedOrdering()) {
27171 auto Desired = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
27172 auto New = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
27173 SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
27174 New.first, New.second, N->getOperand(0)};
27188 // LowerATOMIC_LOAD_AND). We can't take that approach with 128-bit, because
27189 // the type is not legal. Therefore we shouldn't expect to see a 128-bit
27264 // LSE128 has 128-bit RMW ops, but i128 is not a legal type, so lower it
27272 assert(N->getValueType(0) == MVT::i128 &&
27275 if (!Subtarget->hasLSE128())
27278 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
27279 const SDValue &Chain = N->getOperand(0);
27280 const SDValue &Ptr = N->getOperand(1);
27281 const SDValue &Val128 = N->getOperand(2);
27285 const unsigned ISDOpcode = N->getOpcode();
27287 getAtomicLoad128Opcode(ISDOpcode, MemOp->getMergedOrdering());
27293 DAG.getConstant(-1ULL, dl, MVT::i64), Val2x64.first);
27296 DAG.getConstant(-1ULL, dl, MVT::i64), Val2x64.second);
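// Illustrative sketch (not part of this file; helper names are invented): the
// XOR with all-ones above exists because LDCLR/LDCLRP performs a bit-clear,
// i.e. memory &= ~operand, so an atomic AND with mask M is issued as a clear
// of ~M. A per-64-bit-half model of that equivalence:
#include <cassert>
#include <cstdint>
static uint64_t ldclrModel(uint64_t Mem, uint64_t ClearArg) {
  return Mem & ~ClearArg;
}
static void checkAndViaClear() {
  uint64_t Mem = 0xF0F0F0F0F0F0F0F0ULL, Mask = 0x00FF00FF00FF00FFULL;
  assert(ldclrModel(Mem, ~Mask) == (Mem & Mask));
}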
27319 switch (N->getOpcode()) {
27378 assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
27385 assert(N->getValueType(0) != MVT::i128 &&
27386 "128-bit ATOMIC_LOAD_AND should be lowered directly to LDCLRP");
27391 assert(cast<AtomicSDNode>(N)->getVal().getValueType() == MVT::i128 &&
27392 "Expected 128-bit atomicrmw.");
27400 EVT MemVT = LoadNode->getMemoryVT();
27401 // Handle lowering 256 bit non-temporal loads into LDNP for little-endian
27403 if (LoadNode->isNonTemporal() && Subtarget->isLittleEndian() &&
27415 {LoadNode->getChain(), LoadNode->getBasePtr()},
27416 LoadNode->getMemoryVT(), LoadNode->getMemOperand());
27424 if ((!LoadNode->isVolatile() && !LoadNode->isAtomic()) ||
27425 LoadNode->getMemoryVT() != MVT::i128) {
27426 // Non-volatile or atomic loads are optimized later in AArch64's load/store
27434 AN && AN->getSuccessOrdering() == AtomicOrdering::Acquire;
27438 assert(Subtarget->hasFeature(AArch64::FeatureRCPC3));
27442 {LoadNode->getChain(), LoadNode->getBasePtr()},
27443 LoadNode->getMemoryVT(), LoadNode->getMemOperand());
27449 Result.getValue(FirstRes), Result.getValue(1 - FirstRes));
27460 // CONCAT_VECTORS -- but delegate to common code for result type
27464 EVT VT = N->getValueType(0);
27467 static_cast<Intrinsic::ID>(N->getConstantOperandVal(0));
27475 auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
27477 N->getOperand(1), Op2, N->getOperand(3));
27485 auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
27487 N->getOperand(1), Op2, N->getOperand(3));
27496 N->getOperand(1), N->getOperand(2));
27505 N->getOperand(1), N->getOperand(2));
27513 getRuntimePStateSM(DAG, Chain, DL, N->getValueType(0));
27529 auto V = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, NewVT, N->ops());
27537 assert(N->getValueType(0) == MVT::i128 &&
27538 "READ_REGISTER custom lowering is only for 128-bit sysregs");
27539 SDValue Chain = N->getOperand(0);
27540 SDValue SysRegName = N->getOperand(1);
27547 // of the 128-bit System Register value.
27558 if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
27580 // In v8.4a, ldp and stp instructions are guaranteed to be single-copy atomic
27581 // provided the address is 16-byte aligned.
27583 if (!Subtarget->hasLSE2())
27587 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
27588 LI->getAlign() >= Align(16);
27591 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
27592 SI->getAlign() >= Align(16);
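// Illustrative sketch (not part of this file; the type and helper are
// invented): the kind of source-level access this predicate is about - a
// 16-byte-aligned, 128-bit atomic object. With +lse2 such an access may be
// kept as a single LDP/STP; actual codegen depends on the subtarget features
// and is not guaranteed by this sketch.
#include <atomic>
struct alignas(16) Pair128 {
  long long Lo;
  long long Hi;
};
static Pair128 loadPair(const std::atomic<Pair128> &P) {
  return P.load(std::memory_order_relaxed);
}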
27598 if (!Subtarget->hasLSE128())
27604 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
27605 SI->getAlign() >= Align(16) &&
27606 (SI->getOrdering() == AtomicOrdering::Release ||
27607 SI->getOrdering() == AtomicOrdering::SequentiallyConsistent);
27610 return RMW->getValOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
27611 RMW->getAlign() >= Align(16) &&
27612 (RMW->getOperation() == AtomicRMWInst::Xchg ||
27613 RMW->getOperation() == AtomicRMWInst::And ||
27614 RMW->getOperation() == AtomicRMWInst::Or);
27620 if (!Subtarget->hasLSE2() || !Subtarget->hasRCPC3())
27624 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
27625 LI->getAlign() >= Align(16) &&
27626 LI->getOrdering() == AtomicOrdering::Acquire;
27629 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
27630 SI->getAlign() >= Align(16) &&
27631 SI->getOrdering() == AtomicOrdering::Release;
27649 // Store-Release instructions only provide seq_cst guarantees when paired with
27650 // Load-Acquire instructions. MSVC CRT does not use these instructions to
27653 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
27656 switch (I->getOpcode()) {
27660 return cast<AtomicCmpXchgInst>(I)->getSuccessOrdering() ==
27663 return cast<AtomicRMWInst>(I)->getOrdering() ==
27666 return cast<StoreInst>(I)->getOrdering() ==
27671 // Loads and stores less than 128 bits are already atomic; ones above that
27676 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
27688 // Loads and stores less than 128 bits are already atomic; ones above that
27693 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
27703 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
27707 // succeed. So at -O0 lower this operation to a CAS loop.
27713 return Subtarget->hasLSE() ? AtomicExpansionKind::CmpXChg
27721 if (!RMW->isFloatingPointOperation())
27723 switch (RMW->getType()->getScalarType()->getTypeID()) {
27739 // However, with the LSE instructions (or outline-atomics mode, which provides
27740 // library routines in place of the LSE instructions), we can directly emit many
27744 Type *Ty = AI->getType();
27745 unsigned Size = Ty->getPrimitiveSizeInBits();
27748 bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
27749 (AI->getOperation() == AtomicRMWInst::Xchg ||
27750 AI->getOperation() == AtomicRMWInst::Or ||
27751 AI->getOperation() == AtomicRMWInst::And);
27757 if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128 &&
27758 !AI->isFloatingPointOperation()) {
27759 if (Subtarget->hasLSE())
27761 if (Subtarget->outlineAtomics()) {
27765 // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
27766 // (2) low level libgcc and compiler-rt support implemented by:
27768 if (AI->getOperation() != AtomicRMWInst::Min &&
27769 AI->getOperation() != AtomicRMWInst::Max &&
27770 AI->getOperation() != AtomicRMWInst::UMin &&
27771 AI->getOperation() != AtomicRMWInst::UMax) {
27777 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
27781 // succeed. So at -O0 lower this operation to a CAS loop. Also worthwhile if
27784 Subtarget->hasLSE() || rmwOpMayLowerToLibcall(*Subtarget, AI))
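// Illustrative sketch (not part of this file; the helper name is invented): a
// source-level RMW of the kind this hook decides how to expand. With LSE
// available it can be selected to a single LDADD-style instruction; without
// LSE it is expanded to an LL/SC or CAS loop as the comments above describe.
// Exact codegen varies by subtarget and optimisation level.
#include <atomic>
static int fetchThenIncrement(std::atomic<int> &Counter) {
  return Counter.fetch_add(1, std::memory_order_seq_cst);
}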
27794 if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
27796 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
27800 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
27804 // 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand
27806 unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
27816 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
27819 // Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
27822 if (ValueTy->getPrimitiveSizeInBits() == 128) {
27841 Type *Tys[] = { Addr->getType() };
27845 const DataLayout &DL = M->getDataLayout();
27847 CallInst *CI = Builder.CreateIntrinsic(Int, Tys, Addr);
27848 CI->addParamAttr(0, Attribute::get(Builder.getContext(),
27850 Value *Trunc = Builder.CreateTrunc(CI, IntEltTy);
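// Illustrative sketch (not part of this file; the helper name is invented):
// how the two 64-bit halves returned by ldxp/ldaxp are recombined into one
// 128-bit value, modelled with the Clang/GCC unsigned __int128 extension
// instead of the IRBuilder calls used in the surrounding code.
#include <cstdint>
static unsigned __int128 combineLdxpHalves(uint64_t Lo, uint64_t Hi) {
  return (static_cast<unsigned __int128>(Hi) << 64) | Lo;
}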
27863 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
27869 if (Val->getType()->getPrimitiveSizeInBits() == 128) {
27873 Type *Int64Ty = Type::getInt64Ty(M->getContext());
27874 Type *Int128Ty = Type::getInt128Ty(M->getContext());
27886 Type *Tys[] = { Addr->getType() };
27889 const DataLayout &DL = M->getDataLayout();
27890 IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
27893 CallInst *CI = Builder.CreateCall(
27895 Val, Stxr->getFunctionType()->getParamType(0)),
27897 CI->addParamAttr(1, Attribute::get(Builder.getContext(),
27898 Attribute::ElementType, Val->getType()));
27899 return CI;
27905 if (!Ty->isArrayTy()) {
27906 const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
27922 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
27935 if (Subtarget->isTargetAndroid())
27940 if (Subtarget->isTargetFuchsia())
27941 return UseTlsOffset(IRB, -0x10);
27948 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
27955 M.getOrInsertFunction(Subtarget->getSecurityCheckCookieName(),
27959 F->setCallingConv(CallingConv::Win64);
27960 F->addParamAttr(0, Attribute::AttrKind::InReg);
27969 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
27976 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
27977 return M.getFunction(Subtarget->getSecurityCheckCookieName());
27986 if (Subtarget->isTargetAndroid())
27991 if (Subtarget->isTargetFuchsia())
27992 return UseTlsOffset(IRB, -0x8);
28023 return Mask->getValue().isPowerOf2();
28043 !Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
28051 AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
28052 AFI->setIsSplitCSR(true);
28058 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
28059 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
28063 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
28064 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
28065 MachineBasicBlock::iterator MBBI = Entry->begin();
28075 Register NewVR = MRI->createVirtualRegister(RC);
28077 // FIXME: this currently does not emit CFI pseudo-instructions, it works
28078 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
28080 // CFI pseudo-instructions.
28081 assert(Entry->getParent()->getFunction().hasFnAttribute(
28084 Entry->addLiveIn(*I);
28085 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
28088 // Insert the copy-back instructions right before the terminator.
28090 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
28091 TII->get(TargetOpcode::COPY), *I)
28101 // integer division, leaving the division as-is is a loss even in terms of
28110 // Avoid merging stores into fixed-length vectors when Neon is unavailable.
28114 if (MemVT.isFixedLengthVector() && !Subtarget->isNeonAvailable())
28124 // We want inc-of-add for scalars and sub-of-not for vectors.
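// Illustrative sketch (not part of this file; helper names are invented),
// assuming "inc-of-add" and "sub-of-not" refer to the X + (Y + 1) and X - ~Y
// shapes the generic combiner toggles between: both compute X + Y + 1 on
// two's-complement integers, since ~Y == -Y - 1, so the hook only picks which
// form to prefer per type.
#include <cassert>
#include <cstdint>
static uint32_t incOfAdd(uint32_t X, uint32_t Y) { return X + (Y + 1); }
static uint32_t subOfNot(uint32_t X, uint32_t Y) { return X - ~Y; }
static void checkIncOfAddVsSubOfNot() {
  assert(incOfAdd(7u, 5u) == subOfNot(7u, 5u));
  assert(incOfAdd(41u, 0xFFFFFFFFu) == subOfNot(41u, 0xFFFFFFFFu));
}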
28132 if (FPVT == MVT::v8f16 && !Subtarget->hasFullFP16())
28149 assert(MBBI->isCall() && MBBI->getCFIType() &&
28152 switch (MBBI->getOpcode()) {
28164 MachineOperand &Target = MBBI->getOperand(0);
28168 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(AArch64::KCFI_CHECK))
28170 .addImm(MBBI->getCFIType())
28175 return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
28180 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
28238 if (GV.isThreadLocal() && Subtarget->isTargetMachO())
28244 const ConstantInt *CI;
28248 CI = MI.getOperand(1).getCImm();
28255 auto APF = MI.getOperand(1).getFPImm()->getValueAPF();
28261 CI =
28266 APInt Imm = CI->getValue();
28267 InstructionCost Cost = TTI->getIntImmCost(
28268 Imm, CI->getType(), TargetTransformInfo::TCK_CodeSize);
28277 --MaxUses;
28298 if (Inst.getType()->isScalableTy()) {
28303 if (Inst.getOperand(i)->getType()->isScalableTy())
28307 if (AI->getAllocatedType()->isScalableTy())
28332 llvm_unreachable("unexpected element type for SVE container");
28444 EVT MemVT = Load->getMemoryVT();
28454 LoadVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), Pg,
28455 DAG.getUNDEF(LoadVT), MemVT, Load->getMemOperand(),
28456 Load->getAddressingMode(), Load->getExtensionType());
28459 if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) {
28461 Load->getMemoryVT().getVectorElementType());
28502 SDValue Mask = Load->getMask();
28506 assert(Load->getExtensionType() != ISD::NON_EXTLOAD &&
28515 if (Load->getPassThru()->isUndef()) {
28523 if (isZerosVector(Load->getPassThru().getNode()))
28528 ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
28529 Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(),
28530 Load->getAddressingMode(), Load->getExtensionType());
28535 convertToScalableVector(DAG, ContainerVT, Load->getPassThru());
28550 EVT VT = Store->getValue().getValueType();
28552 EVT MemVT = Store->getMemoryVT();
28555 auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
28557 if (VT.isFloatingPoint() && Store->isTruncatingStore()) {
28559 Store->getMemoryVT().getVectorElementType());
28572 return DAG.getMaskedStore(Store->getChain(), DL, NewValue,
28573 Store->getBasePtr(), Store->getOffset(), Pg, MemVT,
28574 Store->getMemOperand(), Store->getAddressingMode(),
28575 Store->isTruncatingStore());
28583 EVT VT = Store->getValue().getValueType();
28586 auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
28587 SDValue Mask = convertFixedMaskToScalableVector(Store->getMask(), DAG);
28590 Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
28591 Mask, Store->getMemoryVT(), Store->getMemOperand(),
28592 Store->getAddressingMode(), Store->isTruncatingStore());
28677 llvm_unreachable("unimplemented container type");
28710 llvm_unreachable("unimplemented container type");
28741 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
28754 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
28773 assert(isTypeLegal(VT) && "Expected only legal fixed-width types");
28778 for (const SDValue &V : Op->op_values()) {
28785 EVT VTArg = VTNode->getVT().getVectorElementType();
28792 "Expected only legal fixed-width types");
28806 for (const SDValue &V : Op->op_values()) {
28816 return DAG.getNode(NewOp, DL, VT, Operands, Op->getFlags());
28831 for (const SDValue &V : Op->op_values()) {
28834 // Pass through non-vector operands.
28931 /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
28936 // Lower VECREDUCE_ADD of nxv2i1-nxv16i1 to CNTP rather than UADDV.
28977 SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
28978 SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
29036 unsigned NumOperands = Op->getNumOperands();
29051 Op->getOperand(I), Op->getOperand(I + 1)));
29193 SDValue Chain = HG->getChain();
29194 SDValue Inc = HG->getInc();
29195 SDValue Mask = HG->getMask();
29196 SDValue Ptr = HG->getBasePtr();
29197 SDValue Index = HG->getIndex();
29198 SDValue Scale = HG->getScale();
29199 SDValue IntID = HG->getIntID();
29204 assert(CID->getZExtValue() == Intrinsic::experimental_vector_histogram_add &&
29210 EVT MemVT = EVT::getVectorVT(Ctx, HG->getMemoryVT(), EC);
29222 MachineMemOperand *MMO = HG->getMemOperand();
29225 MMO->getPointerInfo(), MachineMemOperand::MOLoad, MMO->getSize(),
29226 MMO->getAlign(), MMO->getAAInfo());
29227 ISD::MemIndexType IndexType = HG->getIndexType();
29244 MMO->getPointerInfo(), MachineMemOperand::MOStore, MMO->getSize(),
29245 MMO->getAlign(), MMO->getAAInfo());
29330 // Bail out for 8-bit element types, because with 2048-bit SVE register
29341 // is not known at compile-time, we need to maintain a mask with 'VL' values
29345 Index += IndexLen - ElementsPerVectorReg;
29347 Index = Index - ElementsPerVectorReg;
29352 // For 8-bit elements and 1024-bit SVE registers and MaxOffset equals
29360 // Choosing an out-of-range index leads to the lane being zeroed vs zero
29362 // index elements. For i8 elements an out-of-range index could still be valid
29363 // for a 2048-bit vector register size.
29364 for (unsigned i = 0; i < IndexLen - ElementsPerVectorReg; ++i) {
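// Illustrative sketch (not part of this file; the helper name is invented): a
// per-element model of the TBL semantics relied on above - an index past the
// end of the table yields 0, which is what lets deliberately out-of-range
// indices act as "zero this lane" entries.
#include <cstddef>
#include <cstdint>
#include <vector>
static uint8_t tblLookup(const std::vector<uint8_t> &Table, uint8_t Index) {
  return static_cast<size_t>(Index) < Table.size() ? Table[Index] : 0;
}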
29414 auto ShuffleMask = SVN->getMask();
29424 auto MinLegalExtractEltScalarTy = [](EVT ScalarTy) -> EVT {
29430 if (SVN->isSplat()) {
29431 unsigned Lane = std::max(0, SVN->getSplatIndex());
29442 Imm == VT.getVectorNumElements() - 1) {
29448 DAG.getConstant(VT.getVectorNumElements() - 1, DL, MVT::i64));
29473 if (Subtarget->hasSVE2p1() && EltSize == 64 &&
29515 // are actually sub-vectors of a larger SVE register. When mapping
29523 // when converting from fixed-length to scalable vector types (i.e. the start
29525 unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
29526 unsigned MaxSVESize = Subtarget->getMaxSVEVectorSizeInBits();
29563 // 128-bits.
29564 if (MinSVESize || !Subtarget->isNeonAvailable())
29604 if (Subtarget->isLittleEndian() ||
29647 SDValue ShiftR = Op->getOperand(0);
29648 if (ShiftR->getOpcode() != AArch64ISD::VLSHR)
29654 unsigned ShiftLBits = ShiftL->getConstantOperandVal(1);
29655 unsigned ShiftRBits = ShiftR->getConstantOperandVal(1);
29672 // used - simplify to just Val.
29673 return TLO.CombineTo(Op, ShiftR->getOperand(0));
29682 (Op->getConstantOperandAPInt(1) << Op->getConstantOperandAPInt(2))
29693 unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();
29705 Known.Zero.setHighBits(BitWidth - RequiredBits);
29724 return Subtarget->hasSVE() || Subtarget->hasSVE2() ||
29725 Subtarget->hasComplxNum();
29736 if (!VTy->isScalableTy() && !Subtarget->hasComplxNum())
29739 auto *ScalarTy = VTy->getScalarType();
29740 unsigned NumElements = VTy->getElementCount().getKnownMinValue();
29744 // power-of-2 size, as we later split them into the smallest supported size
29746 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
29747 if ((VTyWidth < 128 && (VTy->isScalableTy() || VTyWidth != 64)) ||
29751 if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2() && VTy->isScalableTy()) {
29752 unsigned ScalarWidth = ScalarTy->getScalarSizeInBits();
29763 return (ScalarTy->isHalfTy() && Subtarget->hasFullFP16()) ||
29764 ScalarTy->isFloatTy() || ScalarTy->isDoubleTy();
29771 VectorType *Ty = cast<VectorType>(InputA->getType());
29774 bool IsScalable = Ty->isScalableTy();
29775 bool IsInt = Ty->getElementType()->isIntegerTy();
29778 Ty->getScalarSizeInBits() * Ty->getElementCount().getKnownMinValue();
29784 int Stride = Ty->getElementCount().getKnownMinValue() / 2;
29785 int AccStride = cast<VectorType>(Accumulator->getType())
29786 ->getElementCount()
29799 FullTy = Accumulator->getType();
29801 cast<VectorType>(Accumulator->getType()));
29824 auto *Mask = B.getAllOnesMask(Ty->getElementCount());
29849 auto *Mask = B.getAllOnesMask(Ty->getElementCount());
29872 Intrinsic::aarch64_sve_cdot, Accumulator->getType(),
29880 unsigned Opc = N->getOpcode();
29882 if (any_of(N->users(),
29883 [&](SDNode *Use) { return Use->getOpcode() == ISD::MUL; }))
29890 return Subtarget->getMinimumJumpTableEntries();
29898 if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors())
29913 if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors())
29932 assert(Subtarget->useSVEForFixedLengthVectors() && "Unexpected mode!");
29991 return !Subtarget->isTargetWindows() &&
29992 MF.getInfo<AArch64FunctionInfo>()->hasStackProbing();
30009 switch (N->getOpcode()) {
30016 assert(N->getNumValues() == 1 && "Expected one result!");
30017 assert(N->getNumOperands() == 2 && "Expected two operands!");
30018 EVT VT = N->getValueType(0);
30019 EVT Op0VT = N->getOperand(0).getValueType();
30020 EVT Op1VT = N->getOperand(1).getValueType();
30037 assert(N->getNumValues() == 1 && "Expected one result!");
30038 assert(N->getNumOperands() == 1 && "Expected one operand!");
30039 EVT VT = N->getValueType(0);
30040 EVT OpVT = N->getOperand(0).getValueType();
30055 assert(N->getNumValues() == 1 && "Expected one result!");
30056 assert(N->getNumOperands() == 2 && "Expected two operands!");
30057 EVT VT = N->getValueType(0);
30058 EVT Op0VT = N->getOperand(0).getValueType();
30059 EVT Op1VT = N->getOperand(1).getValueType();
30066 assert(N->getNumValues() == 1 && "Expected one result!");
30067 assert(N->getNumOperands() == 2 && "Expected two operands!");
30068 EVT VT = N->getValueType(0);
30069 EVT Op0VT = N->getOperand(0).getValueType();
30070 EVT Op1VT = N->getOperand(1).getValueType();
30079 assert(Op1VT == MVT::i32 && isa<ConstantSDNode>(N->getOperand(1)) &&