1 //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
109 #define DEBUG_TYPE "aarch64-lower"
119 "aarch64-elf-ldtls-generation", cl::Hidden,
124 EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
134 EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
139 static cl::opt<bool> EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden,
146 static cl::opt<unsigned> MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden,
154 "aarch64-enable-gisel-sve", cl::Hidden,
349 // Otherwise, it's either a constant discriminator, or a non-blended
351 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
352 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
353 AddrDisc = Disc->getOperand(1);
354 ConstDisc = Disc->getOperand(2);
360 // discriminator value) isn't a 16-bit constant, bail out, and let the
363 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
364 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
369 AddrDisc = DAG->getRegister(AArch64::NoRegister, MVT::i64);
372 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
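For orientation, the blend being decomposed here packs a 16-bit integer discriminator into the top bits of an address discriminator. A minimal value-level sketch of that semantics (the helper name is made up for illustration; this is not the lowering itself):

    #include <cstdint>

    // Sketch: the constant discriminator replaces bits [63:48] of the address
    // discriminator, matching the MOVK ..., LSL #48 the backend emits for a blend.
    uint64_t blendDiscriminator(uint64_t AddrDisc, uint16_t ConstDisc) {
      return (AddrDisc & 0x0000FFFFFFFFFFFFULL) |
             (uint64_t(ConstDisc) << 48);
    }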
383 // vector to all-one or all-zero.
390 if (Subtarget->hasLS64()) {
396 if (Subtarget->hasFPARMv8()) {
404 if (Subtarget->hasNEON()) {
427 if (Subtarget->isSVEorStreamingSVEAvailable()) {
452 if (Subtarget->useSVEForFixedLengthVectors()) {
463 if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) {
473 computeRegisterProperties(Subtarget->getRegisterInfo());
562 // Lowering for many of the conversions is actually specified by the non-f128
588 if (Subtarget->hasFPARMv8()) {
594 if (Subtarget->hasFPARMv8()) {
612 // Variable-sized objects.
630 // AArch64 lacks both left-rotate and popcount instructions.
648 if (Subtarget->hasCSSC()) {
727 if (Subtarget->hasFullFP16()) {
800 // Round-to-integer operations need custom lowering for fp16, as Promote doesn't work
855 if (!Subtarget->hasFullFP16()) {
862 // AArch64 has implementations of a lot of rounding-like FP operations.
876 if (Subtarget->hasFullFP16())
885 if (Subtarget->hasFullFP16())
902 if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
914 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
963 if (Subtarget->hasLSE128()) {
971 // 128-bit loads and stores can be done without expanding
975 // Aligned 128-bit loads and stores are single-copy atomic according to the
976 // v8.4a spec. LRCPC3 introduces 128-bit STILP/LDIAPP but still requires LSE2.
977 if (Subtarget->hasLSE2()) {
982 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
983 // custom lowering, as there are no un-paired non-temporal stores and
994 // 256 bit non-temporal loads can be lowered to LDNP. This is done using
995 // custom lowering, as there are no un-paired non-temporal loads; legalization
1019 // Make floating-point constants legal for the large code model, so they don't
1021 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
1026 // AArch64 does not have floating-point extending loads, i1 sign-extending
1027 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
1046 if (Subtarget->hasFPARMv8()) {
1087 // Vector add and sub nodes may conceal a high-half opportunity.
1134 Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32;
1139 Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16;
1146 Subtarget->requiresStrictAlign() ? MaxLoadsPerMemcmpOptSize : 8;
1161 if (!Subtarget->isTargetWindows())
1178 if (Subtarget->isNeonAvailable()) {
1181 // clang-format off
1203 // clang-format on
1211 // AArch64 doesn't have direct vector ->f32 conversion instructions for
1216 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
1217 // Or, direct i32 -> f16 vector conversion. Set it to custom, so the
1218 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
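The two-step shape described above can be illustrated with ACLE intrinsics (a sketch only: it assumes an AArch64 target with NEON and half-precision storage support, and is not the SelectionDAG lowering itself):

    #include <arm_neon.h>

    // v4i32 -> v4f32 -> v4f16: widen to single precision, then narrow to half.
    float16x4_t convertI32ToF16(int32x4_t v) {
      float32x4_t f = vcvtq_f32_s32(v);
      return vcvt_f16_f32(f);
    }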
1224 if (Subtarget->hasFullFP16()) {
1264 // Custom handling for some quad-vector types to detect MULL.
1294 if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
1347 // AArch64 has implementations of a lot of rounding-like FP operations.
1355 if (Subtarget->hasFullFP16())
1364 if (Subtarget->hasFullFP16())
1402 Subtarget->isLittleEndian() ? Legal : Expand);
1413 if (Subtarget->hasSME()) {
1419 if (Subtarget->isSVEorStreamingSVEAvailable()) {
1429 if (Subtarget->isSVEorStreamingSVEAvailable()) {
1492 if (!Subtarget->isLittleEndian())
1495 if (Subtarget->hasSVE2() ||
1496 (Subtarget->hasSME() && Subtarget->isStreaming()))
1574 // SVE supports truncating stores of 64 and 128-bit vectors
1655 if (!Subtarget->isLittleEndian())
1666 if (!Subtarget->isLittleEndian())
1680 // NEON doesn't support 64-bit vector integer muls, but SVE does.
1686 if (Subtarget->useSVEForFixedLengthVectors()) {
1689 VT, /*OverrideNEON=*/!Subtarget->isNeonAvailable()))
1694 VT, /*OverrideNEON=*/!Subtarget->isNeonAvailable()))
1759 // Handle operations that are only available in non-streaming SVE mode.
1760 if (Subtarget->isSVEAvailable()) {
1778 if (Subtarget->hasSVE2())
1784 if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
1791 if (Subtarget->hasSVE()) {
1798 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
1803 // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
1805 if (Subtarget->isTargetWindows()) {
1817 if (Subtarget->isWindowsArm64EC()) {
1858 // But we do support custom-lowering for FCOPYSIGN.
1862 Subtarget->hasFullFP16()))
1911 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
1934 // * The lowering of the non-strict versions involves target-specific ISD
1940 if (Subtarget->isLittleEndian()) {
1948 if (Subtarget->hasD128()) {
1956 // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
1957 if (!Subtarget->hasSVE())
1961 // whilelo instruction for generating fixed-width predicates too.
1975 if (!Subtarget->isSVEorStreamingSVEAvailable())
1979 // also support fixed-width predicates.
2020 // Mark floating-point truncating stores/extending loads as having custom
2033 bool PreferSVE = !PreferNEON && Subtarget->isSVEAvailable();
2129 if (Subtarget->isNeonAvailable())
2135 if (Subtarget->isNeonAvailable())
2148 // isIntImmediate - This method tests to see if the node is a constant
2152 Imm = C->getZExtValue();
2158 // isOpcWithIntImmediate - This method tests to see if the node is a specific
2163 return N->getOpcode() == Opc &&
2164 isIntImmediate(N->getOperand(1).getNode(), Imm);
2172 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
2187 // The goal here is to set the non-demanded bits in a way that minimizes
2189 // we set the non-demanded bits to the value of the preceding demanded bits.
2191 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
2197 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
2200 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
2205 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
2210 // We cannot shrink the element size any further if it is 2-bits.
2245 // If the new constant immediate is all-zeros or all-ones, let the target
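The idea sketched in the comments above can be written as a plain bit loop (the real code uses a branch-free carry trick and also handles wrap-around within the element; the helper name here is hypothetical):

    #include <cstdint>

    // Overwrite each non-demanded bit with the nearest lower demanded bit,
    // which tends to yield the repeated runs a logical immediate can encode.
    uint64_t fillNonDemandedBits(uint64_t Imm, uint64_t DemandedBits) {
      uint64_t Result = 0, Prev = 0;
      for (int i = 0; i < 64; ++i) {
        uint64_t Bit = (Imm >> i) & 1;
        if ((DemandedBits >> i) & 1)
          Prev = Bit;                 // demanded: keep the original value
        Result |= Prev << i;          // non-demanded: copy the propagated bit
      }
      return Result;
    }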
2301 uint64_t Imm = C->getZExtValue();
2305 /// computeKnownBitsForTargetNode - Determine which of the bits specified in
2325 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2326 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2333 ~(Op->getConstantOperandVal(1) << Op->getConstantOperandVal(2));
2334 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2340 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2341 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2347 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2348 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2354 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2355 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2361 APInt(Known.getBitWidth(), Op->getConstantOperandVal(0)));
2366 if (!Subtarget->isTargetILP32())
2368 // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
2373 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2379 static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
2385 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
2387 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
2405 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - Bound);
2414 // bits larger than the element datatype. 32-bit or larger doesn't need
2420 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
2424 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
2459 // Compares return either 0 or all-ones
2474 if (Subtarget->requiresStrictAlign())
2478 // Some CPUs are fine with unaligned stores except for 128-bit ones.
2479 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
2489 // them regresses performance on micro-benchmarks and olden/bh.
2499 if (Subtarget->requiresStrictAlign())
2503 // Some CPUs are fine with unaligned stores except for 128-bit ones.
2504 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
2515 // them regresses performance on micro-benchmarks and olden/bh.
2865 // We materialise the F128CSEL pseudo-instruction as some control flow and a
2877 MachineFunction *MF = MBB->getParent();
2878 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2879 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2881 MachineFunction::iterator It = ++MBB->getIterator();
2889 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
2890 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
2891 MF->insert(It, TrueBB);
2892 MF->insert(It, EndBB);
2894 // Transfer rest of current basic-block to EndBB
2895 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
2896 MBB->end());
2897 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
2899 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
2900 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
2901 MBB->addSuccessor(TrueBB);
2902 MBB->addSuccessor(EndBB);
2905 TrueBB->addSuccessor(EndBB);
2908 TrueBB->addLiveIn(AArch64::NZCV);
2909 EndBB->addLiveIn(AArch64::NZCV);
2912 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
2925 BB->getParent()->getFunction().getPersonalityFn())) &&
2933 MachineFunction &MF = *MBB->getParent();
2935 DebugLoc DL = MBB->findDebugLoc(MBBI);
2943 return NextInst->getParent();
2950 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2951 MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
2966 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2968 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::LDR_ZA));
2984 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2987 MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opcode))
3000 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3001 MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
3032 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3034 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::ZERO_M));
3050 MachineFunction *MF = BB->getParent();
3051 MachineFrameInfo &MFI = MF->getFrameInfo();
3052 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
3053 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
3055 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3057 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRXui))
3061 // Set the reserved bytes (10-15) to zero
3062 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRHHui))
3066 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRWui))
3073 BB->remove_instr(&MI);
3080 MachineFunction *MF = BB->getParent();
3081 MachineFrameInfo &MFI = MF->getFrameInfo();
3082 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
3087 assert(!MF->getSubtarget<AArch64Subtarget>().isTargetWindows() &&
3090 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
3093 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3094 MachineRegisterInfo &MRI = MF->getRegInfo();
3099 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), SP)
3102 // Allocate a lazy-save buffer object of the size given, normally SVL * SVL
3105 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::MSUBXrrr), Dest)
3109 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
3117 BB->remove_instr(&MI);
3125 if (SMEOrigInstr != -1) {
3126 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3128 TII->get(MI.getOpcode()).TSFlags & AArch64::SMEMatrixTypeMask;
3160 // has implicit def. This def is early-clobber as it will be set at
3216 //===----------------------------------------------------------------------===//
3218 //===----------------------------------------------------------------------===//
3220 //===----------------------------------------------------------------------===//
3222 //===----------------------------------------------------------------------===//
3234 /// isZerosVector - Check whether SDNode N is a zero-filled vector.
3237 while (N->getOpcode() == ISD::BITCAST)
3238 N = N->getOperand(0).getNode();
3243 if (N->getOpcode() != AArch64ISD::DUP)
3246 auto Opnd0 = N->getOperand(0);
3250 /// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
3279 /// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
3369 /// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
3395 // All of the compare-mask comparisons are ordered, but we can switch
3418 // the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
3424 // So, finally, the only LLVM-native comparisons that don't mention C or V
3523 /// - We can implement (NEG SETCC) i.e. negating a single comparison by
3525 /// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
3528 /// - Note that we can only ever negate all previously processed results.
3530 /// of two sub-trees (because the negation affects all sub-trees emitted so
3531 /// far, so the 2nd sub-tree we emit would also affect the first).
3533 /// - (OR (SETCC A) (SETCC B)) can be implemented via:
3535 /// - After transforming OR to NEG/AND combinations we may be able to use NEG
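The OR-to-NEG/AND rewrite in the list above is De Morgan's law, applied so the whole expression can be emitted as a single CMP/CCMP chain; a trivial stand-alone check of the identity:

    #include <cassert>

    int main() {
      for (int a = 0; a <= 1; ++a)
        for (int b = 0; b <= 1; ++b)
          // (A or B) == not(and(not A, not B)), i.e. (OR (SETCC A) (SETCC B))
          // becomes (NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))).
          assert((a || b) == !(!a && !b));
      return 0;
    }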
3578 APInt Imm = Const->getAPIntValue();
3579 if (Imm.isNegative() && Imm.sgt(-32)) {
3581 RHS = DAG.getConstant(Imm.abs(), DL, Const->getValueType(0));
3605 /// \param CanNegate Set to true if we can negate the whole sub-tree just by
3608 /// Negate==true on this sub-tree)
3622 unsigned Opcode = Val->getOpcode();
3624 if (Val->getOperand(0).getValueType() == MVT::f128)
3635 SDValue O0 = Val->getOperand(0);
3636 SDValue O1 = Val->getOperand(1);
3655 // the leaves, then this sub-tree as a whole negates naturally.
3657 // If we cannot naturally negate the whole sub-tree, then this must be
3677 /// \p Negate is true if we want this sub-tree being negated just by changing
3683 unsigned Opcode = Val->getOpcode();
3685 SDValue LHS = Val->getOperand(0);
3686 SDValue RHS = Val->getOperand(1);
3687 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
3720 assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
3724 SDValue LHS = Val->getOperand(0);
3731 SDValue RHS = Val->getOperand(1);
3738 // Swap sub-tree that must come first to the right side.
3751 // Swap the sub-tree that we can negate naturally to the left.
3760 // Negate the left sub-tree if possible, otherwise negate the result.
3776 // Emit sub-trees.
3812 uint64_t Mask = MaskCst->getZExtValue();
3828 uint64_t Shift = ShiftCst->getZExtValue();
3844 uint64_t C = RHSC->getZExtValue();
3853 isLegalArithImmed((uint32_t)(C - 1))) ||
3855 isLegalArithImmed(C - 1ULL))) {
3857 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
3864 isLegalArithImmed((uint32_t)(C - 1))) ||
3865 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
3867 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
3908 !isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) {
3931 // -1 constant. For example,
3942 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
3943 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
3944 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
3945 LHS.getNode()->hasNUsesOfValue(1, 0)) {
3946 int16_t ValueofRHS = RHS->getAsZExtVal();
3947 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
3958 if (!Cmp && (RHSC->isZero() || RHSC->isOne())) {
3960 if ((CC == ISD::SETNE) ^ RHSC->isZero())
4008 // Extend to 64-bits, then perform a 64-bit multiply.
4015 // Check that the result fits into a 32-bit integer.
4055 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
4066 !Subtarget->isNeonAvailable()))
4076 // -->
4082 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
4101 // (xor x, (select_cc a, b, cc, 0, -1) )
4102 // -->
4103 // (csel x, (xor x, -1), cc ...)
4107 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
4113 // FIXME: This could be generalized to non-integer comparisons.
4126 if (CTVal->isAllOnes() && CFVal->isZero()) {
4133 if (CTVal->isZero() && CFVal->isAllOnes()) {
4139 DAG.getConstant(-1ULL, dl, Other.getValueType()));
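Restated in plain C++, the combine above relies on a simple identity (sketch only; the values stand in for the DAG nodes): when the select produces 0 or all-ones, XOR with it is a select between x and ~x, which is exactly a CSEL.

    #include <cstdint>

    // x ^ (cond ? 0 : ~0) == (cond ? x : ~x), i.e. CSEL x, (XOR x, -1), cond.
    uint64_t xorWithSelect(uint64_t x, bool cond) {
      uint64_t mask = cond ? 0 : ~0ULL;
      return x ^ mask;
    }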
4252 // The front-end should have filtered out the out-of-range values
4253 assert(Locality <= 3 && "Prefetch locality out-of-range");
4257 Locality = 3 - Locality;
4276 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
4289 bool IsStrict = Op->isStrictFPOpcode();
4294 if (useSVEForFixedLengthVectorVT(SrcVT, !Subtarget->isNeonAvailable()))
4300 !((Subtarget->hasNEON() || Subtarget->hasSME()) &&
4301 Subtarget->hasBF16())) {
4371 bool IsStrict = Op->isStrictFPOpcode();
4382 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()) ||
4383 useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable()))
4389 if ((InVT.getVectorElementType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
4436 // Use a scalar operation for conversions between single-element vectors of
4456 bool IsStrict = Op->isStrictFPOpcode();
4463 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
4489 // AArch64 FP-to-int conversions saturate to the destination element size, so
4494 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
4512 (!Subtarget->hasFullFP16() || DstElementWidth > 16)) ||
4567 // AArch64 FP-to-int conversions saturate to the destination register size, so
4576 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
4582 if ((SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) || SrcVT == MVT::bf16) {
4592 (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
4633 // Round the floating-point value into a floating-point register with the
4647 bool IsStrict = Op->isStrictFPOpcode();
4669 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()) ||
4670 useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable()))
4715 // Use a scalar operation for conversions between single-element vectors of
4736 bool IsStrict = Op->isStrictFPOpcode();
4739 bool IsSigned = Op->getOpcode() == ISD::STRICT_SINT_TO_FP ||
4740 Op->getOpcode() == ISD::SINT_TO_FP;
4770 // We need to be careful about i64 -> bf16.
4793 // double-precision value or it is too big. If it is sufficiently small,
4794 // we should just go u64 -> double -> bf16 in a naive way. Otherwise, we
4795 // ensure that u64 -> double has no rounding error by only using the 52
4855 if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
4863 // Other conversions are legal, unless it's to the completely software-based
4925 "Expected int->fp bitcast!");
4971 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
4972 // 64-bits we need to insert a new extension so that it will be 64-bits.
4983 // Returns lane if Op extracts from a two-element vector and lane is constant
4988 if (OpNode->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
4991 EVT VT = OpNode->getOperand(0).getValueType();
4992 ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpNode->getOperand(1));
4996 return C->getZExtValue();
5006 for (const SDValue &Elt : N->op_values()) {
5011 if (!isIntN(HalfSize, C->getSExtValue()))
5014 if (!isUIntN(HalfSize, C->getZExtValue()))
5072 return N0->hasOneUse() && N1->hasOneUse() &&
5083 return N0->hasOneUse() && N1->hasOneUse() &&
5092 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
5115 SDValue Chain = Op->getOperand(0);
5116 SDValue RMValue = Op->getOperand(1);
5120 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
5121 // (((arg - 1) & 3) << 22).
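A quick stand-alone check of the two mappings quoted in these comments (pure arithmetic; FPCR.RMode sits in bits [23:22], hence the shift by 22):

    #include <cassert>

    int main() {
      for (unsigned rm = 0; rm < 4; ++rm) {
        unsigned fltRounds = (rm + 1) & 3;    // hardware -> FLT_ROUNDS: 0->1, 1->2, 2->3, 3->0
        assert(((fltRounds - 1) & 3) == rm);  // FLT_ROUNDS -> hardware: 0->3, 1->0, 2->1, 3->2
      }
      return 0;
    }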
5159 SDValue Chain = Op->getOperand(0);
5178 SDValue Chain = Op->getOperand(0);
5179 SDValue Mode = Op->getOperand(1);
5193 SDValue Chain = Op->getOperand(0);
5281 bool OverrideNEON = !Subtarget->isNeonAvailable();
5285 // Multiplications are only custom-lowered for 128-bit and 64-bit vectors so
5288 "unexpected type for custom-lowering ISD::MUL");
5303 if (Subtarget->hasSVE())
5320 if (Subtarget->hasSVE())
5342 // isel lowering to take advantage of no-stall back to back s/umul + s/umla.
5343 // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57
5409 "Expected a predicate-to-predicate bitcast");
5416 // e.g. <n x 16 x i1> -> <n x 16 x i1>
5424 // case (e.g. when casting from <vscale x 16 x i1> -> <vscale x 2 x i1>) then
5461 // ldr(%tileslice, %ptr, 11) -> ldr [%tileslice, 11], [%ptr, 11]
5465 // ->
5477 // ->
5483 // Case 4: If the vecnum is an add of an immediate, then the non-immediate
5487 // ->
5498 SDValue TileSlice = N->getOperand(2);
5499 SDValue Base = N->getOperand(3);
5500 SDValue VecNum = N->getOperand(4);
5507 ConstAddend = cast<ConstantSDNode>(VecNum.getOperand(1))->getSExtValue();
5510 ConstAddend = ImmNode->getSExtValue();
5515 if (int32_t C = (ConstAddend - ImmAddend)) {
5572 Op->getOperand(0), // Chain
5578 Op->getOperand(0), // Chain
5593 SDValue Chain = Node->getChain();
5598 auto Alignment = Node->getMemOperand()->getAlign();
5599 bool IsVol = Node->isVolatile();
5600 auto DstPtrInfo = Node->getPointerInfo();
5659 SelectionDAG &DAG) -> SDValue {
5662 // re-use the dag-combiner function with aarch64_neon_{pmull,smull,umull}.
5687 // the non-high version of PMULL instruction. Use v1i64 to represent i64.
5958 const auto *RegInfo = Subtarget->getRegisterInfo();
5959 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
5971 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
6039 // then extracting a fixed-width subvector from the scalable vector.
6072 // an SVE predicate register mask from the fixed-width vector.
6097 // SVE only supports implicit extension of 32-bit indices.
6098 if (!Subtarget->hasSVE() || IndexVT.getVectorElementType() != MVT::i32)
6105 // Scalable vectors with "vscale * 2" or fewer elements sit within a 64-bit
6112 if (!ExtVT.isScalableVector() && !Subtarget->useSVEForFixedLengthVectors())
6120 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal->getOperand(0))) {
6121 if (!isLoadExtLegalOrCustom(ISD::ZEXTLOAD, ExtVT, Ld->getValueType(0))) {
6122 // Disable extending masked loads for fixed-width for now, since the code
6128 for (auto *U : Ld->getMask()->uses())
6160 return AddrModes.find(Key)->second;
6190 SDValue Chain = MGT->getChain();
6191 SDValue PassThru = MGT->getPassThru();
6192 SDValue Mask = MGT->getMask();
6193 SDValue BasePtr = MGT->getBasePtr();
6194 SDValue Index = MGT->getIndex();
6195 SDValue Scale = MGT->getScale();
6197 EVT MemVT = MGT->getMemoryVT();
6198 ISD::LoadExtType ExtType = MGT->getExtensionType();
6199 ISD::MemIndexType IndexType = MGT->getIndexType();
6203 if (!PassThru->isUndef() && !isZerosVector(PassThru.getNode())) {
6206 DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
6207 MGT->getMemOperand(), IndexType, ExtType);
6212 bool IsScaled = MGT->isIndexScaled();
6213 bool IsSigned = MGT->isIndexSigned();
6217 uint64_t ScaleVal = Scale->getAsZExtVal();
6219 assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
6226 return DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
6227 MGT->getMemOperand(), IndexType, ExtType);
6232 assert(Subtarget->useSVEForFixedLengthVectors() &&
6235 // NOTE: Handle floating-point as if integer then bitcast the result.
6262 PassThru = PassThru->isUndef() ? DAG.getUNDEF(ContainerVT)
6269 Ops, MGT->getMemOperand(), IndexType, ExtType);
6289 SDValue Chain = MSC->getChain();
6290 SDValue StoreVal = MSC->getValue();
6291 SDValue Mask = MSC->getMask();
6292 SDValue BasePtr = MSC->getBasePtr();
6293 SDValue Index = MSC->getIndex();
6294 SDValue Scale = MSC->getScale();
6296 EVT MemVT = MSC->getMemoryVT();
6297 ISD::MemIndexType IndexType = MSC->getIndexType();
6298 bool Truncating = MSC->isTruncatingStore();
6300 bool IsScaled = MSC->isIndexScaled();
6301 bool IsSigned = MSC->isIndexSigned();
6305 uint64_t ScaleVal = Scale->getAsZExtVal();
6307 assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
6314 return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
6315 MSC->getMemOperand(), IndexType, Truncating);
6320 assert(Subtarget->useSVEForFixedLengthVectors() &&
6323 // Once bitcast we treat floating-point scatters as if integer.
6357 return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
6358 MSC->getMemOperand(), IndexType, Truncating);
6369 EVT VT = Op->getValueType(0);
6374 SDValue PassThru = LoadNode->getPassThru();
6375 SDValue Mask = LoadNode->getMask();
6377 if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
6381 VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(),
6382 LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(),
6383 LoadNode->getMemOperand(), LoadNode->getAddressingMode(),
6384 LoadNode->getExtensionType());
6398 SDValue Value = ST->getValue();
6419 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
6420 ST->getBasePtr(), ST->getMemOperand());
6432 SDValue Value = StoreNode->getValue();
6435 EVT MemVT = StoreNode->getMemoryVT();
6440 /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
6443 unsigned AS = StoreNode->getAddressSpace();
6444 Align Alignment = StoreNode->getAlign();
6447 StoreNode->getMemOperand()->getFlags(),
6452 if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
6456 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
6457 // the custom lowering, as there are no un-paired non-temporal stores and
6460 if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
6469 StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
6473 StoreNode->getValue(),
6477 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
6478 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
6481 } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
6484 SDValue Value = StoreNode->getValue();
6485 assert(Value->getValueType(0) == MVT::i64x8);
6486 SDValue Chain = StoreNode->getChain();
6487 SDValue Base = StoreNode->getBasePtr();
6494 Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
6495 StoreNode->getOriginalAlign());
6503 /// Lower atomic or volatile 128-bit stores to a single STP instruction.
6507 assert(StoreNode->getMemoryVT() == MVT::i128);
6508 assert(StoreNode->isVolatile() || StoreNode->isAtomic());
6511 StoreNode->getMergedOrdering() == AtomicOrdering::Release;
6512 if (StoreNode->isAtomic())
6513 assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
6514 Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
6515 StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
6516 StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic);
6518 SDValue Value = (StoreNode->getOpcode() == ISD::STORE ||
6519 StoreNode->getOpcode() == ISD::ATOMIC_STORE)
6520 ? StoreNode->getOperand(1)
6521 : StoreNode->getOperand(2);
6529 {StoreNode->getChain(), StoreValue.first, StoreValue.second,
6530 StoreNode->getBasePtr()},
6531 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
6541 if (LoadNode->getMemoryVT() == MVT::i64x8) {
6543 SDValue Base = LoadNode->getBasePtr();
6544 SDValue Chain = LoadNode->getChain();
6550 LoadNode->getPointerInfo(),
6551 LoadNode->getOriginalAlign());
6560 EVT VT = Op->getValueType(0);
6563 if (LoadNode->getMemoryVT() != MVT::v4i8)
6567 if (Subtarget->requiresStrictAlign() && LoadNode->getAlign() < Align(4))
6571 if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
6573 else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
6574 LoadNode->getExtensionType() == ISD::EXTLOAD)
6579 SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
6580 LoadNode->getBasePtr(), MachinePointerInfo());
6639 VT.getFixedSizeInBits() - ShiftNo->getZExtValue();
6698 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
6709 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
6937 if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
6938 assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
6978 EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
6993 !Subtarget->isNeonAvailable()))
7067 "WRITE_REGISTER custom lowering is only for 128-bit sysregs");
7092 return !Subtarget->useSVEForFixedLengthVectors();
7117 // NEON-sized vectors can be emulated using SVE instructions.
7119 return Subtarget->isSVEorStreamingSVEAvailable();
7126 if (!Subtarget->useSVEForFixedLengthVectors())
7130 if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
7141 //===----------------------------------------------------------------------===//
7143 //===----------------------------------------------------------------------===//
7146 unsigned Opcode = N->getOpcode();
7151 unsigned IID = N->getConstantOperandVal(0);
7187 // The non-vararg case is handled in the CC function itself.
7200 if (Subtarget->isTargetWindows()) {
7202 if (Subtarget->isWindowsArm64EC())
7208 if (!Subtarget->isTargetDarwin())
7212 return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
7216 if (Subtarget->isWindowsArm64EC())
7222 if (Subtarget->isWindowsArm64EC())
7245 if (Subtarget->isWindowsArm64EC())
7264 Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg());
7266 (isVarArg && Subtarget->isWindowsArm64EC());
7273 FuncInfo->setIsSVECC(true);
7292 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
7296 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
7324 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
7328 // non-compliant manner for larger structs.
7333 // FIXME: This works on big-endian for composite byvals, which are the common
7344 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
7366 FuncInfo->setIsSVECC(true);
7369 FuncInfo->setIsSVECC(true);
7372 FuncInfo->setIsSVECC(true);
7387 // tn: res,ch,glue = CopyFromReg t(n-1), ..
7403 // If this is an 8, 16 or 32-bit value, it is really passed promoted
7413 (VA.getValVT().isScalableVT() || Subtarget->isWindowsArm64EC()) &&
7437 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
7439 BEAlign = 8 - ArgSize;
7477 Subtarget->isWindowsArm64EC()) &&
7498 Subtarget->isWindowsArm64EC()) &&
7504 while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
7516 NumParts--;
7537 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
7541 // i1 arguments are zero-extended to i8 by the caller. Emit a
7545 if (OrigArg->getType()->isIntegerTy(1)) {
7566 FuncInfo->setPStateSMReg(Reg);
7587 if (!Subtarget->isTargetDarwin() || IsWin64) {
7588 // The AAPCS variadic function ABI is identical to the non-variadic
7598 // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
7599 VarArgsOffset = alignTo(VarArgsOffset, Subtarget->isTargetILP32() ? 4 : 8);
7600 FuncInfo->setVarArgsStackOffset(VarArgsOffset);
7601 FuncInfo->setVarArgsStackIndex(
7610 FuncInfo->getForwardedMustTailRegParms();
7630 assert(!FuncInfo->getSRetReturnReg());
7635 FuncInfo->setSRetReturnReg(Reg);
7647 // This is a non-standard ABI so by fiat I say we're allowed to make full
7654 FuncInfo->setArgumentStackToRestore(StackArgSize);
7662 FuncInfo->setBytesInStackArgArea(StackArgSize);
7664 if (Subtarget->hasCustomCallingConv())
7665 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
7670 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
7676 if (!Subtarget->isTargetWindows() && !hasInlineStackProbe(MF)) {
7696 DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
7718 Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg());
7724 if (Subtarget->isWindowsArm64EC()) {
7725 // In the ARM64EC ABI, only x0-x3 are used to pass arguments to varargs
7731 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
7735 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
7738 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
7743 if (Subtarget->isWindowsArm64EC()) {
7745 // compute its address relative to x4. For a normal AArch64->AArch64
7762 MF, GPRIdx, (i - FirstVariadicGPR) * 8)
7769 FuncInfo->setVarArgsGPRIndex(GPRIdx);
7770 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
7772 if (Subtarget->hasFPARMv8() && !IsWin64) {
7777 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
7795 FuncInfo->setVarArgsFPRIndex(FPRIdx);
7796 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
7804 /// LowerCallResult - Lower the result values of a call into the
7912 bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC, IsVarArg);
7982 // The check for matching callee-saved regs will determine whether it is
7985 MF.getInfo<AArch64FunctionInfo>()->isSVECC())
7990 // When using the Windows calling convention on a non-windows OS, we want
7993 if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
8003 if (i->hasByValAttr())
8006 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
8012 if (i->hasInRegAttr())
8019 // Externally-defined functions with weak linkage should not be
8020 // tail-called on AArch64 when the OS does not support dynamic
8021 // pre-emption of symbols, as the AAELF spec requires normal calls
8024 // situation (as used for tail calls) is implementation-defined, so we
8027 const GlobalValue *GV = G->getGlobal();
8029 if (GV->hasExternalWeakLinkage() &&
8050 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
8051 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8053 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8054 if (Subtarget->hasCustomCallingConv()) {
8055 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
8056 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
8058 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8071 if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
8093 Subtarget->isWindowsArm64EC()) &&
8101 if (CCInfo.getStackSize() > FuncInfo->getBytesInStackArgArea())
8117 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
8125 for (SDNode *U : DAG.getEntryNode().getNode()->uses())
8127 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
8128 if (FI->getIndex() < 0) {
8129 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
8131 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
8148 // Check if the value is zero-extended from i1 to i8
8162 // Live-in physreg copies that are glued to SMSTART are applied as
8163 // implicit-def's in the InstrEmitter. Here we remove them, allowing the
8165 // copies to avoid these fake clobbers of actually-preserved GPRs.
8168 for (unsigned I = MI.getNumOperands() - 1; I > 0; --I)
8187 // frame-address. If they contain a frame-index to a scalable vector, this
8191 if (MF.getInfo<AArch64FunctionInfo>()->hasStreamingModeChanges() &&
8209 FuncInfo->setHasStreamingModeChanges(true);
8211 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
8212 SDValue RegMask = DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask());
8243 /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
8265 bool IsCFICall = CLI.CB && CLI.CB->isIndirectCall() && CLI.CFIType;
8269 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
8270 !Subtarget->noBTIAtReturnTwice()) {
8271 GuardWithBTI = FuncInfo->branchTargetEnforcement();
8324 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
8341 // arguments to begin at SP+0. Completely unused for non-tail calls.
8345 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
8348 // popped size 16-byte aligned.
8354 FPDiff = NumReusableBytes - NumBytes;
8358 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
8359 FuncInfo->setTailCallReservedStack(-FPDiff);
8361 // The stack pointer must be 16-byte aligned at all times it's used for a
8364 // a 16-byte aligned SP and the delta applied for the tail call should
8374 CalleeAttrs = SMEAttrs(ES->getSymbol());
8377 [&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & {
8380 R << ore::NV("Callee", ES->getSymbol());
8381 else if (CLI.CB && CLI.CB->getCalledFunction())
8382 R << ore::NV("Callee", CLI.CB->getCalledFunction()->getName());
8391 const TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
8455 // PSTATE.ZA before the call if there is no lazy-save active.
8458 "Lazy-save should have PSTATE.SM=1 on entry to the function");
8479 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
8480 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
8490 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
8508 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
8511 // already be zero-extended.
8513 // We cannot just emit a (zext i8 (trunc (assert-zext i8)))
8517 // (ext (zext x)) -> (zext x)
8529 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
8545 assert((isScalable || Subtarget->isWindowsArm64EC()) &&
8552 while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
8575 NumParts--;
8613 // parts of an [N x i32] into an X-register. The extension type will
8621 ->second;
8649 // FIXME: This works on big-endian for composite byvals, which are the
8660 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
8663 BEAlign = 8 - OpSize;
8712 if (IsVarArg && Subtarget->isWindowsArm64EC()) {
8735 if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
8748 // Build a sequence of copy-to-reg nodes chained together with token chain
8760 auto GV = G->getGlobal();
8762 Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
8767 const GlobalValue *GV = G->getGlobal();
8772 Subtarget->isTargetMachO()) ||
8773 MF.getFunction().getParent()->getRtLibUseGOT();
8774 const char *Sym = S->getSymbol();
8783 // We don't usually want to end the call-sequence here because we would tidy
8784 // the frame up *after* the call, however in the ABI-changing tail-call case
8825 const uint64_t Key = CLI.PAI->Key;
8832 extractPtrauthBlendDiscriminators(CLI.PAI->Discriminator, &DAG);
8849 // Add a register mask operand representing the call-preserved registers.
8851 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
8853 // For 'this' returns, use the X0-preserving mask if applicable
8854 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
8857 Mask = TRI->getCallPreservedMask(MF, CallConv);
8860 Mask = TRI->getCallPreservedMask(MF, CallConv);
8862 if (Subtarget->hasCustomCallingConv())
8863 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
8865 if (TRI->isAnyArgRegReserved(MF))
8866 TRI->emitReservedArgRegCallError(MF);
8882 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
8892 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
8911 InGlue = Result.getValue(Result->getNumValues() - 1);
8919 if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
8941 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
8943 TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
8976 // a vreg -> vreg copy.
8990 DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
9040 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
9055 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
9066 })->second;
9074 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
9080 Register Reg = FuncInfo->getPStateSMReg();
9108 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
9122 const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&MF);
9158 //===----------------------------------------------------------------------===//
9160 //===----------------------------------------------------------------------===//
9165 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
9166 N->getOffset(), Flag);
9172 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
9178 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
9179 N->getOffset(), Flag);
9185 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
9191 return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag);
9251 const GlobalValue *GV = GN->getGlobal();
9252 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
9255 assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
9312 assert(Subtarget->isTargetDarwin() &&
9318 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
9343 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
9344 const uint32_t *Mask = TRI->getTLSCallPreservedMask();
9345 if (Subtarget->hasCustomCallingConv())
9346 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
9358 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
9359 if (DAG.getMachineFunction().getFunction().hasFnAttribute("ptrauth-calls")) {
9373 /// Convert a thread-local variable reference into a sequence of instructions to
9464 /// When accessing thread-local variables under either the general-dynamic or
9465 /// local-dynamic system, we make a "TLS-descriptor" call. The variable will
9466 /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
9479 /// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
9500 assert(Subtarget->isTargetELF() && "This function expects an ELF target");
9504 TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
9525 const GlobalValue *GV = GA->getGlobal();
9535 // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
9543 MFI->incNumLocalDynamicTLSAccesses();
9552 // thread-local area.
9556 // in its thread-storage area.
9587 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
9627 const GlobalValue *GV = GA->getGlobal();
9649 if (Subtarget->isTargetDarwin())
9651 if (Subtarget->isTargetELF())
9653 if (Subtarget->isTargetWindows())
9659 //===----------------------------------------------------------------------===//
9663 // - MOVaddrPAC: similar to MOVaddr, with added PAC.
9667 // - LOADgotPAC: similar to LOADgot, with added PAC.
9670 // section is assumed to be read-only (for example, via relro mechanism). See
9673 // - LOADauthptrstatic: similar to LOADgot, but use a
9681 // provide integrity guarantees on the to-be-signed intermediate values.
9684 // with often similarly-signed pointers, making it a good harvesting target.
9692 assert(TGN->getGlobal()->hasExternalWeakLinkage());
9698 if (TGN->getOffset() != 0)
9700 "unsupported non-zero offset in weak ptrauth global reference");
9703 report_fatal_error("unsupported weak addr-div ptrauth global");
9725 // Blend only works if the integer discriminator is 16-bit wide.
9730 // Choosing between 3 lowering alternatives is target-specific.
9731 if (!Subtarget->isTargetELF() && !Subtarget->isTargetMachO())
9740 const GlobalValue *PtrGV = PtrN->getGlobal();
9744 Subtarget->ClassifyGlobalReference(PtrGV, getTargetMachine());
9747 "unsupported non-GOT op flags on ptrauth global reference");
9750 PtrOffsetC += PtrN->getOffset();
9753 assert(PtrN->getTargetFlags() == 0 &&
9762 // No GOT load needed -> MOVaddrPAC
9764 assert(!PtrGV->hasExternalWeakLinkage() && "extern_weak should use GOT");
9771 // GOT load -> LOADgotPAC
9773 if (!PtrGV->hasExternalWeakLinkage())
9779 // extern_weak ref -> LOADauthptrstatic
9791 cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
9796 Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1};
9798 return {Val, Val.getValueSizeInBits() - 1};
9803 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
9835 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
9858 if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
9862 // out of bounds, a late MI-layer pass rewrites branches.
9878 // out of bounds, a late MI-layer pass rewrites branches.
9901 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
9940 if (!Subtarget->isNeonAvailable() &&
9941 !Subtarget->useSVEForFixedLengthVectors())
9960 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) {
9979 auto SetVecVal = [&](int Idx = -1) {
10011 // 64-bit elements. Instead, materialize all bits set and then negate that.
10039 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
10042 if (!Subtarget->isNeonAvailable())
10058 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
10059 // CNT V0.8B, V0.8B // 8xbyte pop-counts
10060 // ADDV B0, V0.8B // sum 8xbyte pop-counts
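The three-instruction sequence in the comment corresponds to this intrinsics-level sketch (assumes an AArch64 NEON target; the backend emits the pattern directly, this is only an illustration):

    #include <arm_neon.h>
    #include <cstdint>

    unsigned popcount64(uint64_t x) {
      uint8x8_t bytes = vcreate_u8(x);   // FMOV D0, X0
      uint8x8_t cnt   = vcnt_u8(bytes);  // CNT  V0.8B, V0.8B
      return vaddv_u8(cnt);              // ADDV B0, V0.8B
    }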
10104 if (Subtarget->hasDotProd() && VT.getScalarSizeInBits() != 16 &&
10141 VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()));
10174 VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
10201 VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
10252 // Skip the one-use zext
10253 if (N->getOpcode() == ISD::ZERO_EXTEND && N->hasOneUse())
10254 N = N->getOperand(0);
10257 if (N->getOpcode() == ISD::XOR) {
10258 WorkList.push_back(std::make_pair(N->getOperand(0), N->getOperand(1)));
10263 // All the non-leaf nodes must be OR.
10264 if (N->getOpcode() != ISD::OR || !N->hasOneUse())
10267 if (isOrXorChain(N->getOperand(0), Num, WorkList) &&
10268 isOrXorChain(N->getOperand(1), Num, WorkList))
10275 SDValue LHS = N->getOperand(0);
10276 SDValue RHS = N->getOperand(1);
10278 EVT VT = N->getValueType(0);
10282 if (N->getOpcode() != ISD::SETCC)
10285 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
10290 LHS->getOpcode() == ISD::OR && LHS->hasOneUse() &&
10314 bool IsStrict = Op->isStrictFPOpcode();
10322 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
10417 ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(3))->get();
10445 if ((LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
10459 // Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform
10460 // into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
10462 if (CC == ISD::SETGT && RHSC && RHSC->isAllOnes() && CTVal && CFVal &&
10463 CTVal->isOne() && CFVal->isAllOnes() &&
10468 DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
10473 // (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1))
10474 // (SELECT_CC setlt, lhs, 0, lhs, 0) -> (AND lhs, (SRA lhs, typesize-1))
10477 RHSC && RHSC->isZero() && CFVal && CFVal->isZero() &&
10482 DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
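The three rewrites above are the usual sign-splat identities; a 32-bit restatement in plain C++ (sketch only, assuming arithmetic right shift as on AArch64):

    #include <cstdint>

    int32_t signOrOne(int32_t x)   { return (x >> 31) | 1; }   // x > -1 ? 1 : -1
    int32_t clampNonNeg(int32_t x) { return x & ~(x >> 31); }  // x > 0 ? x : 0  (BIC form)
    int32_t clampNonPos(int32_t x) { return x & (x >> 31); }   // x < 0 ? x : 0  (AND form)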
10494 if (CTVal && CFVal && CTVal->isAllOnes() && CFVal->isZero()) {
10498 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isZero()) {
10519 const int64_t TrueVal = CTVal->getSExtValue();
10520 const int64_t FalseVal = CFVal->getSExtValue();
10529 TrueVal == -FalseVal) {
10532 // If our operands are only 32-bit wide, make sure we use 32-bit
10536 // 64-bit arithmetic).
10537 const uint32_t TrueVal32 = CTVal->getZExtValue();
10538 const uint32_t FalseVal32 = CFVal->getZExtValue();
10548 // 64-bit check whether we can use CSINC.
10581 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
10582 !RHSVal->isZero() && !RHSVal->isAllOnes()) {
10590 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
10592 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
10624 if (RHSVal && RHSVal->isZero()) {
10629 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
10632 CFVal && CFVal->isZero() &&
10664 // -1 -> vl1, -2 -> vl2, etc. The predicate will then be reversed to get the
10674 // Create a predicate where all but the last -IdxVal elements are false.
10694 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
10705 SDValue CCVal = Op->getOperand(0);
10706 SDValue TVal = Op->getOperand(1);
10707 SDValue FVal = Op->getOperand(2);
10725 if (useSVEForFixedLengthVectorVT(Ty, !Subtarget->isNeonAvailable())) {
10740 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
10758 CC = cast<CondCodeSDNode>(CCVal.getOperand(2))->get();
10767 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
10776 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
10791 !Subtarget->isTargetMachO())
10805 int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
10808 AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
10810 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
10813 "aarch64-jump-table-hardening")) {
10815 if (Subtarget->isTargetMachO()) {
10817 report_fatal_error("Unsupported code-model for hardened jump-table");
10820 assert(Subtarget->isTargetELF() &&
10823 report_fatal_error("Unsupported code-model for hardened jump-table");
10846 // Skip over the jump-table BRINDs, where the destination is JumpTableDest32.
10847 if (Dest->isMachineOpcode() &&
10848 Dest->getMachineOpcode() == AArch64::JumpTableDest32)
10853 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(MF.getFunction());
10874 if (Subtarget->isTargetMachO()) {
10888 const BlockAddress *BA = BAN->getBlockAddress();
10891 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(
10892 *BA->getFunction())) {
10896 SDValue TargetBA = DAG.getTargetBlockAddress(BA, BAN->getValueType(0));
10911 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) {
10926 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
10929 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
10941 if (Subtarget->isWindowsArm64EC()) {
10943 // relative to x4. For a normal AArch64->AArch64 call, x4 == sp on entry,
10948 if (FuncInfo->getVarArgsGPRSize() > 0)
10949 StackOffset = -(uint64_t)FuncInfo->getVarArgsGPRSize();
10951 StackOffset = FuncInfo->getVarArgsStackOffset();
10955 FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
10956 ? FuncInfo->getVarArgsGPRIndex()
10957 : FuncInfo->getVarArgsStackIndex(),
10960 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
10971 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
10978 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
10983 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
10990 int GPRSize = FuncInfo->getVarArgsGPRSize();
10997 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
11009 int FPRSize = FuncInfo->getVarArgsFPRSize();
11015 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
11030 DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32),
11038 DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32),
11049 if (Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg()))
11051 else if (Subtarget->isTargetDarwin())
11062 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
11064 (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
11066 : Subtarget->isTargetILP32() ? 20 : 32;
11067 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
11068 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
11078 assert(Subtarget->isTargetDarwin() &&
11081 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
11087 unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
11101 DAG.getConstant(Align->value() - 1, DL, PtrVT));
11103 DAG.getConstant(-(int64_t)Align->value(), DL, PtrVT));
11157 while (Depth--)
11161 if (Subtarget->isTargetILP32())
11187 const AArch64RegisterInfo *MRI = Subtarget->getRegisterInfo();
11188 unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
11189 if (!Subtarget->isXRegisterReserved(DwarfRegNum) &&
11190 !MRI->isReservedReg(MF, Reg))
11231 // live-in.
11236 // The XPACLRI instruction assembles to a hint-space instruction before
11237 // Armv8.3-A, therefore this instruction can be safely used for any pre-
11238 // Armv8.3-A architecture. On Armv8.3-A and onwards, XPACI is available, so use
11241 if (Subtarget->hasPAuth()) {
11252 /// LowerShiftParts - Lower SHL_PARTS/SRA_PARTS/SRL_PARTS, which returns two
11271 // We can materialize #0.0 as fmov $Rd, XZR for the 64-bit and 32-bit cases, and
11272 // for the 16-bit case when the target has full fp16 support.
11281 IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
11283 IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
11286 (Subtarget->hasFullFP16() && AArch64_AM::getFP16Imm(ImmInt) != -1) ||
11302 unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
11311 //===----------------------------------------------------------------------===//
11313 //===----------------------------------------------------------------------===//
11319 if ((ST->hasNEON() &&
11323 (ST->hasSVE() &&
11328 // the initial estimate is 2^-8. Thus the number of extra steps to refine
11336 : Log2_64_Ceil(DesiredBits) - Log2_64_Ceil(AccurateBits);
11367 (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
11376 // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
11377 // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
11378 for (int i = ExtraSteps; i > 0; --i) {
11406 // Newton reciprocal iteration: E * (2 - X * E)
11407 // AArch64 reciprocal iteration instruction: (2 - M * N)
11408 for (int i = ExtraSteps; i > 0; --i) {
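In scalar form, the refinement steps named in these comments are as follows (math-only sketch; the backend builds the equivalent DAG around FRSQRTE/FRSQRTS and FRECPE/FRECPS):

    // Refine an estimate e of 1/sqrt(x): e' = e * 0.5 * (3 - x * e * e).
    float refineRsqrt(float x, float e, int steps) {
      for (int i = 0; i < steps; ++i)
        e = e * 0.5f * (3.0f - x * e * e);
      return e;
    }

    // Refine an estimate e of 1/x: e' = e * (2 - x * e).
    float refineRecip(float x, float e, int steps) {
      for (int i = 0; i < steps; ++i)
        e = e * (2.0f - x * e);
      return e;
    }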
11421 //===----------------------------------------------------------------------===//
11423 //===----------------------------------------------------------------------===//
11429 // r - A general register
11430 // w - An FP/SIMD register of some size in the range v0-v31
11431 // x - An FP/SIMD register of some size in the range v0-v15
11432 // I - Constant that can be used with an ADD instruction
11433 // J - Constant that can be used with a SUB instruction
11434 // K - Constant that can be used with a 32-bit logical instruction
11435 // L - Constant that can be used with a 64-bit logical instruction
11436 // M - Constant that can be used as a 32-bit MOV immediate
11437 // N - Constant that can be used as a 64-bit MOV immediate
11438 // Q - A memory reference with base register and no offset
11439 // S - A symbolic address
11440 // Y - Floating point constant zero
11441 // Z - Integer constant zero
11443 // Note that general register operands will be output using their 64-bit x
11445 // is prefixed by the %w modifier. Floating-point and SIMD register operands
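As a usage reminder (not part of this file), this is how the 'r' constraint and the %w modifier documented above appear in GNU-style inline assembly; immediate operands would use 'I', 'J', 'K', 'L', and so on as listed:

    #include <cstdint>

    uint32_t add32(uint32_t a, uint32_t b) {
      uint32_t out;
      // 'r' picks a general register; %wN prints its 32-bit (w) name.
      asm("add %w0, %w1, %w2" : "=r"(out) : "r"(a), "r"(b));
      return out;
    }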
11456 if (!Subtarget->hasFPARMv8())
11528 // https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Flag-Output-Operands
11593 /// getConstraintType - Given a constraint letter, return the type of
11643 Type *type = CallOperandVal->getType();
11652 if (type->isFloatingPointTy() || type->isVectorTy())
11675 if (Subtarget->hasLS64() && VT.getSizeInBits() == 512)
11681 if (!Subtarget->hasFPARMv8())
11702 // only take 128-bit registers so just use that regclass.
11704 if (!Subtarget->hasFPARMv8())
11712 if (!Subtarget->hasFPARMv8())
11748 tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
11750 bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
11752 // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
11753 // By default we'll emit v0-v31 for this unless there's a modifier where
11766 if (Res.second && !Subtarget->hasFPARMv8() &&
11777 if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
11783 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
11830 uint64_t CVal = C->getZExtValue();
11836 // instruction [or vice versa], in other words -1 to -4095 with optional
11843 uint64_t NVal = -C->getSExtValue();
11845 CVal = C->getSExtValue();
11853 // distinguish between bit patterns that are valid 32-bit or 64-bit
11867 // also match 32 or 64-bit immediates that can be loaded either using a
11868 // *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
11869 // (M) or 64-bit 0x1234000000000000 (N) etc.
11913 // All assembler immediates are 64-bit integers.
11926 //===----------------------------------------------------------------------===//
11928 //===----------------------------------------------------------------------===//
11930 /// WidenVector - Given a value in the V64 register class, produce the
11943 /// getExtFactor - Determine the adjustment factor for the position when
11993 MaskSource = MaskSource->getOperand(0);
12009 !cast<ConstantSDNode>(MaskIdx)->getConstantIntValue()->equalsInt(i))
12015 MaskSourceVec = MaskSource->getOperand(0);
12018 } else if (MaskSourceVec != MaskSource->getOperand(0)) {
12026 // of elements in the source, or we would have an out-of-bounds access.
12089 "various elements of other fixed-width vectors, provided "
12102 Source->MinElt = std::min(Source->MinElt, EltNo);
12103 Source->MaxElt = std::max(Source->MaxElt, EltNo);
12117 Mask.push_back(-1);
12222 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
12233 Src.WindowBase = -NumSrcElts;
12259 Src.WindowBase = -Src.MinElt;
12288 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
12296 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
12310 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
12311 ExtractBase += NumElts * (Src - Sources.begin());
12447 SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
12464 // NumEltsPerBlock with some values possibly replaced by undef-s.
12466 // Find first non-undef element
12469 "Shuffle with all-undefs must have been caught by previous cases, "
12477 size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
12482 size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
12501 // Look for the first non-undef element.
12511 return Elt != ExpectedElt++ && Elt != -1;
12519 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
12520 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
12526 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
12527 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
12533 Imm -= NumElts;
12538 /// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
12557 /// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
12576 /// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
12598 int LastLHSMismatch = -1, LastRHSMismatch = -1;
12601 if (M[i] == -1) {
12618 if (NumLHSMatch == NumInputElements - 1) {
12622 } else if (NumRHSMatch == NumInputElements - 1) {
12656 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
12679 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
12680 /// the specified operations to build the shuffle. ID is the perfect-shuffle
12689 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
12690 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
12720 auto getPFIDLane = [](unsigned ID, int Elt) -> int {
12722 Elt = 3 - Elt;
12725 Elt--;
12727 return (ID % 9 == 8) ? -1 : ID % 9;
12744 if (MaskElt == -1)
12745 MaskElt = (getPFIDLane(ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
12747 ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
12761 ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
12793 // vrev <4 x i16> -> REV32
12798 // vrev <4 x i8> -> REV16
12820 SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
12826 unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
12862 // out of range values with 0s. We do need to make sure that any out-of-range
12863 // values are really out-of-range for a v16i8 vector.
12877 Offset = Offset < IndexLen ? Offset + IndexLen : Offset - IndexLen;
12951 // Can't handle cases where vector size is not 128-bit
12959 // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
12961 // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
12962 // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
12975 // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
12980 // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
12982 Lane -= Idx * VT.getVectorNumElements() / 2;
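// [Editor's sketch, not part of the original file] The lane arithmetic behind
// the two examples above: duplicating lane L of half Idx (HalfElts elements
// wide) equals duplicating lane Idx * HalfElts + L of the wide vector, and
// the concat case inverts that mapping.
static_assert(1 * 2 + 1 == 3, "dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3");
static_assert(3 - 1 * (4 / 2) == 1, "dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1");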
12985 // Widen the operand to 128-bit register with undef.
13006 if (M0 == -1 && M1 == -1) {
13007 NewMask.push_back(-1);
13011 if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {
13016 if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {
13046 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
13048 // If combining adjacent elements, like two i16's -> i32, two i32's -> i64 ...
13075 SDValue Tbl1 = Op->getOperand(0);
13076 SDValue Tbl2 = Op->getOperand(1);
13082 if (Tbl1->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13083 Tbl1->getOperand(0) != Tbl2ID ||
13084 Tbl2->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13085 Tbl2->getOperand(0) != Tbl2ID)
13088 if (Tbl1->getValueType(0) != MVT::v16i8 ||
13089 Tbl2->getValueType(0) != MVT::v16i8)
13092 SDValue Mask1 = Tbl1->getOperand(3);
13093 SDValue Mask2 = Tbl2->getOperand(3);
13097 TBLMaskParts[I] = Mask1->getOperand(ShuffleMask[I]);
13100 dyn_cast<ConstantSDNode>(Mask2->getOperand(ShuffleMask[I] - 16));
13103 TBLMaskParts[I] = DAG.getConstant(C->getSExtValue() + 32, dl, MVT::i32);
13112 {ID, Tbl1->getOperand(1), Tbl1->getOperand(2),
13113 Tbl2->getOperand(1), Tbl2->getOperand(2), TBLMask});
13116 // Baseline legalization for ZERO_EXTEND_VECTOR_INREG will blend-in zeros,
13118 // so custom-lower it as ZIP1-with-zeros.
13129 // FIXME: support multi-step zipping?
13144 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
13147 // Convert shuffles that are directly supported on NEON to target-specific
13151 ArrayRef<int> ShuffleMask = SVN->getMask();
13163 if (SVN->isSplat()) {
13164 int Lane = SVN->getSplatIndex();
13166 if (Lane == -1)
13172 // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
13226 } else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
13273 SrcLane -= NumElts;
13292 // the PerfectShuffle-generated table to synthesize it from other shuffles.
13317 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
13352 // Current lowering only supports the SVE-ACLE types.
13361 if (CIdx && (CIdx->getZExtValue() <= 3)) {
13362 SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
13392 EVT VT = BVN->getValueType(0);
13396 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
13412 // Try 64-bit splatted SIMD immediate.
13433 // Try 32-bit splatted SIMD immediate.
13486 // Try 16-bit splatted SIMD immediate.
13531 // Try 32-bit splatted SIMD immediate with shifted ones.
13562 // Try 8-bit splatted SIMD immediate.
13622 ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
13625 EVT VT = Bvec->getValueType(0);
13628 if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
13630 ConstVal = FirstElt->getZExtValue();
13664 // If we're compiling for a specific vector-length, we can check if the
13684 // - for the SLI case: C1 == ~(Ones(ElemSizeInBits) << C2)
13685 // - for the SRI case: C1 == ~(Ones(ElemSizeInBits) >> C2)
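// [Editor's sketch, evaluated within an 8-bit element with C2 = 3] The AND
// mask must keep exactly the destination bits that the shifted-in value does
// not overwrite: SLI keeps the low end, SRI keeps the high end.
static_assert((~(0xffu << 3) & 0xffu) == 0x07u, "SLI #3: mask keeps the low 3 bits");
static_assert((~(0xffu >> 3) & 0xffu) == 0xe0u, "SRI #3: mask keeps the high 3 bits");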
13688 EVT VT = N->getValueType(0);
13698 SDValue FirstOp = N->getOperand(0);
13700 SDValue SecondOp = N->getOperand(1);
13741 C2 = C2node->getZExtValue();
13756 C1AsAPInt = ~(C1nodeImm->getAPIntValue() << C1nodeShift->getAPIntValue());
13779 LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
13780 LLVM_DEBUG(N->dump(&DAG));
13782 LLVM_DEBUG(ResultSLI->dump(&DAG));
13791 !Subtarget->isNeonAvailable()))
13831 // We can always fall back to a non-immediate OR.
13848 for (SDValue Lane : Op->ops()) {
13850 // operands already. Otherwise, if Op is a floating-point splat
13855 CstLane->getZExtValue());
13857 } else if (Lane.getNode()->isUndef()) {
13909 // FNegate each sub-element of the constant
13934 (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
13945 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) {
13946 if (auto SeqInfo = cast<BuildVectorSDNode>(Op)->isConstantSequence()) {
13949 SDValue Start = DAG.getConstant(SeqInfo->first, DL, ContainerVT);
13950 SDValue Steps = DAG.getStepVector(DL, ContainerVT, SeqInfo->second);
13961 // Though this might return a non-BUILD_VECTOR (e.g. CONCAT_VECTORS); if so,
13971 if (BVN->isConstant()) {
13972 if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
13975 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
13979 if (ConstantFPSDNode *Const = BVN->getConstantFPSplatNode())
13980 if (Const->isZero() && !Const->isNegative())
13991 // 3) if only one constant value is used (w/ some non-constant lanes),
13993 // in the non-constant lanes.
13995 // select the values we'll be overwriting for the non-constant
14053 // ------------------------------------------------------------------
14068 // SCALAR_TO_VECTOR, except for when we have a single-element constant vector
14085 if (!isa<ConstantSDNode>(N->getOperand(1))) {
14090 SDValue N0 = N->getOperand(0);
14108 uint64_t Val = N->getConstantOperandVal(1);
14113 if (Val - 1 == 2 * i) {
14138 // Use DUP for non-constant splats. For f32 constant splats, reduce to
14145 dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
14155 dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
14168 EltTy == MVT::f64) && "Unsupported floating-point vector type");
14185 // If we need to insert a small number of different non-constant elements and
14191 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
14195 // start by splatting that value, then replace the non-constant lanes. This
14204 ConstantValueAPInt = C->getAPIntValue().zextOrTrunc(BitSize);
14213 // Now insert the non-constant lanes.
14340 // worse. For a vector with one or two non-undef values, that's
14360 // vector element types. After type-legalization the inserted value is
14389 !Subtarget->isNeonAvailable()))
14397 unsigned NumOperands = Op->getNumOperands();
14405 SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
14428 !Subtarget->isNeonAvailable()))
14449 // Check for non-constant or out of range lane.
14451 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
14476 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
14479 // Check for non-constant or out of range lane.
14481 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
14529 // If this is extracting the upper 64-bits of a 128-bit vector, we match
14531 if (Idx * InVT.getScalarSizeInBits() == 64 && Subtarget->isNeonAvailable())
14536 useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable())) {
14595 DAG.getVectorIdxConstant(Idx - (NumElts / 2), DL));
14663 !isa<ConstantSDNode>(Op->getOperand(0)))
14666 SplatVal = Op->getConstantOperandVal(0);
14675 if (isPowerOf2_64(-SplatVal)) {
14676 SplatVal = -SplatVal;
14700 DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, VT, Pg, Op->getOperand(0),
14711 // SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
14734 if (!Subtarget->isNeonAvailable())
14741 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
14778 /// getVShiftImm - Check if this is a valid build_vector for the immediate
14789 if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
14797 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
14806 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
14809 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
14838 !Subtarget->isNeonAvailable()))
14851 if (Shift->getOpcode() != ISD::SRL)
14858 dyn_cast_or_null<ConstantSDNode>(DAG.getSplatValue(Shift->getOperand(1)));
14862 ShiftValue = ShiftOp1->getZExtValue();
14866 SDValue Add = Shift->getOperand(0);
14867 if (Add->getOpcode() != ISD::ADD || !Add->hasOneUse())
14873 uint64_t ExtraBits = VT.getScalarSizeInBits() - ResVT.getScalarSizeInBits();
14874 if (ShiftValue > ExtraBits && !Add->getFlags().hasNoUnsignedWrap())
14878 dyn_cast_or_null<ConstantSDNode>(DAG.getSplatValue(Add->getOperand(1)));
14881 uint64_t AddValue = AddOp1->getZExtValue();
14882 if (AddValue != 1ULL << (ShiftValue - 1))
14885 RShOperand = Add->getOperand(0);
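// [Editor's sketch, standalone] The add-then-shift matched above is a
// round-to-nearest shift, i.e. srl(add(x, 1 << (s - 1)), s), which is the
// rounding behaviour of the URSHR/RSHRN-style node formed from it.
constexpr unsigned long long roundingShiftSketch(unsigned long long X, unsigned S) {
  return (X + (1ULL << (S - 1))) >> S;
}
static_assert(roundingShiftSketch(10, 2) == 3, "10/4 = 2.5 rounds up");
static_assert(roundingShiftSketch(9, 2) == 2, "9/4 = 2.25 rounds down");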
14902 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
14915 (Subtarget->hasSVE2() ||
14916 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
14926 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) {
14937 DAG.getConstant(Cnt, DL, MVT::i32), Op->getFlags());
14971 bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef,
15076 !Subtarget->isNeonAvailable()))
15079 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
15093 // Lower isnan(x) | isnan(never-nan) to x != x.
15094 // Lower !isnan(x) & !isnan(never-nan) to x == x.
15137 bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs();
15188 "Expected power-of-2 length vector");
15274 bool OverrideNEON = !Subtarget->isNeonAvailable() ||
15283 SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) {
15350 // LSE has an atomic load-clear instruction, but not a load-and.
15356 RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS);
15357 return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(),
15359 AN->getMemOperand());
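// [Editor's sketch, illustrative model] Why the rewrite above is sound:
// LDCLR computes Mem &= ~Rs, so an atomic AND with RHS is the same as an
// atomic CLR with ~RHS.
constexpr unsigned long long clrModelSketch(unsigned long long Mem, unsigned long long Rs) {
  return Mem & ~Rs;
}
static_assert(clrModelSketch(0xf0f0, ~0x00ffULL) == (0xf0f0 & 0x00ff), "AND m == CLR ~m");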
15372 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
15373 EVT VT = Node->getValueType(0);
15376 "no-stack-arg-probe")) {
15382 DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
15391 SDValue Callee = DAG.getTargetExternalSymbol(Subtarget->getChkStkName(),
15394 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
15395 const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
15396 if (Subtarget->hasCustomCallingConv())
15397 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
15408 // from X15 here doesn't work at -O0, since it thinks that X15 is undefined
15419 DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
15437 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
15439 EVT VT = Node->getValueType(0);
15447 DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
15460 if (Subtarget->isTargetWindows())
15470 if (Subtarget->hasSVE2())
15495 const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
15500 assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
15504 Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
15506 Info.ptrVal = CI.getArgOperand(CI.arg_size() - 1);
15513 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
15536 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
15537 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
15554 unsigned NumElts = StructTy->getNumElements();
15555 Type *VecTy = StructTy->getElementType(0);
15557 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), EleVT, NumElts);
15558 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
15574 Type *ArgTy = Arg->getType();
15575 if (!ArgTy->isVectorTy())
15579 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
15580 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
15593 Type *VecTy = I.getArgOperand(0)->getType();
15597 Type *ArgTy = Arg->getType();
15598 if (!ArgTy->isVectorTy())
15603 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), EleVT, NumElts);
15604 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
15652 Type *ElTy = cast<VectorType>(I.getType())->getElementType();
15663 cast<VectorType>(I.getArgOperand(0)->getType())->getElementType();
15665 Info.memVT = MVT::getVT(I.getOperand(0)->getType());
15676 Info.memVT = MVT::getVT(Val->getType());
15707 const SDValue &Base = Mem->getBasePtr();
15712 // It's unknown whether a scalable vector has a power-of-2 bitwidth.
15713 if (Mem->getMemoryVT().isScalableVector())
15718 uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8;
15729 if ((VT == MVT::i64 || VT == MVT::i32) && Extend->use_size()) {
15742 // Truncations from 64-bit GPR to 32-bit GPR is free.
15744 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
15746 uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedValue();
15747 uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedValue();
15762 if (I->getOpcode() != Instruction::FMul)
15765 if (!I->hasOneUse())
15768 Instruction *User = I->user_back();
15770 if (!(User->getOpcode() == Instruction::FSub ||
15771 User->getOpcode() == Instruction::FAdd))
15775 const Function *F = I->getFunction();
15776 const DataLayout &DL = F->getDataLayout();
15777 Type *Ty = User->getOperand(0)->getType();
15785 // All 32-bit GPR operations implicitly zero the high-half of the corresponding
15786 // 64-bit GPR.
15788 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
15790 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
15791 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
15811 // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
15822 if (Ext->getType()->isVectorTy())
15825 for (const Use &U : Ext->uses()) {
15833 switch (Instr->getOpcode()) {
15835 if (!isa<ConstantInt>(Instr->getOperand(1)))
15840 auto &DL = Ext->getDataLayout();
15841 std::advance(GTI, U.getOperandNo()-1);
15844 // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
15846 // log2(sizeof(IdxTy)) - log2(8).
15847 if (IdxTy->isScalableTy())
15850 llvm::countr_zero(DL.getTypeStoreSizeInBits(IdxTy).getFixedValue()) -
15861 if (Instr->getType() == Ext->getOperand(0)->getType())
15876 return all_equal(Shuf->getShuffleMask());
15885 auto *FullTy = FullV->getType();
15886 auto *HalfTy = HalfV->getType();
15887 return FullTy->getPrimitiveSizeInBits().getFixedValue() ==
15888 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
15892 auto *FullVT = cast<FixedVectorType>(FullV->getType());
15893 auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
15894 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
15920 int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
15940 return Ext->getType()->getScalarSizeInBits() ==
15941 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
15959 ElementIndex->getValue() == 1 &&
15960 isa<FixedVectorType>(VectorOperand->getType()) &&
15961 cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2;
15972 if (!GEP || GEP->getNumOperands() != 2)
15975 Value *Base = GEP->getOperand(0);
15976 Value *Offsets = GEP->getOperand(1);
15979 if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
15982 // Sink extends that would allow us to use 32-bit offset vectors.
15985 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
15986 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
15987 Ops.push_back(&GEP->getOperandUse(1));
16002 Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0));
16007 Value *ZExtOp = cast<Instruction>(Op)->getOperand(0);
16008 Ops.push_back(&cast<Instruction>(ZExtOp)->getOperandUse(0));
16009 Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0));
16021 switch (II->getIntrinsicID()) {
16024 if (areExtractShuffleVectors(II->getOperand(0), II->getOperand(1),
16026 Ops.push_back(&II->getOperandUse(0));
16027 Ops.push_back(&II->getOperandUse(1));
16033 if (isa<VectorType>(I->getType()) &&
16034 cast<VectorType>(I->getType())->getElementType()->isHalfTy() &&
16035 !Subtarget->hasFullFP16())
16042 if (isSplatShuffle(II->getOperand(0)))
16043 Ops.push_back(&II->getOperandUse(0));
16044 if (isSplatShuffle(II->getOperand(1)))
16045 Ops.push_back(&II->getOperandUse(1));
16052 if (isSplatShuffle(II->getOperand(1)))
16053 Ops.push_back(&II->getOperandUse(1));
16054 if (isSplatShuffle(II->getOperand(2)))
16055 Ops.push_back(&II->getOperandUse(2));
16059 if (auto *IIOp = dyn_cast<IntrinsicInst>(II->getOperand(0)))
16060 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
16061 Ops.push_back(&II->getOperandUse(0));
16067 auto *Idx = dyn_cast<Instruction>(II->getOperand(1));
16068 if (!Idx || Idx->getOpcode() != Instruction::Add)
16070 Ops.push_back(&II->getOperandUse(1));
16097 auto *Idx = dyn_cast<Instruction>(II->getOperand(3));
16098 if (!Idx || Idx->getOpcode() != Instruction::Add)
16100 Ops.push_back(&II->getOperandUse(3));
16104 if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
16106 Ops.push_back(&II->getOperandUse(0));
16107 Ops.push_back(&II->getOperandUse(1));
16110 if (!areOperandsOfVmullHighP64(II->getArgOperand(0),
16111 II->getArgOperand(1)))
16113 Ops.push_back(&II->getArgOperandUse(0));
16114 Ops.push_back(&II->getArgOperandUse(1));
16117 if (!shouldSinkVectorOfPtrs(II->getArgOperand(0), Ops))
16119 Ops.push_back(&II->getArgOperandUse(0));
16122 if (!shouldSinkVectorOfPtrs(II->getArgOperand(1), Ops))
16124 Ops.push_back(&II->getArgOperandUse(1));
16132 switch (I->getOpcode()) {
16136 for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
16137 if (shouldSinkVScale(I->getOperand(Op), Ops)) {
16138 Ops.push_back(&I->getOperandUse(Op));
16147 if (!I->getType()->isVectorTy())
16150 switch (I->getOpcode()) {
16153 if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
16158 auto Ext1 = cast<Instruction>(I->getOperand(0));
16159 auto Ext2 = cast<Instruction>(I->getOperand(1));
16160 if (areExtractShuffleVectors(Ext1->getOperand(0), Ext2->getOperand(0))) {
16161 Ops.push_back(&Ext1->getOperandUse(0));
16162 Ops.push_back(&Ext2->getOperandUse(0));
16165 Ops.push_back(&I->getOperandUse(0));
16166 Ops.push_back(&I->getOperandUse(1));
16171 // Pattern: Or(And(MaskValue, A), And(Not(MaskValue), B)) ->
16172 // bitselect(MaskValue, A, B) where Not(MaskValue) = Xor(MaskValue, -1)
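// [Editor's sketch, not part of the original file] The scalar identity behind
// the BSL/bitselect pattern above: (M & A) | (~M & B) selects A's bits where
// the mask is set and B's bits elsewhere.
constexpr unsigned bitselectModelSketch(unsigned M, unsigned A, unsigned B) {
  return (M & A) | (~M & B);
}
static_assert(bitselectModelSketch(0x0f, 0xaa, 0x55) == 0x5a,
              "low nibble taken from A, high nibble taken from B");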
16173 if (Subtarget->hasNEON()) {
16182 Instruction *MainAnd = I->getOperand(0) == OtherAnd
16183 ? cast<Instruction>(I->getOperand(1))
16184 : cast<Instruction>(I->getOperand(0));
16187 if (I->getParent() != MainAnd->getParent() ||
16188 I->getParent() != OtherAnd->getParent())
16191 // Non-mask operands of both Ands should also be in same basic block
16192 if (I->getParent() != IA->getParent() ||
16193 I->getParent() != IB->getParent())
16196 Ops.push_back(&MainAnd->getOperandUse(MainAnd->getOperand(0) == IA ? 1 : 0));
16197 Ops.push_back(&I->getOperandUse(0));
16198 Ops.push_back(&I->getOperandUse(1));
16209 for (auto &Op : I->operands()) {
16211 if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
16228 match(Shuffle->getOperand(0), m_ZExtOrSExt(m_Value()))) {
16229 Ops.push_back(&Shuffle->getOperandUse(0));
16231 if (match(Shuffle->getOperand(0), m_SExt(m_Value())))
16241 Value *ShuffleOperand = Shuffle->getOperand(0);
16246 Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
16251 dyn_cast<ConstantInt>(Insert->getOperand(2));
16253 if (!ElementConstant || !ElementConstant->isZero())
16256 unsigned Opcode = OperandInstr->getOpcode();
16264 unsigned Bitwidth = I->getType()->getScalarSizeInBits();
16266 const DataLayout &DL = I->getDataLayout();
16272 Ops.push_back(&Shuffle->getOperandUse(0));
16302 for (unsigned I = IsLittleEndian ? 0 : Factor - 1; I < MaskLen; I += Factor)
16312 auto *SrcTy = cast<FixedVectorType>(Op->getType());
16313 unsigned NumElts = SrcTy->getNumElements();
16314 auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
16315 auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
16333 auto *SrcTy = cast<FixedVectorType>(Op->getType());
16334 auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
16335 auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
16338 if (!createTblShuffleMask(SrcWidth, DstWidth, SrcTy->getNumElements(),
16351 int NumElements = cast<FixedVectorType>(TI->getType())->getNumElements();
16352 auto *SrcTy = cast<FixedVectorType>(TI->getOperand(0)->getType());
16353 auto *DstTy = cast<FixedVectorType>(TI->getType());
16354 assert(SrcTy->getElementType()->isIntegerTy() &&
16355 "Non-integer type source vector element is not supported");
16356 assert(DstTy->getElementType()->isIntegerTy(8) &&
16359 cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
16361 cast<IntegerType>(DstTy->getElementType())->getBitWidth();
16373 // 0,8,16,..Y*8th bytes for the little-endian format
16379 : Itr * TruncFactor + (TruncFactor - 1)));
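// [Editor's sketch, illustrative helper] Which source byte the mask entry
// above selects for element Itr when truncating TruncFactor-byte elements
// down to one byte.
constexpr unsigned truncTblByteSketch(unsigned Itr, unsigned TruncFactor, bool IsLittleEndian) {
  return IsLittleEndian ? Itr * TruncFactor : Itr * TruncFactor + (TruncFactor - 1);
}
static_assert(truncTblByteSketch(2, 8, true) == 16, "little-endian: bytes 0, 8, 16, ...");
static_assert(truncTblByteSketch(0, 4, false) == 3, "big-endian: low byte sits at the high address");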
16402 Builder.CreateShuffleVector(TI->getOperand(0), ShuffleLanes), VecTy));
16405 auto *F = Intrinsic::getDeclaration(TI->getModule(),
16435 auto *F = Intrinsic::getDeclaration(TI->getModule(), TblID, VecTy);
16463 TI->replaceAllUsesWith(FinalResult);
16464 TI->eraseFromParent();
16471 if (!EnableExtToTBL || Subtarget->useSVEForFixedLengthVectors())
16478 Function *F = I->getParent()->getParent();
16479 if (!L || L->getHeader() != I->getParent() || F->hasMinSize() ||
16480 F->hasOptSize())
16483 auto *SrcTy = dyn_cast<FixedVectorType>(I->getOperand(0)->getType());
16484 auto *DstTy = dyn_cast<FixedVectorType>(I->getType());
16492 if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) {
16493 auto DstWidth = DstTy->getElementType()->getScalarSizeInBits();
16499 // If the ZExt can be lowered to a single ZExt to the next power-of-2 and
16501 auto SrcWidth = SrcTy->getElementType()->getScalarSizeInBits();
16502 if (TTI.getCastInstrCost(I->getOpcode(), DstTy, TruncDstType,
16505 if (SrcWidth * 2 >= TruncDstType->getElementType()->getScalarSizeInBits())
16512 Builder, ZExt->getOperand(0), cast<FixedVectorType>(ZExt->getType()),
16513 DstTy, Subtarget->isLittleEndian());
16516 ZExt->replaceAllUsesWith(Result);
16517 ZExt->eraseFromParent();
16522 if (UIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
16523 DstTy->getElementType()->isFloatTy()) {
16526 Builder, I->getOperand(0), FixedVectorType::getInteger(DstTy),
16527 FixedVectorType::getInteger(DstTy), Subtarget->isLittleEndian());
16530 I->replaceAllUsesWith(UI);
16531 I->eraseFromParent();
16536 if (SIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
16537 DstTy->getElementType()->isFloatTy()) {
16539 auto *Shuffle = createTblShuffleForSExt(Builder, I->getOperand(0),
16541 Subtarget->isLittleEndian());
16546 I->replaceAllUsesWith(SI);
16547 I->eraseFromParent();
16555 (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
16556 SrcTy->getElementType()->isFloatTy() &&
16557 DstTy->getElementType()->isIntegerTy(8)) {
16559 auto *WideConv = Builder.CreateFPToUI(FPToUI->getOperand(0),
16562 I->replaceAllUsesWith(TruncI);
16563 I->eraseFromParent();
16564 createTblForTrunc(cast<TruncInst>(TruncI), Subtarget->isLittleEndian());
16570 // per lane of the input that is represented using 1,2,3 or 4 128-bit table
16573 if (TI && DstTy->getElementType()->isIntegerTy(8) &&
16574 ((SrcTy->getElementType()->isIntegerTy(32) ||
16575 SrcTy->getElementType()->isIntegerTy(64)) &&
16576 (SrcTy->getNumElements() == 16 || SrcTy->getNumElements() == 8))) {
16577 createTblForTrunc(TI, Subtarget->isLittleEndian());
16600 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
16601 unsigned MinElts = VecTy->getElementCount().getKnownMinValue();
16603 VecSize = std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
16609 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
16617 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
16618 auto EC = VecTy->getElementCount();
16623 if (isa<FixedVectorType>(VecTy) && !Subtarget->isNeonAvailable() &&
16624 (!Subtarget->useSVEForFixedLengthVectors() ||
16629 !Subtarget->isSVEorStreamingSVEAvailable())
16646 if (Subtarget->useSVEForFixedLengthVectors()) {
16648 std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
16651 (!Subtarget->isNeonAvailable() || VecSize > 128))) {
16659 return Subtarget->isNeonAvailable() && (VecSize == 64 || VecSize % 128 == 0);
16663 if (VTy->getElementType() == Type::getDoubleTy(VTy->getContext()))
16664 return ScalableVectorType::get(VTy->getElementType(), 2);
16666 if (VTy->getElementType() == Type::getFloatTy(VTy->getContext()))
16667 return ScalableVectorType::get(VTy->getElementType(), 4);
16669 if (VTy->getElementType() == Type::getBFloatTy(VTy->getContext()))
16670 return ScalableVectorType::get(VTy->getElementType(), 8);
16672 if (VTy->getElementType() == Type::getHalfTy(VTy->getContext()))
16673 return ScalableVectorType::get(VTy->getElementType(), 8);
16675 if (VTy->getElementType() == Type::getInt64Ty(VTy->getContext()))
16676 return ScalableVectorType::get(VTy->getElementType(), 2);
16678 if (VTy->getElementType() == Type::getInt32Ty(VTy->getContext()))
16679 return ScalableVectorType::get(VTy->getElementType(), 4);
16681 if (VTy->getElementType() == Type::getInt16Ty(VTy->getContext()))
16682 return ScalableVectorType::get(VTy->getElementType(), 8);
16684 if (VTy->getElementType() == Type::getInt8Ty(VTy->getContext()))
16685 return ScalableVectorType::get(VTy->getElementType(), 16);
16701 return Intrinsic::getDeclaration(M, SVELoads[Factor - 2], {LDVTy});
16703 return Intrinsic::getDeclaration(M, NEONLoads[Factor - 2], {LDVTy, PtrTy});
16717 return Intrinsic::getDeclaration(M, SVEStores[Factor - 2], {STVTy});
16719 return Intrinsic::getDeclaration(M, NEONStores[Factor - 2], {STVTy, PtrTy});
16742 const DataLayout &DL = LI->getDataLayout();
16744 VectorType *VTy = Shuffles[0]->getType();
16759 Type *EltTy = FVTy->getElementType();
16760 if (EltTy->isPointerTy())
16762 FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
16764 // If we're going to generate more than one load, reset the sub-vector type
16766 FVTy = FixedVectorType::get(FVTy->getElementType(),
16767 FVTy->getNumElements() / NumLoads);
16775 Value *BaseAddr = LI->getPointerOperand();
16777 Type *PtrTy = LI->getPointerOperandType();
16778 Type *PredTy = VectorType::get(Type::getInt1Ty(LDVTy->getContext()),
16779 LDVTy->getElementCount());
16781 Function *LdNFunc = getStructuredLoadFunction(LI->getModule(), Factor,
16784 // Holds sub-vectors extracted from the load intrinsic return values. The
16785 // sub-vectors are associated with the shufflevector instructions they will
16792 getSVEPredPatternFromNumElements(FVTy->getNumElements());
16793 if (Subtarget->getMinSVEVectorSizeInBits() ==
16794 Subtarget->getMaxSVEVectorSizeInBits() &&
16795 Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy))
16799 ConstantInt::get(Type::getInt32Ty(LDVTy->getContext()), *PgPattern);
16809 BaseAddr = Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr,
16810 FVTy->getNumElements() * Factor);
16818 // Extract and store the sub-vectors returned by the load intrinsic.
16828 ConstantInt::get(Type::getInt64Ty(VTy->getContext()), 0));
16831 if (EltTy->isPointerTy())
16833 SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
16834 FVTy->getNumElements()));
16840 // Replace uses of the shufflevector instructions with the sub-vectors
16842 // associated with more than one sub-vector, those sub-vectors will be
16848 SVI->replaceAllUsesWith(WideVec);
16860 Ptr->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
16863 if (It->isDebugOrPseudoInst())
16865 if (MaxLookupDist-- == 0)
16869 SI->getPointerOperand()->stripAndAccumulateInBoundsConstantOffsets(
16872 (OffsetA.sextOrTrunc(IdxWidth) - OffsetB.sextOrTrunc(IdxWidth))
16914 auto *VecTy = cast<FixedVectorType>(SVI->getType());
16915 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
16917 unsigned LaneLen = VecTy->getNumElements() / Factor;
16918 Type *EltTy = VecTy->getElementType();
16921 const DataLayout &DL = SI->getDataLayout();
16932 Value *Op0 = SVI->getOperand(0);
16933 Value *Op1 = SVI->getOperand(1);
16938 if (EltTy->isPointerTy()) {
16941 cast<FixedVectorType>(Op0->getType())->getNumElements();
16952 // and sub-vector type to something legal.
16954 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
16960 Value *BaseAddr = SI->getPointerOperand();
16962 auto Mask = SVI->getShuffleMask();
16965 // If mask is `poison`, `Mask` may be a vector of -1s.
16972 // that points to BaseAddr+16 or BaseAddr-16 then it can be better left as a
16974 if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 &&
16976 hasNearbyPairedStore(SI->getIterator(), SI->getParent()->end(), BaseAddr,
16978 hasNearbyPairedStore(SI->getReverseIterator(), SI->getParent()->rend(),
16982 Type *PtrTy = SI->getPointerOperandType();
16983 Type *PredTy = VectorType::get(Type::getInt1Ty(STVTy->getContext()),
16984 STVTy->getElementCount());
16986 Function *StNFunc = getStructuredStoreFunction(SI->getModule(), Factor,
16992 getSVEPredPatternFromNumElements(SubVecTy->getNumElements());
16993 if (Subtarget->getMinSVEVectorSizeInBits() ==
16994 Subtarget->getMaxSVEVectorSizeInBits() &&
16995 Subtarget->getMinSVEVectorSizeInBits() ==
17000 ConstantInt::get(Type::getInt32Ty(STVTy->getContext()), *PgPattern);
17021 StartMask = Mask[IdxJ] - j;
17037 ConstantInt::get(Type::getInt64Ty(STVTy->getContext()), 0));
17048 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
17060 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
17066 VectorType *VTy = cast<VectorType>(DI->getType()->getContainedType(0));
17067 const DataLayout &DL = DI->getDataLayout();
17074 if (UseScalable && !VTy->isScalableTy())
17080 VectorType::get(VTy->getElementType(),
17081 VTy->getElementCount().divideCoefficientBy(NumLoads));
17083 Type *PtrTy = LI->getPointerOperandType();
17084 Function *LdNFunc = getStructuredLoadFunction(DI->getModule(), Factor,
17092 Builder.CreateVectorSplat(LdTy->getElementCount(), Builder.getTrue());
17094 Value *BaseAddr = LI->getPointerOperand();
17111 Builder.getInt64(I * LdTy->getElementCount().getKnownMinValue());
17118 Result = PoisonValue::get(DI->getType());
17128 DI->replaceAllUsesWith(Result);
17135 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
17141 VectorType *VTy = cast<VectorType>(II->getOperand(0)->getType());
17142 const DataLayout &DL = II->getDataLayout();
17149 if (UseScalable && !VTy->isScalableTy())
17155 VectorType::get(VTy->getElementType(),
17156 VTy->getElementCount().divideCoefficientBy(NumStores));
17158 Type *PtrTy = SI->getPointerOperandType();
17159 Function *StNFunc = getStructuredStoreFunction(SI->getModule(), Factor,
17164 Value *BaseAddr = SI->getPointerOperand();
17169 Builder.CreateVectorSplat(StTy->getElementCount(), Builder.getTrue());
17171 Value *L = II->getOperand(0);
17172 Value *R = II->getOperand(1);
17181 Builder.getInt64(I * StTy->getElementCount().getKnownMinValue());
17182 L = Builder.CreateExtractVector(StTy, II->getOperand(0), Idx);
17183 R = Builder.CreateExtractVector(StTy, II->getOperand(1), Idx);
17198 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
17199 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
17200 // Only use AdvSIMD to implement memset of 32 bytes and above. It would have
17228 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
17229 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
17230 // Only use AdvSIMD to implement memset of 32 bytes and above. It would have
17255 // 12-bit optionally shifted immediates are legal for adds.
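// [Editor's sketch, illustrative helper] "12-bit optionally shifted" means an
// imm12 in [0, 4095], or that value shifted left by 12.
constexpr bool isLegalAddImmSketch(unsigned long long C) {
  return (C >> 12) == 0 || ((C & 0xfffULL) == 0 && (C >> 12) <= 0xfffULL);
}
static_assert(isLegalAddImmSketch(4095) && isLegalAddImmSketch(4096) && isLegalAddImmSketch(0xfff000),
              "unshifted and shifted forms");
static_assert(!isLegalAddImmSketch(4097), "needs two instructions or a register");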
17273 if (!Subtarget->hasSVE2())
17304 // (mul (add x, c1), c2) -> (add (mul x, c2), c2*c1) in DAGCombine,
17317 const int64_t C1 = C1Node->getSExtValue();
17318 const APInt C1C2 = C1Node->getAPIntValue() * C2Node->getAPIntValue();
17338 /// isLegalAddressingMode - Return true if the addressing mode represented
17345 // reg + 9-bit signed offset
17346 // reg + SIZE_IN_BYTES * 12-bit unsigned offset
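// [Editor's sketch, illustrative helpers] The two immediate forms listed
// above, for an access of Size bytes: the unscaled 9-bit signed (LDUR-style)
// window and the scaled unsigned 12-bit (LDR-style) field.
constexpr bool isSigned9Sketch(long long Off) { return Off >= -256 && Off <= 255; }
constexpr bool isScaledU12Sketch(long long Off, unsigned Size) {
  return Off >= 0 && Off % Size == 0 && Off / Size <= 4095;
}
static_assert(isSigned9Sketch(-256) && !isSigned9Sketch(256), "unscaled signed 9-bit window");
static_assert(isScaledU12Sketch(32760, 8) && !isScaledU12Sketch(4, 8),
              "8-byte accesses scale the 12-bit offset by 8");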
17377 if (Ty->isScalableTy()) {
17379 // See if we have a foldable vscale-based offset, for vector types which
17390 DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
17398 // No scalable offsets allowed for non-scalable types.
17405 if (Ty->isSized()) {
17412 return Subtarget->getInstrInfo()->isLegalAddressingMode(NumBytes, AM.BaseOffs,
17444 return Subtarget->hasFullFP16();
17457 switch (Ty->getScalarType()->getTypeID()) {
17474 // LR is a callee-save register, but we must treat it as clobbered by any call
17476 // as implicit-defs for stackmaps and patchpoints.
17491 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
17492 N->getOpcode() == ISD::SRL) &&
17495 SDValue ShiftLHS = N->getOperand(0);
17496 EVT VT = N->getValueType(0);
17508 if (N->getOpcode() == ISD::SHL)
17509 if (auto *SHLC = dyn_cast<ConstantSDNode>(N->getOperand(1)))
17510 return SRLC->getZExtValue() == SHLC->getZExtValue();
17521 assert(N->getOpcode() == ISD::XOR &&
17522 (N->getOperand(0).getOpcode() == ISD::SHL ||
17523 N->getOperand(0).getOpcode() == ISD::SRL) &&
17527 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
17528 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
17531 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
17532 unsigned ShiftAmt = ShiftC->getZExtValue();
17533 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17534 if (N->getOperand(0).getOpcode() == ISD::SHL)
17535 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
17536 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
17545 assert(((N->getOpcode() == ISD::SHL &&
17546 N->getOperand(0).getOpcode() == ISD::SRL) ||
17547 (N->getOpcode() == ISD::SRL &&
17548 N->getOperand(0).getOpcode() == ISD::SHL)) &&
17549 "Expected shift-shift mask");
17551 if (!N->getOperand(0)->hasOneUse())
17555 EVT VT = N->getValueType(0);
17556 if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
17557 auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
17558 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17559 return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
17572 assert(Ty->isIntegerTy());
17574 unsigned BitSize = Ty->getPrimitiveSizeInBits();
17585 Val &= (1LL << 32) - 1;
17601 /// xor (sra X, elt_size(X)-1), -1
17606 EVT VT = N->getValueType(0);
17607 if (!Subtarget->hasNEON() || !VT.isVector())
17612 SDValue Shift = N->getOperand(0);
17613 SDValue Ones = N->getOperand(1);
17621 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
17645 if (N->getValueType(0) != MVT::i32)
17648 SDValue VecReduceOp0 = N->getOperand(0);
17651 if (Opcode != ISD::ABS || VecReduceOp0->getValueType(0) != MVT::v16i32)
17656 if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
17657 ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
17660 SDValue SUB = ABS->getOperand(0);
17661 unsigned Opcode0 = SUB->getOperand(0).getOpcode();
17662 unsigned Opcode1 = SUB->getOperand(1).getOpcode();
17664 if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
17665 SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
17677 SDValue EXT0 = SUB->getOperand(0);
17678 SDValue EXT1 = SUB->getOperand(1);
17680 if (EXT0->getOperand(0)->getValueType(0) != MVT::v16i8 ||
17681 EXT1->getOperand(0)->getValueType(0) != MVT::v16i8)
17689 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
17692 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
17700 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
17703 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
17725 if (!ST->isNeonAvailable())
17728 if (!ST->hasDotProd())
17731 SDValue Op0 = N->getOperand(0);
17732 if (N->getValueType(0) != MVT::i32 || Op0.getValueType().isScalableVT() ||
17757 // For non-mla reductions B can be set to 1. For MLA we take the operand of
17781 return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
17804 DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), ConcatSDot16);
17820 DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
17821 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), VecReduceAdd16,
17876 // We can convert a UADDV(add(zext(64-bit source), zext(64-bit source))) into
17877 // UADDLV(concat), where the concat represents the 64-bit zext sources.
17879 // Look for add(zext(64-bit source), zext(64-bit source)), returning
17893 // Check zext VTs are the same and 64-bit length.
17917 SDValue A = N->getOperand(0);
17920 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), R);
17941 if (isIntDivCheap(N->getValueType(0), Attr))
17944 EVT VT = N->getValueType(0);
17949 (VT.isFixedLengthVector() && Subtarget->useSVEForFixedLengthVectors()))
17957 // If the divisor is 2 or -2, the default expansion is better. It will add
17958 // (N->getValueType(0) >> (BitWidth - 1)) to it before shifting right.
17960 Divisor == APInt(Divisor.getBitWidth(), -2, /*isSigned*/ true))
17971 if (isIntDivCheap(N->getValueType(0), Attr))
17974 EVT VT = N->getValueType(0);
17978 if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
17991 SDValue N0 = N->getOperand(0);
17992 SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
18036 /// Calculates what the pre-extend type is, based on the extension
18040 /// pre-extend type is pulled directly from the operand, while other extend
18058 return TypeNode->getVT();
18066 uint32_t Mask = Constant->getZExtValue();
18093 SDValue Extend = BV->getOperand(0);
18107 // Restrict valid pre-extend data type
18114 for (SDValue Op : drop_begin(BV->ops())) {
18131 for (SDValue Op : BV->ops())
18142 cast<ShuffleVectorSDNode>(BV)->getMask());
18151 EVT VT = Mul->getValueType(0);
18155 SDValue Op0 = performBuildShuffleExtendCombine(Mul->getOperand(0), DAG);
18156 SDValue Op1 = performBuildShuffleExtendCombine(Mul->getOperand(1), DAG);
18163 return DAG.getNode(Mul->getOpcode(), DL, VT, Op0 ? Op0 : Mul->getOperand(0),
18164 Op1 ? Op1 : Mul->getOperand(1));
18167 // Combine v4i32 Mul(And(Srl(X, 15), 0x10001), 0xffff) -> v8i16 CMLTz
18170 EVT VT = N->getValueType(0);
18174 if (N->getOperand(0).getOpcode() != ISD::AND ||
18175 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
18178 SDValue And = N->getOperand(0);
18182 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
18189 V3 != (HalfSize - 1))
18207 EVT VT = N->getValueType(0);
18209 (N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
18210 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND) ||
18211 (N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
18212 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND) ||
18213 N->getOperand(0).getOperand(0).getValueType() !=
18214 N->getOperand(1).getOperand(0).getValueType())
18217 if (N->getOpcode() == ISD::MUL &&
18218 N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode())
18221 SDValue N0 = N->getOperand(0).getOperand(0);
18222 SDValue N1 = N->getOperand(1).getOperand(0);
18233 SDValue NewN0 = DAG.getNode(N->getOperand(0).getOpcode(), DL, HalfVT, N0);
18234 SDValue NewN1 = DAG.getNode(N->getOperand(1).getOpcode(), DL, HalfVT, N1);
18235 SDValue NewOp = DAG.getNode(N->getOpcode(), DL, HalfVT, NewN0, NewN1);
18236 return DAG.getNode(N->getOpcode() == ISD::MUL ? N->getOperand(0).getOpcode()
18257 // Canonicalize X*(Y+1) -> X*Y+X and (X+1)*Y -> X*Y+Y,
18259 // Similarly, X*(1-Y) -> X - X*Y and (1-Y)*X -> X - Y*X.
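// [Editor's sketch, not part of the original file] Both canonicalisations
// above are plain distributivity, which is what lets the multiply fold into
// MADD/MSUB afterwards.
static_assert(7 * (5 + 1) == 7 * 5 + 7, "X*(Y+1) == X*Y + X");
static_assert(7 * (1 - 5) == 7 - 7 * 5, "X*(1-Y) == X - X*Y");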
18261 EVT VT = N->getValueType(0);
18262 SDValue N0 = N->getOperand(0);
18263 SDValue N1 = N->getOperand(1);
18267 auto IsAddSubWith1 = [&](SDValue V) -> bool {
18268 AddSubOpc = V->getOpcode();
18269 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
18270 SDValue Opnd = V->getOperand(1);
18271 MulOper = V->getOperand(0);
18275 return C->isOne();
18295 const APInt &ConstValue = C->getAPIntValue();
18300 (N0->getOpcode() == ISD::TRUNCATE &&
18301 (IsSVECntIntrinsic(N0->getOperand(0)))))
18308 // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
18309 // 64-bit is 5 cycles, so this is always a win.
18321 if (N0->hasOneUse() && (isSignExtended(N0, DAG) ||
18326 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
18327 N->use_begin()->getOpcode() == ISD::SUB))
18362 // C = 45 is equal to (1+4)*(1+8); we don't decompose it into (1+2)*(16-1), as
18363 // multiplying by the (2^N - 1) factor can't be done in a single instruction.
18370 APInt NVMinus1 = N - 1;
18380 // C = 11 is equal to (1+4)*2+1; we don't decompose it into (1+2)*4-1, as
18381 // multiplying by the (2^N - 1) factor can't be done in a single instruction.
18383 APInt CVMinus1 = C - 1;
18387 APInt SCVMinus1 = CVMinus1.ashr(TrailingZeroes) - 1;
18397 // Can the const C be decomposed into (1 - (1 - 2^M) * 2^N), eg:
18398 // C = 29 is equal to 1 - (1 - 2^3) * 2^2.
18400 APInt CVMinus1 = C - 1;
18416 // (mul x, 2^N - 1) => (sub (shl x, N), x)
18417 // (mul x, (2^(N-M) - 1) * 2^M) => (sub (shl x, N), (shl x, M))
18422 // (mul x, (1 - (1 - 2^M) * 2^N))
18423 // => MV = (sub x, (shl x, M)); Res = (sub x, (shl MV, N))
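// [Editor's sketch, standalone] The positive-constant decompositions above,
// checked as integer identities with x = 3 as an arbitrary value.
static_assert((1 + 4) * (1 + 8) == 45, "45 = (1 + 2^2) * (1 + 2^3): two shifted adds");
static_assert((3 << 4) - 3 == 3 * 15, "(mul x, 2^N - 1) == (sub (shl x, N), x)");
static_assert((3 << 4) - (3 << 2) == 3 * 12, "(mul x, (2^(N-M) - 1) * 2^M) with N = 4, M = 2");
static_assert(1 - (1 - 8) * 4 == 29, "29 = 1 - (1 - 2^3) * 2^2");
static_assert(3 - (3 - 3 * 8) * 4 == 3 * 29,
              "the two-subtract expansion: MV = x - (x<<3), then x - (MV<<2)");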
18424 APInt SCVMinus1 = ShiftedConstValue - 1;
18438 if (Subtarget->hasALULSLFast() &&
18440 APInt CVMMinus1 = CVM - 1;
18441 APInt CVNMinus1 = CVN - 1;
18450 if (Subtarget->hasALULSLFast() &&
18461 if (Subtarget->hasALULSLFast() &&
18472 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
18473 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
18474 // (mul x, -(2^(N-M) - 1) * 2^M) => (sub (shl x, M), (shl x, N))
18475 APInt SCVPlus1 = -ShiftedConstValue + 1;
18476 APInt CVNegPlus1 = -ConstValue + 1;
18477 APInt CVNegMinus1 = -ConstValue - 1;
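// [Editor's sketch, standalone] The negated forms above, again with x = 3.
static_assert(3 - (3 << 4) == 3 * -15, "(mul x, -(2^N - 1)) == (sub x, (shl x, N))");
static_assert(-((3 << 4) + 3) == 3 * -17, "(mul x, -(2^N + 1)) == -(add (shl x, N), x)");
static_assert((3 << 2) - (3 << 4) == 3 * -12,
              "(mul x, -(2^(N-M) - 1) * 2^M) with N = 4, M = 2");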
18495 // Take advantage of vector comparisons producing 0 or -1 in each lane to
18499 // UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
18506 EVT VT = N->getValueType(0);
18507 if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
18508 N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
18509 VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
18513 // make the transformation for non-constant splats as well, but it's unclear
18517 dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
18519 if (!BV->isConstant())
18524 EVT IntVT = BV->getValueType(0);
18527 SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
18531 N->getOperand(0)->getOperand(0), MaskConst);
18546 EVT VT = N->getValueType(0);
18551 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
18554 // If the result of an integer load is only used by an integer-to-float
18556 // This eliminates an "integer-to-vector-move" UOP and improves throughput.
18557 SDValue N0 = N->getOperand(0);
18558 if (Subtarget->isNeonAvailable() && ISD::isNormalLoad(N0.getNode()) &&
18561 !cast<LoadSDNode>(N0)->isVolatile()) {
18563 SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
18564 LN0->getPointerInfo(), LN0->getAlign(),
18565 LN0->getMemOperand()->getFlags());
18572 (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
18579 /// Fold a floating-point multiply by power of two into floating-point to
18580 /// fixed-point conversion.
18584 if (!Subtarget->isNeonAvailable())
18587 if (!N->getValueType(0).isSimple())
18590 SDValue Op = N->getOperand(0);
18597 SDValue ConstVec = Op->getOperand(1);
18604 (FloatBits != 16 || !Subtarget->hasFullFP16()))
18607 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
18612 // Avoid conversions where iN is larger than the float (e.g., float -> i64).
18619 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
18620 if (C == -1 || C == 0 || C > Bits)
18627 if (N->getOpcode() == ISD::FP_TO_SINT_SAT ||
18628 N->getOpcode() == ISD::FP_TO_UINT_SAT) {
18629 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
18635 bool IsSigned = (N->getOpcode() == ISD::FP_TO_SINT ||
18636 N->getOpcode() == ISD::FP_TO_SINT_SAT);
18642 Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
18645 FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
18652 EVT VT = N->getValueType(0);
18667 SDValue N0 = N->getOperand(0);
18671 SDValue N1 = N->getOperand(1);
18675 // InstCombine does (not (neg a)) => (add a -1).
18676 // Try: (or (and (neg a) b) (and (add a -1) c)) => (bsl (neg a) b c)
18678 for (int i = 1; i >= 0; --i) {
18679 for (int j = 1; j >= 0; --j) {
18680 SDValue O0 = N0->getOperand(i);
18681 SDValue O1 = N1->getOperand(j);
18688 SubSibling = N0->getOperand(1 - i);
18689 AddSibling = N1->getOperand(1 - j);
18693 AddSibling = N0->getOperand(1 - i);
18694 SubSibling = N1->getOperand(1 - j);
18716 uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
18717 for (int i = 1; i >= 0; --i)
18718 for (int j = 1; j >= 0; --j) {
18721 if (ISD::isConstantSplatVector(N0->getOperand(i).getNode(), Val1) &&
18722 ISD::isConstantSplatVector(N1->getOperand(j).getNode(), Val2) &&
18724 return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i),
18725 N0->getOperand(1 - i), N1->getOperand(1 - j));
18727 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
18728 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
18734 ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
18735 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
18737 CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
18743 return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i),
18744 N0->getOperand(1 - i), N1->getOperand(1 - j));
18761 EVT VT = N->getValueType(0);
18762 SDValue CSel0 = N->getOperand(0);
18763 SDValue CSel1 = N->getOperand(1);
18769 if (!CSel0->hasOneUse() || !CSel1->hasOneUse())
18782 if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
18797 if (N->getOpcode() == ISD::AND) {
18810 if (Op1 && Op1->getAPIntValue().isNegative() &&
18811 Op1->getAPIntValue().sgt(-32)) {
18813 // if the Op1 is a constant in the range [-31, -1], we
18816 DAG.getConstant(Op1->getAPIntValue().abs(), DL, Op1->getValueType(0));
18832 EVT VT = N->getValueType(0);
18866 if (N->getOpcode() == AArch64ISD::DUP || N->getOpcode() == ISD::SPLAT_VECTOR)
18867 if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0)))
18868 return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
18875 SDValue Op = N->getOperand(0);
18878 Op = Op->getOperand(0);
18887 SDValue Src = N->getOperand(0);
18888 unsigned Opc = Src->getOpcode();
18892 SDValue UnpkOp = Src->getOperand(0);
18893 SDValue Dup = N->getOperand(1);
18899 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
18903 uint64_t ExtVal = C->getZExtValue();
18905 auto MaskAndTypeMatch = [ExtVal](EVT VT) -> bool {
18913 EVT EltTy = UnpkOp->getValueType(0).getVectorElementType();
18917 // If this is 'and (uunpklo/hi (extload MemTy -> ExtTy)), mask', then check
18918 // to see if the mask is all-ones of size MemTy.
18920 if (MaskedLoadOp && (MaskedLoadOp->getExtensionType() == ISD::ZEXTLOAD ||
18921 MaskedLoadOp->getExtensionType() == ISD::EXTLOAD)) {
18922 EVT EltTy = MaskedLoadOp->getMemoryVT().getVectorElementType();
18928 APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits());
18932 Dup = DAG.getNode(ISD::SPLAT_VECTOR, DL, UnpkOp->getValueType(0),
18936 UnpkOp->getValueType(0), UnpkOp, Dup);
18938 return DAG.getNode(Opc, DL, N->getValueType(0), And);
18946 if (isAllActivePredicate(DAG, N->getOperand(0)))
18947 return N->getOperand(1);
18948 if (isAllActivePredicate(DAG, N->getOperand(1)))
18949 return N->getOperand(0);
18954 SDValue Mask = N->getOperand(1);
18961 // SVE load instructions perform an implicit zero-extend, which makes them
18967 MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
18984 MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
19003 SDValue SetCC = N->getOperand(0);
19004 EVT VT = N->getValueType(0);
19010 for (auto U : N->uses())
19011 if (U->getOpcode() == ISD::SELECT)
19014 // Check if the operand is a SETCC node with floating-point comparison
19039 SDValue LHS = N->getOperand(0);
19040 SDValue RHS = N->getOperand(1);
19041 EVT VT = N->getValueType(0);
19102 SDValue LHS = N->getOperand(0);
19103 SDValue RHS = N->getOperand(1);
19104 EVT VT = N->getValueType(0);
19107 if (!N->getFlags().hasAllowReassociation())
19110 // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
19122 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(1), B, N->getFlags()),
19124 VCMLA->setFlags(A->getFlags());
19174 assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
19176 if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
19179 SDValue N0 = N->getOperand(0);
19183 !isNullConstant(N->getOperand(1)))
19187 // flag-setting operation.
19194 return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::FIRST_ACTIVE);
19197 // Materialize : Idx = (add (mul vscale, NumEls), -1)
19204 assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
19206 if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
19209 SDValue N0 = N->getOperand(0);
19215 // Idx == (add (mul vscale, NumEls), -1)
19216 SDValue Idx = N->getOperand(1);
19228 // Extracts of lane EC-1 for SVE can be expressed as PTEST(Op, LAST) ? 1 : 0
19231 return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::LAST_ACTIVE);
19237 assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
19244 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
19246 EVT VT = N->getValueType(0);
19248 bool IsStrict = N0->isStrictFPOpcode();
19250 // extract(dup x) -> x
19259 // ->
19264 if (isNullConstant(N1) && hasPairwiseAdd(N0->getOpcode(), VT, FullFP16) &&
19267 SDValue N00 = N0->getOperand(IsStrict ? 1 : 0);
19268 SDValue N01 = N0->getOperand(IsStrict ? 2 : 1);
19279 if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
19280 Other == Shuffle->getOperand(0)) {
19286 return DAG.getNode(N0->getOpcode(), DL, VT, Extract1, Extract2);
19292 SDValue Ret = DAG.getNode(N0->getOpcode(), DL,
19294 {N0->getOperand(0), Extract1, Extract2});
19308 EVT VT = N->getValueType(0);
19309 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
19310 unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
19319 // ->
19323 // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
19325 // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
19326 if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
19328 SDValue N00 = N0->getOperand(0);
19329 SDValue N10 = N1->getOperand(0);
19347 if (N->getOperand(0).getValueType() == MVT::v4i8 ||
19348 N->getOperand(0).getValueType() == MVT::v2i16 ||
19349 N->getOperand(0).getValueType() == MVT::v2i8) {
19350 EVT SrcVT = N->getOperand(0).getValueType();
19354 if (N->getNumOperands() % 2 == 0 &&
19355 all_of(N->op_values(), [SrcVT](SDValue V) {
19361 return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() &&
19362 LD->getExtensionType() == ISD::NON_EXTLOAD;
19365 EVT NVT = EVT::getVectorVT(*DAG.getContext(), FVT, N->getNumOperands());
19368 for (unsigned i = 0; i < N->getNumOperands(); i++) {
19369 SDValue V = N->getOperand(i);
19374 SDValue NewLoad = DAG.getLoad(FVT, dl, LD->getChain(),
19375 LD->getBasePtr(), LD->getMemOperand());
19380 return DAG.getBitcast(N->getValueType(0),
19390 // ->
19393 if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
19394 N1Opc == ISD::TRUNCATE && N->isOnlyUserOf(N0.getNode()) &&
19395 N->isOnlyUserOf(N1.getNode())) {
19397 return V->getOpcode() == ISD::XOR &&
19400 SDValue N00 = N0->getOperand(0);
19401 SDValue N10 = N1->getOperand(0);
19402 if (isBitwiseVectorNegate(N00) && N0->isOnlyUserOf(N00.getNode()) &&
19403 isBitwiseVectorNegate(N10) && N1->isOnlyUserOf(N10.getNode())) {
19408 N00->getOperand(0)),
19410 N10->getOperand(0))),
19420 // Optimise concat_vectors of two identical binops with a 128-bit destination
19422 // concat(uhadd(a,b), uhadd(c, d)) -> uhadd(concat(a, c), concat(b, d))
19423 if (N->getNumOperands() == 2 && N0Opc == N1Opc && VT.is128BitVector() &&
19424 DAG.getTargetLoweringInfo().isBinOp(N0Opc) && N0->hasOneUse() &&
19425 N1->hasOneUse()) {
19426 SDValue N00 = N0->getOperand(0);
19427 SDValue N01 = N0->getOperand(1);
19428 SDValue N10 = N1->getOperand(0);
19429 SDValue N11 = N1->getOperand(1);
19459 if (Imm != 1ULL << (ShtAmt - 1))
19464 // concat(rshrn(x), rshrn(y)) -> rshrn(concat(x, y))
19465 if (N->getNumOperands() == 2 && IsRSHRN(N0) &&
19477 DAG.getConstant(1ULL << (N0.getConstantOperandVal(1) - 1), dl, BVT));
19484 if (N->getNumOperands() == 2 && N0Opc == AArch64ISD::ZIP1 &&
19497 if (N->getNumOperands() == 2 && N0 == N1 && VT.getVectorNumElements() == 2) {
19503 // Canonicalise concat_vectors so that the right-hand vector has as few
19504 // bit-casts as possible before its real operation. The primary matching
19506 // which depend on the operation being performed on this right-hand vector.
19512 if (N->getNumOperands() != 2 || N1Opc != ISD::BITCAST)
19514 SDValue RHS = N1->getOperand(0);
19521 dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
19537 EVT VT = N->getValueType(0);
19541 SDValue V = N->getOperand(0);
19544 // blocks this combine because the non-const case requires custom lowering.
19546 // ty1 extract_vector(ty2 splat(const))) -> ty1 splat(const)
19558 SDValue Vec = N->getOperand(0);
19559 SDValue SubVec = N->getOperand(1);
19560 uint64_t IdxVal = N->getConstantOperandVal(2);
19580 // Fold insert_subvector -> concat_vectors
19581 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
19582 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
19609 // register allocator to avoid cross-class register copies that aren't
19613 SDValue Op1 = N->getOperand(1);
19618 SDValue IID = N->getOperand(0);
19619 SDValue Shift = N->getOperand(2);
19622 EVT ResTy = N->getValueType(0);
19644 // AArch64 high-vector "long" operations are formed by performing the non-high
19653 // (dupv64 scalar) --> (extract_high (dup128 scalar))
19657 // It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
19693 N = DAG.getNode(N->getOpcode(), DL, NewVT, N->ops());
19749 SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
19755 // - csel 1, 0, cc
19756 // - csel 0, 1, !cc
19777 if (!TValue->isOne()) {
19783 return TValue->isOne() && FValue->isZero();
19791 isSetCC(Op->getOperand(0), Info));
19796 // -->
19801 assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
19802 SDValue LHS = Op->getOperand(0);
19803 SDValue RHS = Op->getOperand(1);
19822 ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
19823 : InfoAndKind.Info.Generic.Opnd0->getValueType();
19841 EVT VT = Op->getValueType(0);
19846 // ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
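// This is sound because integer addition commutes with the cross-lane sum:
// UADDV(a) + UADDV(b) equals UADDV(a + b) lane-wise, so one reduction plus a
// vector ADD replaces two reductions and a scalar ADD.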
19848 EVT VT = N->getValueType(0);
19850 if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
19853 SDValue LHS = N->getOperand(0);
19854 SDValue RHS = N->getOperand(1);
19859 auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
19860 auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
19861 if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero())
19864 SDValue Op1 = LHS->getOperand(0);
19865 SDValue Op2 = RHS->getOperand(0);
19875 EVT ValVT = Val1->getValueType(0);
19885 /// CSNEG(c, -1, cc) + b => CSINC(b+c, b, cc)
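/// The CSNEG form is valid because its false arm evaluates to -(-1) = 1, so
/// the sum is b+c when cc holds and b+1 otherwise, which is exactly what
/// CSINC(b+c, b, cc) produces.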
19887 EVT VT = N->getValueType(0);
19888 if (!VT.isScalarInteger() || N->getOpcode() != ISD::ADD)
19891 SDValue LHS = N->getOperand(0);
19892 SDValue RHS = N->getOperand(1);
19918 (CTVal->isOne() || CFVal->isOne())) &&
19920 (CTVal->isOne() || CFVal->isAllOnes())))
19924 if (LHS.getOpcode() == AArch64ISD::CSEL && CTVal->isOne() &&
19925 !CFVal->isOne()) {
19931 // Switch CSNEG(1, c, cc) to CSNEG(-c, -1, !cc)
19932 if (LHS.getOpcode() == AArch64ISD::CSNEG && CTVal->isOne() &&
19933 !CFVal->isAllOnes()) {
19934 APInt C = -1 * CFVal->getAPIntValue();
19942 APInt ADDC = CTVal->getAPIntValue();
19947 assert(((LHS.getOpcode() == AArch64ISD::CSEL && CFVal->isOne()) ||
19948 (LHS.getOpcode() == AArch64ISD::CSNEG && CFVal->isAllOnes())) &&
19958 // ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
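// UDOT accumulates into its first operand, so when that accumulator is known
// to be zero the ADD's other operand can take its place, removing the separate
// vector ADD.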
19960 EVT VT = N->getValueType(0);
19961 if (N->getOpcode() != ISD::ADD)
19964 SDValue Dot = N->getOperand(0);
19965 SDValue A = N->getOperand(1);
19994 // (neg (csel X, Y)) -> (csel (neg X), (neg Y))
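// Pushing the negation into both arms gives the negated values a chance to
// fold with the CSEL's inputs (for example ending up as a CSNEG), rather than
// paying for a separate negate of the selected result.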
20003 SDValue CSel = N->getOperand(1);
20004 if (CSel.getOpcode() != AArch64ISD::CSEL || !CSel->hasOneUse())
20025 // which act on the high-half of their inputs. They are normally matched by
20030 // -> uaddl2 vD, vN, vM
20041 MVT VT = N->getSimpleValueType(0);
20043 if (N->getOpcode() == ISD::ADD)
20049 SDValue LHS = N->getOperand(0);
20050 SDValue RHS = N->getOperand(1);
20074 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
20079 !Op.getNode()->hasAnyUseOfValue(0);
20103 SDValue CmpOp = Op->getOperand(2);
20115 SDValue CsetOp = CmpOp->getOperand(IsAdd ? 0 : 1);
20120 return DAG.getNode(Op->getOpcode(), SDLoc(Op), Op->getVTList(),
20121 Op->getOperand(0), Op->getOperand(1),
20127 SDValue LHS = N->getOperand(0);
20128 SDValue RHS = N->getOperand(1);
20129 SDValue Cond = N->getOperand(2);
20134 EVT VT = N->getValueType(0);
20146 EVT VT = N->getValueType(0);
20150 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1),
20151 Elt2 = N->getOperand(2), Elt3 = N->getOperand(3);
20152 if (Elt0->getOpcode() == ISD::FP_ROUND &&
20153 Elt1->getOpcode() == ISD::FP_ROUND &&
20154 isa<ConstantSDNode>(Elt0->getOperand(1)) &&
20155 isa<ConstantSDNode>(Elt1->getOperand(1)) &&
20156 Elt0->getConstantOperandVal(1) == Elt1->getConstantOperandVal(1) &&
20157 Elt0->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20158 Elt1->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20160 isa<ConstantSDNode>(Elt0->getOperand(0)->getOperand(1)) &&
20161 isa<ConstantSDNode>(Elt1->getOperand(0)->getOperand(1)) &&
20162 Elt0->getOperand(0)->getOperand(0) ==
20163 Elt1->getOperand(0)->getOperand(0) &&
20164 Elt0->getOperand(0)->getConstantOperandVal(1) == 0 &&
20165 Elt1->getOperand(0)->getConstantOperandVal(1) == 1) {
20166 SDValue LowLanesSrcVec = Elt0->getOperand(0)->getOperand(0);
20169 if (Elt2->getOpcode() == ISD::UNDEF &&
20170 Elt3->getOpcode() == ISD::UNDEF) {
20172 } else if (Elt2->getOpcode() == ISD::FP_ROUND &&
20173 Elt3->getOpcode() == ISD::FP_ROUND &&
20174 isa<ConstantSDNode>(Elt2->getOperand(1)) &&
20175 isa<ConstantSDNode>(Elt3->getOperand(1)) &&
20176 Elt2->getConstantOperandVal(1) ==
20177 Elt3->getConstantOperandVal(1) &&
20178 Elt2->getOperand(0)->getOpcode() ==
20180 Elt3->getOperand(0)->getOpcode() ==
20183 isa<ConstantSDNode>(Elt2->getOperand(0)->getOperand(1)) &&
20184 isa<ConstantSDNode>(Elt3->getOperand(0)->getOperand(1)) &&
20185 Elt2->getOperand(0)->getOperand(0) ==
20186 Elt3->getOperand(0)->getOperand(0) &&
20187 Elt2->getOperand(0)->getConstantOperandVal(1) == 0 &&
20188 Elt3->getOperand(0)->getConstantOperandVal(1) == 1) {
20189 SDValue HighLanesSrcVec = Elt2->getOperand(0)->getOperand(0);
20199 Elt0->getOperand(1));
20206 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1);
20207 if (Elt0->getOpcode() == ISD::FP_EXTEND &&
20208 Elt1->getOpcode() == ISD::FP_EXTEND &&
20209 Elt0->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20210 Elt1->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20211 Elt0->getOperand(0)->getOperand(0) ==
20212 Elt1->getOperand(0)->getOperand(0) &&
20214 isa<ConstantSDNode>(Elt0->getOperand(0)->getOperand(1)) &&
20215 isa<ConstantSDNode>(Elt1->getOperand(0)->getOperand(1)) &&
20216 Elt0->getOperand(0)->getConstantOperandVal(1) + 1 ==
20217 Elt1->getOperand(0)->getConstantOperandVal(1) &&
20220 Elt0->getOperand(0)->getConstantOperandVal(1) %
20223 SDValue SrcVec = Elt0->getOperand(0)->getOperand(0);
20228 SDValue SubvectorIdx = Elt0->getOperand(0)->getOperand(1);
20238 // extract subvector where the inner vector is any-extended to the
20249 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1);
20250 // Reminder, EXTRACT_VECTOR_ELT has the effect of any-extending to its VT.
20251 if (Elt0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20252 Elt1->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20254 isa<ConstantSDNode>(Elt0->getOperand(1)) &&
20255 isa<ConstantSDNode>(Elt1->getOperand(1)) &&
20257 Elt0->getOperand(0) == Elt1->getOperand(0) &&
20259 Elt0->getConstantOperandVal(1) + 1 == Elt1->getConstantOperandVal(1) &&
20262 Elt0->getConstantOperandVal(1) % VT.getVectorMinNumElements() == 0) {
20263 SDValue VecToExtend = Elt0->getOperand(0);
20268 SDValue SubvectorIdx = DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL);
20280 EVT VT = N->getValueType(0);
20281 SDValue N0 = N->getOperand(0);
20300 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
20309 uint64_t AndMask = CSD->getZExtValue();
20318 // (N - Y) + Z --> (Z - Y) + N
20341 EVT VT = N->getValueType(0);
20352 if (N->getOpcode() != ISD::ADD)
20357 EVT VT = N->getValueType(0);
20362 SDValue LHS = N->getOperand(0);
20363 SDValue RHS = N->getOperand(1);
20377 // with LSL (shift > 4). For other processors, this is a no-op for
20390 if (N->getOpcode() != ISD::SUB)
20393 SDValue Add = N->getOperand(1);
20394 SDValue X = N->getOperand(0);
20412 EVT VT = N->getValueType(0);
20434 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::SUB)
20437 if (!N->getValueType(0).isFixedLengthVector())
20440 auto performOpt = [&DAG, &N](SDValue Op0, SDValue Op1) -> SDValue {
20444 if (!cast<ConstantSDNode>(Op1->getOperand(1))->isZero())
20447 SDValue MulValue = Op1->getOperand(0);
20460 DAG.getNode(N->getOpcode(), SDLoc(N), ScalableVT, {ScaledOp, MulValue});
20461 return convertFromScalableVector(DAG, N->getValueType(0), NewValue);
20464 if (SDValue res = performOpt(N->getOperand(0), N->getOperand(1)))
20466 else if (N->getOpcode() == ISD::ADD)
20467 return performOpt(N->getOperand(1), N->getOperand(0));
20475 EVT VT = N->getValueType(0);
20477 DAG.getTargetLoweringInfo().isOperationExpand(N->getOpcode(), MVT::v1i64))
20479 SDValue Op0 = N->getOperand(0);
20480 SDValue Op1 = N->getOperand(1);
20504 DAG.getNode(N->getOpcode(), DL, MVT::v1i64, Op0, Op1),
20510 if (!BV->hasOneUse())
20513 if (!Ld || !Ld->isSimple())
20521 if (!Ld || !Ld->isSimple() || !BV.getOperand(Op).hasOneUse())
20552 if (SV1->getMaskElt(I) != I ||
20553 SV1->getMaskElt(I + NumSubElts) != I + NumSubElts ||
20554 SV1->getMaskElt(I + NumSubElts * 2) != I + NumSubElts * 2 ||
20555 SV1->getMaskElt(I + NumSubElts * 3) != I + NumElts)
20558 if (SV2->getMaskElt(I) != I ||
20559 SV2->getMaskElt(I + NumSubElts) != I + NumSubElts ||
20560 SV2->getMaskElt(I + NumSubElts * 2) != I + NumElts)
20563 auto *Ld0 = dyn_cast<LoadSDNode>(SV2->getOperand(0).getOperand(0));
20564 auto *Ld1 = dyn_cast<LoadSDNode>(SV2->getOperand(0).getOperand(1));
20565 auto *Ld2 = dyn_cast<LoadSDNode>(SV2->getOperand(1).getOperand(0));
20567 if (!Ld0 || !Ld1 || !Ld2 || !Ld3 || !Ld0->isSimple() || !Ld1->isSimple() ||
20568 !Ld2->isSimple() || !Ld3->isSimple())
20593 unsigned Size = get<0>(L)->getValueType(0).getSizeInBits();
20594 return Size == get<1>(L)->getValueType(0).getSizeInBits() &&
20635 EVT VT = N->getValueType(0);
20641 SDValue Other = N->getOperand(0);
20642 SDValue Shift = N->getOperand(1);
20643 if (Shift.getOpcode() != ISD::SHL && N->getOpcode() != ISD::SUB)
20692 SDValue Load = DAG.getLoad(DLoadVT, SDLoc(L0), L0->getChain(),
20693 L0->getBasePtr(), L0->getPointerInfo(),
20694 L0->getOriginalAlign());
20703 for (const auto &[O0, O1] : zip(Op0->op_values(), Op1->op_values()))
20749 return DAG.getNode(N->getOpcode(), DL, VT, Ext0, NShift);
20780 // Massage DAGs which we can use the high-half "long" operations on into
20783 // (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
20793 SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
20794 SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
20800 // just as well use the non-high version) so look for a corresponding extract
20814 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);
20816 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
20817 N->getOperand(0), LHS, RHS);
20821 MVT ElemTy = N->getSimpleValueType(0).getScalarType();
20825 if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
20829 if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
20835 } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
20836 ShiftAmount = CVN->getSExtValue();
20842 return N->getOperand(1);
20877 ShiftAmount = -ShiftAmount;
20884 EVT VT = N->getValueType(0);
20885 SDValue Op = N->getOperand(1);
20892 if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
20894 DAG.getConstant(-ShiftAmount, dl, MVT::i32));
20895 if (N->getValueType(0) == MVT::i64)
20902 if (N->getValueType(0) == MVT::i64)
20915 SDValue AndN = N->getOperand(2);
20920 if (!CMask || CMask->getZExtValue() != Mask)
20924 N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
20930 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
20932 N->getOperand(1).getSimpleValueType(),
20933 N->getOperand(1)),
20939 SDValue Op1 = N->getOperand(1);
20940 SDValue Op2 = N->getOperand(2);
20946 SDValue StepVector = DAG.getStepVector(DL, N->getValueType(0));
20947 SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2);
20948 SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step);
20949 SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1);
20950 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base);
20955 SDValue Scalar = N->getOperand(3);
20961 SDValue Passthru = N->getOperand(1);
20962 SDValue Pred = N->getOperand(2);
20963 return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0),
20970 EVT VT = N->getValueType(0);
20974 // Current lowering only supports the SVE-ACLE types.
20984 SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
20985 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2));
20986 SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3),
20999 SDValue Comparator = N->getOperand(3);
21003 EVT VT = N->getValueType(0);
21004 EVT CmpVT = N->getOperand(2).getValueType();
21005 SDValue Pred = N->getOperand(1);
21022 int64_t ImmVal = CN->getSExtValue();
21023 if (ImmVal >= -16 && ImmVal <= 15)
21036 uint64_t ImmVal = CN->getZExtValue();
21051 N->getOperand(2), Splat, DAG.getCondCode(CC));
21099 SDValue Pred = N->getOperand(1);
21100 SDValue VecToReduce = N->getOperand(2);
21104 EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
21110 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
21118 SDValue Pred = N->getOperand(1);
21119 SDValue VecToReduce = N->getOperand(2);
21127 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
21135 SDValue Pred = N->getOperand(1);
21136 SDValue InitVal = N->getOperand(2);
21137 SDValue VecToReduce = N->getOperand(3);
21150 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
21160 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
21161 assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
21162 SDValue Pg = N->getOperand(1);
21163 SDValue Op1 = N->getOperand(SwapOperands ? 3 : 2);
21164 SDValue Op2 = N->getOperand(SwapOperands ? 2 : 3);
21169 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op1, Op2);
21171 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg, Op1, Op2);
21184 if (!Subtarget->hasSVE2p1())
21187 if (!N->hasNUsesOfValue(2, 0))
21190 const uint64_t HalfSize = N->getValueType(0).getVectorMinNumElements() / 2;
21194 auto It = N->use_begin();
21198 if (Lo->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
21199 Hi->getOpcode() != ISD::EXTRACT_SUBVECTOR)
21202 uint64_t OffLo = Lo->getConstantOperandVal(1);
21203 uint64_t OffHi = Hi->getConstantOperandVal(1);
21213 EVT HalfVec = Lo->getValueType(0);
21214 if (HalfVec != Hi->getValueType(0) ||
21222 SDValue Idx = N->getOperand(1);
21223 SDValue TC = N->getOperand(2);
21230 {Lo->getValueType(0), Hi->getValueType(0)}, {ID, Idx, TC});
21262 return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
21263 N->getOperand(1), N->getOperand(2));
21265 return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
21266 N->getOperand(1), N->getOperand(2));
21268 return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
21269 N->getOperand(1), N->getOperand(2));
21271 return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
21272 N->getOperand(1), N->getOperand(2));
21274 return DAG.getNode(AArch64ISD::SMULL, SDLoc(N), N->getValueType(0),
21275 N->getOperand(1), N->getOperand(2));
21277 return DAG.getNode(AArch64ISD::UMULL, SDLoc(N), N->getValueType(0),
21278 N->getOperand(1), N->getOperand(2));
21280 return DAG.getNode(AArch64ISD::PMULL, SDLoc(N), N->getValueType(0),
21281 N->getOperand(1), N->getOperand(2));
21293 return DAG.getNode(ISD::ABDS, SDLoc(N), N->getValueType(0),
21294 N->getOperand(1), N->getOperand(2));
21296 return DAG.getNode(ISD::ABDU, SDLoc(N), N->getValueType(0),
21297 N->getOperand(1), N->getOperand(2));
21306 if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
21331 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
21332 N->getOperand(1));
21336 return DAG.getNode(AArch64ISD::MUL_PRED, SDLoc(N), N->getValueType(0),
21337 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21339 return DAG.getNode(AArch64ISD::MULHS_PRED, SDLoc(N), N->getValueType(0),
21340 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21342 return DAG.getNode(AArch64ISD::MULHU_PRED, SDLoc(N), N->getValueType(0),
21343 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21345 return DAG.getNode(AArch64ISD::SMIN_PRED, SDLoc(N), N->getValueType(0),
21346 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21348 return DAG.getNode(AArch64ISD::UMIN_PRED, SDLoc(N), N->getValueType(0),
21349 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21351 return DAG.getNode(AArch64ISD::SMAX_PRED, SDLoc(N), N->getValueType(0),
21352 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21354 return DAG.getNode(AArch64ISD::UMAX_PRED, SDLoc(N), N->getValueType(0),
21355 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21357 return DAG.getNode(AArch64ISD::SHL_PRED, SDLoc(N), N->getValueType(0),
21358 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21360 return DAG.getNode(AArch64ISD::SRL_PRED, SDLoc(N), N->getValueType(0),
21361 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21363 return DAG.getNode(AArch64ISD::SRA_PRED, SDLoc(N), N->getValueType(0),
21364 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21366 return DAG.getNode(AArch64ISD::FADD_PRED, SDLoc(N), N->getValueType(0),
21367 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21369 return DAG.getNode(AArch64ISD::FDIV_PRED, SDLoc(N), N->getValueType(0),
21370 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21372 return DAG.getNode(AArch64ISD::FMAX_PRED, SDLoc(N), N->getValueType(0),
21373 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21375 return DAG.getNode(AArch64ISD::FMAXNM_PRED, SDLoc(N), N->getValueType(0),
21376 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21378 return DAG.getNode(AArch64ISD::FMA_PRED, SDLoc(N), N->getValueType(0),
21379 N->getOperand(1), N->getOperand(3), N->getOperand(4),
21380 N->getOperand(2));
21382 return DAG.getNode(AArch64ISD::FMIN_PRED, SDLoc(N), N->getValueType(0),
21383 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21385 return DAG.getNode(AArch64ISD::FMINNM_PRED, SDLoc(N), N->getValueType(0),
21386 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21388 return DAG.getNode(AArch64ISD::FMUL_PRED, SDLoc(N), N->getValueType(0),
21389 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21391 return DAG.getNode(AArch64ISD::FSUB_PRED, SDLoc(N), N->getValueType(0),
21392 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21394 return DAG.getNode(ISD::ADD, SDLoc(N), N->getValueType(0), N->getOperand(2),
21395 N->getOperand(3));
21397 return DAG.getNode(ISD::SUB, SDLoc(N), N->getValueType(0), N->getOperand(2),
21398 N->getOperand(3));
21402 return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0), N->getOperand(2),
21403 N->getOperand(3));
21405 return DAG.getNode(AArch64ISD::BIC, SDLoc(N), N->getValueType(0),
21406 N->getOperand(2), N->getOperand(3));
21408 return DAG.getNode(ISD::XOR, SDLoc(N), N->getValueType(0), N->getOperand(2),
21409 N->getOperand(3));
21411 return DAG.getNode(ISD::OR, SDLoc(N), N->getValueType(0), N->getOperand(2),
21412 N->getOperand(3));
21414 return DAG.getNode(ISD::ABDS, SDLoc(N), N->getValueType(0),
21415 N->getOperand(2), N->getOperand(3));
21417 return DAG.getNode(ISD::ABDU, SDLoc(N), N->getValueType(0),
21418 N->getOperand(2), N->getOperand(3));
21420 return DAG.getNode(AArch64ISD::SDIV_PRED, SDLoc(N), N->getValueType(0),
21421 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21423 return DAG.getNode(AArch64ISD::UDIV_PRED, SDLoc(N), N->getValueType(0),
21424 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21428 return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
21429 N->getOperand(2), N->getOperand(3));
21433 return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
21434 N->getOperand(2), N->getOperand(3));
21436 return DAG.getNode(ISD::SADDSAT, SDLoc(N), N->getValueType(0),
21437 N->getOperand(1), N->getOperand(2));
21439 return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
21440 N->getOperand(1), N->getOperand(2));
21442 return DAG.getNode(ISD::UADDSAT, SDLoc(N), N->getValueType(0),
21443 N->getOperand(1), N->getOperand(2));
21445 return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
21446 N->getOperand(1), N->getOperand(2));
21448 return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0),
21449 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21451 if (!N->getOperand(2).getValueType().isFloatingPoint())
21453 N->getValueType(0), N->getOperand(1), N->getOperand(2),
21454 N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
21457 if (!N->getOperand(2).getValueType().isFloatingPoint())
21459 N->getValueType(0), N->getOperand(1), N->getOperand(2),
21460 N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
21465 N->getValueType(0), N->getOperand(1), N->getOperand(2),
21466 N->getOperand(3), DAG.getCondCode(ISD::SETGE));
21471 N->getValueType(0), N->getOperand(1), N->getOperand(2),
21472 N->getOperand(3), DAG.getCondCode(ISD::SETGT));
21477 N->getValueType(0), N->getOperand(1), N->getOperand(2),
21478 N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
21483 N->getValueType(0), N->getOperand(1), N->getOperand(2),
21484 N->getOperand(3), DAG.getCondCode(ISD::SETNE));
21488 N->getValueType(0), N->getOperand(1), N->getOperand(2),
21489 N->getOperand(3), DAG.getCondCode(ISD::SETUO));
21504 return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
21505 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21527 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
21530 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
21533 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
21542 unsigned OC = N->getOpcode();
21556 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
21557 N->getOperand(0)->getOpcode() == ISD::SETCC);
21558 const SDValue SetCC = N->getOperand(0);
21562 if (!CCOp0->getValueType(0).isInteger() ||
21563 !CCOp1->getValueType(0).isInteger())
21567 cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get();
21575 DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp0);
21577 DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp1);
21580 SDLoc(SetCC), N->getValueType(0), Ext1, Ext2,
21581 cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get());
21594 if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
21595 (N->getOperand(0).getOpcode() == ISD::ABDU ||
21596 N->getOperand(0).getOpcode() == ISD::ABDS)) {
21597 SDNode *ABDNode = N->getOperand(0).getNode();
21603 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
21606 if (N->getValueType(0).isFixedLengthVector() &&
21607 N->getOpcode() == ISD::SIGN_EXTEND &&
21608 N->getOperand(0)->getOpcode() == ISD::SETCC)
21631 OrigAlignment, St.getMemOperand()->getFlags());
21634 if (BasePtr->getOpcode() == ISD::ADD &&
21635 isa<ConstantSDNode>(BasePtr->getOperand(1))) {
21636 BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
21637 BasePtr = BasePtr->getOperand(0);
21641 while (--NumVecElts) {
21648 St.getMemOperand()->getFlags());
21686 EVT VT = N->getValueType(0);
21696 SDValue Ops[] = { N->getOperand(0), // Chain
21697 N->getOperand(2), // Pg
21698 N->getOperand(3), // Base
21712 EVT VT = N->getValueType(0);
21713 EVT PtrTy = N->getOperand(3).getValueType();
21721 SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
21722 MINode->getOperand(3), DAG.getUNDEF(PtrTy),
21723 MINode->getOperand(2), PassThru,
21724 MINode->getMemoryVT(), MINode->getMemOperand(),
21741 EVT VT = N->getValueType(0);
21747 SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
21759 SDValue Data = N->getOperand(2);
21773 SDValue Ops[] = { N->getOperand(0), // Chain
21775 N->getOperand(4), // Base
21776 N->getOperand(3), // Pg
21780 return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
21786 SDValue Data = N->getOperand(2);
21788 EVT PtrTy = N->getOperand(4).getValueType();
21794 return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
21795 DAG.getUNDEF(PtrTy), MINode->getOperand(3),
21796 MINode->getMemoryVT(), MINode->getMemOperand(),
21803 /// if the zero constant is not re-used, since one instruction and one register
21849 int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
21850 if (Offset < -512 || Offset > 504)
21904 std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
21921 uint64_t IndexVal = CIndex->getZExtValue();
21940 if (S->isVolatile() || S->isIndexed())
21943 SDValue StVal = S->getValue();
21959 if (!Subtarget->isMisaligned128StoreSlow())
21962 // Don't split at -Oz.
21967 // those up regresses performance on micro-benchmarks and olden/bh.
21976 if (VT.getSizeInBits() != 128 || S->getAlign() >= Align(16) ||
21977 S->getAlign() <= Align(2))
21995 SDValue BasePtr = S->getBasePtr();
21997 DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
21998 S->getAlign(), S->getMemOperand()->getFlags());
22002 S->getPointerInfo(), S->getAlign(),
22003 S->getMemOperand()->getFlags());
22007 assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexpected Opcode!");
22009 // splice(pg, op1, undef) -> op1
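// Any lanes the splice would take from the undef second operand are themselves
// undef, so returning op1 unchanged is a legal refinement whatever the
// predicate selects.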
22010 if (N->getOperand(2).isUndef())
22011 return N->getOperand(1);
22018 assert((N->getOpcode() == AArch64ISD::UUNPKHI ||
22019 N->getOpcode() == AArch64ISD::UUNPKLO) &&
22022 // uunpklo/hi undef -> undef
22023 if (N->getOperand(0).isUndef())
22024 return DAG.getUNDEF(N->getValueType(0));
22029 if (N->getOperand(0).getOpcode() == ISD::MLOAD &&
22030 N->getOpcode() == AArch64ISD::UUNPKLO) {
22031 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N->getOperand(0));
22032 SDValue Mask = MLD->getMask();
22035 if (MLD->isUnindexed() && MLD->getExtensionType() != ISD::SEXTLOAD &&
22036 SDValue(MLD, 0).hasOneUse() && Mask->getOpcode() == AArch64ISD::PTRUE &&
22037 (MLD->getPassThru()->isUndef() ||
22038 isZerosVector(MLD->getPassThru().getNode()))) {
22039 unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
22040 unsigned PgPattern = Mask->getConstantOperandVal(0);
22041 EVT VT = N->getValueType(0);
22051 VT, DL, MLD->getChain(), MLD->getBasePtr(), MLD->getOffset(), Mask,
22052 PassThru, MLD->getMemoryVT(), MLD->getMemOperand(),
22053 MLD->getAddressingMode(), ISD::ZEXTLOAD);
22066 if (N->getOpcode() != AArch64ISD::UZP1)
22068 SDValue Op0 = N->getOperand(0);
22069 EVT SrcVT = Op0->getValueType(0);
22070 EVT DstVT = N->getValueType(0);
22078 // uzp1(rshrnb(uunpklo(X),C), rshrnb(uunpkhi(X), C)) -> urshr(X, C)
22080 assert(N->getOpcode() == AArch64ISD::UZP1 && "Only UZP1 expected.");
22081 SDValue Op0 = N->getOperand(0);
22082 SDValue Op1 = N->getOperand(1);
22083 EVT ResVT = N->getValueType(0);
22111 // t1 = nxv8i16 add(X, 1 << (ShiftValue - 1))
22121 EVT VT = Srl->getValueType(0);
22122 if (!VT.isScalableVector() || !Subtarget->hasSVE2())
22149 SDValue Op0 = N->getOperand(0);
22150 SDValue Op1 = N->getOperand(1);
22151 EVT ResVT = N->getValueType(0);
22153 // uzp(extract_lo(x), extract_hi(x)) -> extract_lo(uzp x, x)
22165 SDValue Uzp = DAG.getNode(N->getOpcode(), DL, WidenedResVT, SourceVec,
22173 if (N->getOpcode() == AArch64ISD::UZP2)
22176 // uzp1(x, undef) -> concat(truncate(x), undef)
22232 // uzp1(bitcast(x), bitcast(y)) -> uzp1(x, y)
22251 // truncating uzp1(x, y) -> xtn(concat (x, y))
22264 // uzp1(xtn x, xtn y) -> xtn(uzp1 (x, y))
22317 unsigned Opc = N->getOpcode();
22335 SDValue Chain = N->getOperand(0);
22336 SDValue Pg = N->getOperand(1);
22337 SDValue Base = N->getOperand(2);
22338 SDValue Offset = N->getOperand(3);
22339 SDValue Ty = N->getOperand(4);
22341 EVT ResVT = N->getValueType(0);
22353 EVT ExtFromEVT = ExtFrom->getVT().getVectorElementType();
22355 // If the predicate for the sign- or zero-extended offset is the
22356 // same as the predicate used for this load and the sign-/zero-extension
22357 // was from a 32-bit...
22378 assert(N->getOpcode() == AArch64ISD::VASHR ||
22379 N->getOpcode() == AArch64ISD::VLSHR);
22381 SDValue Op = N->getOperand(0);
22384 unsigned ShiftImm = N->getConstantOperandVal(1);
22388 if (N->getOpcode() == AArch64ISD::VASHR &&
22390 N->getOperand(1) == Op.getOperand(1))
22395 if (N->getFlags().hasExact())
22408 // sunpklo(sext(pred)) -> sext(extract_low_half(pred))
22411 if (N->getOperand(0).getOpcode() == ISD::SIGN_EXTEND &&
22412 N->getOperand(0)->getOperand(0)->getValueType(0).getScalarType() ==
22414 SDValue CC = N->getOperand(0)->getOperand(0);
22415 auto VT = CC->getValueType(0).getHalfNumVectorElementsVT(*DAG.getContext());
22418 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), N->getValueType(0), Unpk);
22424 /// Target-specific DAG combine function for post-increment LD1 (lane) and
22425 /// post-increment LD1R.
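/// Illustrative shape of the match: a load whose result feeds an
/// insert_vector_elt (or a dup) and whose address is separately incremented by
/// the access size can be rewritten to the post-indexed LD1 lane / LD1R form,
/// so the address update is folded into the memory operation.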
22433 EVT VT = N->getValueType(0);
22439 SDNode *LD = N->getOperand(LoadIdx).getNode();
22441 if (LD->getOpcode() != ISD::LOAD)
22447 Lane = N->getOperand(2);
22449 if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
22454 EVT MemVT = LoadSDN->getMemoryVT();
22461 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
22472 if (N->hasOneUse()) {
22473 unsigned UseOpc = N->use_begin()->getOpcode();
22478 SDValue Addr = LD->getOperand(1);
22479 SDValue Vector = N->getOperand(0);
22481 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
22482 Addr.getNode()->use_end(); UI != UE; ++UI) {
22484 if (User->getOpcode() != ISD::ADD
22489 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
22491 uint32_t IncVal = CInc->getZExtValue();
22511 Ops.push_back(LD->getOperand(0)); // Chain
22524 LoadSDN->getMemOperand());
22558 assert((N->getOpcode() == ISD::STORE || N->getOpcode() == ISD::MSTORE) &&
22562 if (!Store->isTruncatingStore() || Store->isIndexed())
22564 SDValue Ext = Store->getValue();
22569 SDValue Orig = Ext->getOperand(0);
22570 if (Store->getMemoryVT() != Orig.getValueType())
22572 return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
22573 Store->getBasePtr(), Store->getMemOperand());
22596 EVT MemVT = LD->getMemoryVT();
22598 LD->getOriginalAlign() >= 4)
22603 SDValue Chain = LD->getChain();
22604 SDValue BasePtr = LD->getBasePtr();
22605 MachineMemOperand *MMO = LD->getMemOperand();
22606 assert(LD->getOffset().isUndef() && "undef offset expected");
22635 // nontemporal loads larger than 256-bit loads for odd types so LDNPQ 256-bit
22641 if (Subtarget->supportsAddressTopByteIgnored())
22642 performTBISimplification(N->getOperand(1), DCI, DAG);
22645 if (LD->isVolatile() || !Subtarget->isLittleEndian())
22651 if (!LD->isNonTemporal())
22654 EVT MemVT = LD->getMemoryVT();
22661 SDValue Chain = LD->getChain();
22662 SDValue BasePtr = LD->getBasePtr();
22663 SDNodeFlags Flags = LD->getFlags();
22666 // Replace any non-temporal load over 256 bits with a series of 256-bit loads
22667 // and a scalar/vector load of less than 256 bits. This way we can utilize 256-bit
22673 // Create all 256-bit loads starting from offset 0 and up to (Num256Loads - 1) * 32.
22678 Align NewAlign = commonAlignment(LD->getAlign(), PtrOffset);
22680 NewVT, DL, Chain, NewPtr, LD->getPointerInfo().getWithOffset(PtrOffset),
22681 NewAlign, LD->getMemOperand()->getFlags(), LD->getAAInfo());
22688 // 256-bit loads and inserting the remaining load to it. We extract the
22691 unsigned PtrOffset = (MemVT.getSizeInBits() - BitsRemaining) / 8;
22697 Align NewAlign = commonAlignment(LD->getAlign(), PtrOffset);
22700 LD->getPointerInfo().getWithOffset(PtrOffset), NewAlign,
22701 LD->getMemOperand()->getFlags(), LD->getAAInfo());
22737 for (SDValue Operand : Op->op_values()) {
22774 unsigned BitsPerElement = std::max(64 / NumElts, 8u); // >= 64-bit vector
22794 // create 8x 16-bit values, and then perform the vector reduce.
22814 unsigned MaxBitMask = 1u << (VecVT.getVectorNumElements() - 1);
22829 if (!Store->isTruncatingStore())
22833 SDValue VecOp = Store->getValue();
22835 EVT MemVT = Store->getMemoryVT();
22854 return DAG.getStore(Store->getChain(), DL, ExtendedBits, Store->getBasePtr(),
22855 Store->getMemOperand());
22867 SDValue Value = ST->getValue();
22870 if (ST->isVolatile() || !Subtarget->isLittleEndian() ||
22875 assert(ST->getOffset().isUndef() && "undef offset expected");
22879 Value->getOperand(0).getValueType().getVectorElementType(), 4);
22883 {UndefVector, Value->getOperand(0), DAG.getVectorIdxConstant(0, DL)});
22889 SDValue Chain = ST->getChain();
22890 MachineMemOperand *MMO = ST->getMemOperand();
22895 SDValue Ptr2 = DAG.getMemBasePlusOffset(ST->getBasePtr(), Offset2, DL);
22901 SDValue Ptr1 = DAG.getMemBasePlusOffset(ST->getBasePtr(), Offset1, DL);
22906 Chain = DAG.getStore(Chain, DL, E0, ST->getBasePtr(),
22916 SDValue Chain = ST->getChain();
22917 SDValue Value = ST->getValue();
22918 SDValue Ptr = ST->getBasePtr();
22934 Value.getNode()->hasOneUse() && ST->isUnindexed() &&
22935 Subtarget->useSVEForFixedLengthVectors() &&
22937 ValueVT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits() &&
22940 ST->getMemoryVT(), ST->getMemOperand());
22945 if (Subtarget->supportsAddressTopByteIgnored() &&
22946 performTBISimplification(N->getOperand(2), DCI, DAG))
22955 if (ST->isTruncatingStore()) {
22956 EVT StoreVT = ST->getMemoryVT();
22960 trySimplifySrlAddToRshrnb(ST->getOperand(1), DAG, Subtarget)) {
22961 return DAG.getTruncStore(ST->getChain(), ST, Rshrnb, ST->getBasePtr(),
22962 StoreVT, ST->getMemOperand());
22974 SDValue Value = MST->getValue();
22975 SDValue Mask = MST->getMask();
22981 if (Value.getOpcode() == AArch64ISD::UZP1 && Value->hasOneUse() &&
22982 MST->isUnindexed() && Mask->getOpcode() == AArch64ISD::PTRUE &&
22991 unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
22992 unsigned PgPattern = Mask->getConstantOperandVal(0);
23000 return DAG.getMaskedStore(MST->getChain(), DL, Value.getOperand(0),
23001 MST->getBasePtr(), MST->getOffset(), Mask,
23002 MST->getMemoryVT(), MST->getMemOperand(),
23003 MST->getAddressingMode(),
23010 if (MST->isTruncatingStore()) {
23011 EVT ValueVT = Value->getValueType(0);
23012 EVT MemVT = MST->getMemoryVT();
23016 return DAG.getMaskedStore(MST->getChain(), DL, Rshrnb, MST->getBasePtr(),
23017 MST->getOffset(), MST->getMask(),
23018 MST->getMemoryVT(), MST->getMemOperand(),
23019 MST->getAddressingMode(), true);
23037 // ->
23052 // ->
23083 while (foldIndexIntoBase(BasePtr, Index, N->getScale(), SDLoc(N), DAG))
23093 EVT DataVT = N->getOperand(1).getValueType();
23095 // will later be re-extended to 64 bits in legalization
23098 if (ISD::isVectorShrinkable(Index.getNode(), 32, N->isIndexSigned())) {
23108 Stride = cast<ConstantSDNode>(Index.getOperand(0))->getSExtValue();
23118 Stride = Step << Shift->getZExtValue();
23156 SDValue Chain = MGS->getChain();
23157 SDValue Scale = MGS->getScale();
23158 SDValue Index = MGS->getIndex();
23159 SDValue Mask = MGS->getMask();
23160 SDValue BasePtr = MGS->getBasePtr();
23161 ISD::MemIndexType IndexType = MGS->getIndexType();
23169 SDValue PassThru = MGT->getPassThru();
23172 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
23173 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
23176 SDValue Data = MSC->getValue();
23178 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL,
23179 Ops, MSC->getMemOperand(), IndexType,
23180 MSC->isTruncatingStore());
23183 /// Target-specific DAG combine function for NEON load/store intrinsics
23191 unsigned AddrOpIdx = N->getNumOperands() - 1;
23192 SDValue Addr = N->getOperand(AddrOpIdx);
23195 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
23196 UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
23198 if (User->getOpcode() != ISD::ADD ||
23219 unsigned IntNo = N->getConstantOperandVal(1);
23268 VecTy = N->getOperand(2).getValueType();
23270 VecTy = N->getValueType(0);
23273 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
23275 uint32_t IncVal = CInc->getZExtValue();
23284 Ops.push_back(N->getOperand(0)); // Incoming chain
23288 Ops.push_back(N->getOperand(i));
23304 MemInt->getMemoryVT(),
23305 MemInt->getMemOperand());
23326 switch(V.getNode()->getOpcode()) {
23331 if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
23332 || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
23333 ExtType = LoadNode->getExtensionType();
23339 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
23340 if ((TypeNode->getVT() == MVT::i8 && width == 8)
23341 || (TypeNode->getVT() == MVT::i16 && width == 16)) {
23348 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
23349 if ((TypeNode->getVT() == MVT::i8 && width == 8)
23350 || (TypeNode->getVT() == MVT::i16 && width == 16)) {
23358 return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
23359 1LL << (width - 1);
23370 // +-------------+ +-------------+ +-------------+ +-------------+
23372 // +-------------+ +-------------+ +-------------+ +-------------+
23374 // V V | +----------+
23375 // +-------------+ +----+ | |
23377 // +-------------+ +----+ | |
23380 // +-------------+ | |
23382 // +-------------+ | |
23384 // +-----+ | |
23387 // +-------------+
23389 // +-------------+
23401 // extension (8,15), 8 patterns unique to sign extensions (-8,-1), and 8
23431 // symbolic values and well known constants (0, 1, -1, MaxUInt) we can
23441 AddConstant -= (1 << (width-1));
23447 (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
23462 (AddConstant <= 0 && CompConstant >= -1 &&
23502 // (X & C) >u Mask --> ((X & (C & ~Mask)) != 0)
23503 // (X & C) <u Pow2 --> ((X & (C & ~(Pow2-1))) == 0)
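// Worked example (values chosen for illustration): with C = 0xF0F0 and
// Mask = 0xFF, (X & 0xF0F0) >u 0xFF depends only on the bits of C above bit 7,
// so it is equivalent to (X & 0xF000) != 0, which a single ANDS can test.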
23508 ConstantSDNode *SubsC = dyn_cast<ConstantSDNode>(SubsNode->getOperand(1));
23512 APInt SubsAP = SubsC->getAPIntValue();
23522 ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(AndNode->getOperand(1));
23526 APInt MaskAP = CC == AArch64CC::HI ? SubsAP : (SubsAP - 1);
23529 APInt AndSMask = (~MaskAP) & AndC->getAPIntValue();
23531 AArch64ISD::ANDS, DL, SubsNode->getVTList(), AndNode->getOperand(0),
23532 DAG.getConstant(AndSMask, DL, SubsC->getValueType(0)));
23535 N->getOperand(CCIndex)->getValueType(0));
23545 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), AArch64_CC,
23547 return DAG.getNode(N->getOpcode(), N, N->getVTList(), Ops);
23555 unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
23556 SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
23557 unsigned CondOpcode = SubsNode->getOpcode();
23559 if (CondOpcode != AArch64ISD::SUBS || SubsNode->hasAnyUseOfValue(0) ||
23560 !SubsNode->hasOneUse())
23566 SDNode *AndNode = SubsNode->getOperand(0).getNode();
23569 if (AndNode->getOpcode() != ISD::AND)
23576 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
23577 uint32_t CNV = CN->getZExtValue();
23587 SDValue AddValue = AndNode->getOperand(0);
23594 SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
23595 SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
23596 SDValue SubsInputValue = SubsNode->getOperand(1);
23613 cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
23614 cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
23619 SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
23620 SubsNode->getValueType(1));
23621 SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
23642 SDValue Chain = N->getOperand(0);
23643 SDValue Dest = N->getOperand(1);
23644 SDValue CCVal = N->getOperand(2);
23645 SDValue Cmp = N->getOperand(3);
23648 unsigned CC = CCVal->getAsZExtVal();
23658 if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
23693 unsigned CC = N->getConstantOperandVal(2);
23694 SDValue SUBS = N->getOperand(3);
23698 Zero = N->getOperand(0);
23699 CTTZ = N->getOperand(1);
23701 Zero = N->getOperand(1);
23702 CTTZ = N->getOperand(0);
23728 DAG.getConstant(BitWidth - 1, SDLoc(N), CTTZ.getValueType());
23741 SDValue L = Op->getOperand(0);
23742 SDValue R = Op->getOperand(1);
23744 static_cast<AArch64CC::CondCode>(Op->getConstantOperandVal(2));
23746 SDValue OpCmp = Op->getOperand(3);
23758 SDValue X = CmpLHS->getOperand(0);
23759 SDValue Y = CmpLHS->getOperand(1);
23769 if (CX->getAPIntValue() == CY->getAPIntValue())
23773 static_cast<AArch64CC::CondCode>(CmpLHS->getConstantOperandVal(2));
23774 SDValue Cond = CmpLHS->getOperand(3);
23787 EVT VT = Op->getValueType(0);
23797 // CSEL x, x, cc -> x
23798 if (N->getOperand(0) == N->getOperand(1))
23799 return N->getOperand(0);
23804 // CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
23805 // CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
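// On AArch64 a CTTZ of zero produces the bit width (e.g. via RBIT+CLZ), and
// the bit width is a power of two, so ANDing with bitwidth-1 maps that case to
// 0 while leaving every in-range trailing-zero count unchanged.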
23812 // Try to re-use an already extended operand of a vector SetCC feeding a
23816 EVT Op0MVT = Op->getOperand(0).getValueType();
23817 if (!Op0MVT.isVector() || Op->use_empty())
23822 SDNode *FirstUse = *Op->use_begin();
23823 if (FirstUse->getOpcode() != ISD::VSELECT)
23825 EVT UseMVT = FirstUse->getValueType(0);
23828 if (any_of(Op->uses(), [&UseMVT](const SDNode *N) {
23829 return N->getOpcode() != ISD::VSELECT || N->getValueType(0) != UseMVT;
23834 if (!ISD::isConstantSplatVector(Op->getOperand(1).getNode(), V))
23840 ISD::CondCode CC = cast<CondCodeSDNode>(Op->getOperand(2))->get();
23842 // split the SET_CC and re-use the extended version of the operand.
23844 Op->getOperand(0));
23846 Op->getOperand(0));
23849 Op1ExtV = DAG.getNode(ISD::SIGN_EXTEND, DL, UseMVT, Op->getOperand(1));
23852 Op1ExtV = DAG.getNode(ISD::ZERO_EXTEND, DL, UseMVT, Op->getOperand(1));
23857 Op0ExtV, Op1ExtV, Op->getOperand(2));
23863 SDValue Vec = N->getOperand(0);
23869 return getVectorBitwiseReduce(N->getOpcode(), Vec, N->getValueType(0), DL,
23879 assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
23880 SDValue LHS = N->getOperand(0);
23881 SDValue RHS = N->getOperand(1);
23882 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
23884 EVT VT = N->getValueType(0);
23891 LHS->getOpcode() == AArch64ISD::CSEL &&
23892 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
23893 LHS->hasOneUse()) {
23907 // setcc (srl x, imm), 0, ne ==> setcc (and x, (-1 << imm)), 0, ne
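// Worked example (i32, imm = 5, for illustration): (x >> 5) != 0 asks whether
// any bit at position 5 or above is set, i.e. (x & 0xFFFFFFE0) != 0, and that
// mask is a logical immediate, so the shift+compare becomes a single TST.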
23909 LHS->getOpcode() == ISD::SRL && isa<ConstantSDNode>(LHS->getOperand(1)) &&
23910 LHS->getConstantOperandVal(1) < VT.getScalarSizeInBits() &&
23911 LHS->hasOneUse()) {
23912 EVT TstVT = LHS->getValueType(0);
23915 uint64_t TstImm = -1ULL << LHS->getConstantOperandVal(1);
23916 SDValue TST = DAG.getNode(ISD::AND, DL, TstVT, LHS->getOperand(0),
23918 return DAG.getNode(ISD::SETCC, DL, VT, TST, RHS, N->getOperand(2));
23924 // setcc (iN (bitcast (vNi1 X))), -1, (eq|ne)
23925 // ==> setcc (iN (sext (i1 (vecreduce_and (vNi1 X))))), -1, (eq|ne)
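// The iN bitcast of a vNi1 mask is all-ones exactly when every lane is true,
// so the scalar compare against -1 can instead be expressed as an AND
// reduction of the mask, avoiding the scalar materialisation of the bitcast.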
23929 LHS->getOpcode() == ISD::BITCAST) {
23930 EVT ToVT = LHS->getValueType(0);
23931 EVT FromVT = LHS->getOperand(0).getValueType();
23936 DL, MVT::i1, LHS->getOperand(0));
23950 // Replace a flag-setting operator (eg ANDS) with the generic version
23956 SDValue LHS = N->getOperand(0);
23957 SDValue RHS = N->getOperand(1);
23958 EVT VT = N->getValueType(0);
23961 if (!N->hasAnyUseOfValue(1)) {
23962 SDValue Res = DCI.DAG.getNode(GenericOpcode, DL, VT, N->ops());
23967 // Combine identical generic nodes into this node, re-using the result.
23979 SDValue Pred = N->getOperand(0);
23980 SDValue LHS = N->getOperand(1);
23981 SDValue RHS = N->getOperand(2);
23982 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
23985 LHS->getOpcode() != ISD::SIGN_EXTEND)
23988 SDValue Extract = LHS->getOperand(0);
23989 if (Extract->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
23990 Extract->getValueType(0) != N->getValueType(0) ||
23991 Extract->getConstantOperandVal(1) != 0)
23994 SDValue InnerSetCC = Extract->getOperand(0);
23995 if (InnerSetCC->getOpcode() != AArch64ISD::SETCC_MERGE_ZERO)
24006 Pred->getConstantOperandVal(0) >= AArch64SVEPredPattern::vl1 &&
24007 Pred->getConstantOperandVal(0) <= AArch64SVEPredPattern::vl256)
24015 assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
24019 SDValue Pred = N->getOperand(0);
24020 SDValue LHS = N->getOperand(1);
24021 SDValue RHS = N->getOperand(2);
24022 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
24028 LHS->getOpcode() == ISD::SIGN_EXTEND &&
24029 LHS->getOperand(0)->getValueType(0) == N->getValueType(0)) {
24033 if (LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
24034 LHS->getOperand(0)->getOperand(0) == Pred)
24035 return LHS->getOperand(0);
24039 // -> nxvNi1 ...
24041 return LHS->getOperand(0);
24045 // -> nxvNi1 and(pred, ...)
24049 return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0),
24050 LHS->getOperand(0), Pred);
24063 if (!Op->hasOneUse())
24066 // We don't handle undef/constant-fold cases below, as they should have
24070 // (tbz (trunc x), b) -> (tbz x, b)
24072 if (Op->getOpcode() == ISD::TRUNCATE &&
24073 Bit < Op->getValueType(0).getSizeInBits()) {
24074 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
24077 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
24078 if (Op->getOpcode() == ISD::ANY_EXTEND &&
24079 Bit < Op->getOperand(0).getValueSizeInBits()) {
24080 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
24083 if (Op->getNumOperands() != 2)
24086 auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
24090 switch (Op->getOpcode()) {
24094 // (tbz (and x, m), b) -> (tbz x, b)
24096 if ((C->getZExtValue() >> Bit) & 1)
24097 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
24100 // (tbz (shl x, c), b) -> (tbz x, b-c)
24102 if (C->getZExtValue() <= Bit &&
24103 (Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
24104 Bit = Bit - C->getZExtValue();
24105 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
24109 // (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
24111 Bit = Bit + C->getZExtValue();
24112 if (Bit >= Op->getValueType(0).getSizeInBits())
24113 Bit = Op->getValueType(0).getSizeInBits() - 1;
24114 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
24116 // (tbz (srl x, c), b) -> (tbz x, b+c)
24118 if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
24119 Bit = Bit + C->getZExtValue();
24120 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
24124 // (tbz (xor x, -1), b) -> (tbnz x, b)
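// XOR with all-ones only complements each bit, so rather than materialising
// the XOR we test the same bit of x and flip the branch polarity
// (TBZ <-> TBNZ).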
24126 if ((C->getZExtValue() >> Bit) & 1)
24128 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
24132 // Optimize test single bit zero/non-zero and branch.
24136 unsigned Bit = N->getConstantOperandVal(2);
24138 SDValue TestSrc = N->getOperand(1);
24144 unsigned NewOpc = N->getOpcode();
24155 return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
24156 DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
24165 auto SelectA = N->getOperand(1);
24166 auto SelectB = N->getOperand(2);
24167 auto NTy = N->getValueType(0);
24171 SDValue SetCC = N->getOperand(0);
24186 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
24197 // vselect (v1i1 setcc) ->
24206 SDValue N0 = N->getOperand(0);
24210 return N->getOperand(1);
24213 return N->getOperand(2);
24215 // Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
24216 // into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
24218 SDValue SetCC = N->getOperand(0);
24224 SDNode *SplatLHS = N->getOperand(1).getNode();
24225 SDNode *SplatRHS = N->getOperand(2).getNode();
24227 if (CmpLHS.getValueType() == N->getOperand(1).getValueType() &&
24237 NumElts, DAG.getConstant(VT.getScalarSizeInBits() - 1, SDLoc(N),
24242 auto Or = DAG.getNode(ISD::OR, SDLoc(N), VT, Shift, N->getOperand(1));
24254 EVT ResVT = N->getValueType(0);
24260 SDValue IfTrue = N->getOperand(1);
24261 SDValue IfFalse = N->getOperand(2);
24264 cast<CondCodeSDNode>(N0.getOperand(2))->get());
24270 /// the compare-mask instructions rather than going via NZCV, even if LHS and
24276 SDValue N0 = N->getOperand(0);
24277 EVT ResVT = N->getValueType(0);
24289 "Scalar-SETCC feeding SELECT has unexpected result type!");
24292 // largest real NEON comparison is 64-bits per lane, which means the result is
24293 // at most 32-bits and an illegal vector. Just bail out for now.
24335 return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
24340 EVT VT = N->getValueType(0);
24346 SmallVector<SDValue> Ops(N->ops());
24347 if (SDNode *LN = DCI.DAG.getNodeIfExists(N->getOpcode(),
24354 if (N->getOpcode() == AArch64ISD::DUP) {
24363 SDValue EXTRACT_VEC_ELT = N->getOperand(0);
24381 if (N->getValueType(0) == N->getOperand(0).getValueType())
24382 return N->getOperand(0);
24383 if (N->getOperand(0).getOpcode() == AArch64ISD::NVCAST)
24384 return DAG.getNode(AArch64ISD::NVCAST, SDLoc(N), N->getValueType(0),
24385 N->getOperand(0).getOperand(0));
24392 // globaladdr as (globaladdr + constant) - constant.
24397 if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
24401 uint64_t MinOffset = -1ull;
24402 for (SDNode *N : GN->uses()) {
24403 if (N->getOpcode() != ISD::ADD)
24405 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
24407 C = dyn_cast<ConstantSDNode>(N->getOperand(1));
24410 MinOffset = std::min(MinOffset, C->getZExtValue());
24412 uint64_t Offset = MinOffset + GN->getOffset();
24416 // (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
24417 if (Offset <= uint64_t(GN->getOffset()))
24432 const GlobalValue *GV = GN->getGlobal();
24433 Type *T = GV->getValueType();
24434 if (!T->isSized() ||
24435 Offset > GV->getDataLayout().getTypeAllocSize(T))
24446 SDValue BR = N->getOperand(0);
24447 if (!Subtarget->hasCSSC() || BR.getOpcode() != ISD::BITREVERSE ||
24499 OffsetConst->getZExtValue(), ScalarSizeInBytes);
24505 const SDValue Src = N->getOperand(2);
24506 const EVT SrcVT = Src->getValueType(0);
24528 SDValue Base = N->getOperand(4);
24531 SDValue Offset = N->getOperand(5);
24534 // applies to non-temporal scatters because there's no instruction that takes
24546 // In the case of non-temporal gather loads there's only one SVE instruction
24547 // per data-size: "scalar + vector", i.e.
24559 // immediates outside that range and non-immediate scalar offsets use SST1 or
24590 // Keep the original type of the input data to store - this is needed to be
24605 SDValue Ops[] = {N->getOperand(0), // Chain
24607 N->getOperand(3), // Pg
24618 const EVT RetVT = N->getValueType(0);
24630 SDValue Base = N->getOperand(3);
24633 SDValue Offset = N->getOperand(4);
24636 // offsets. This applies to non-temporal and quadword gathers, which do not
24648 // In the case of non-temporal gather loads and quadword gather loads there's
24662 // immediates outside that range and non-immediate scalar offsets use
24695 // Keep the original output value type around - this is needed to be able to
24703 SDValue Ops[] = {N->getOperand(0), // Chain
24704 N->getOperand(2), // Pg
24725 SDValue Src = N->getOperand(0);
24726 unsigned Opc = Src->getOpcode();
24728 // Sign extend of an unsigned unpack -> signed unpack
24738 // ->
24740 // ->
24742 SDValue ExtOp = Src->getOperand(0);
24743 auto VT = cast<VTSDNode>(N->getOperand(1))->getVT();
24755 return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
24830 EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
24831 EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();
24836 EVT DstVT = N->getValueType(0);
24840 for (unsigned I = 0; I < Src->getNumOperands(); ++I)
24841 Ops.push_back(Src->getOperand(I));
24852 /// offset vector is an unpacked 32-bit scalable vector. The other cases (Offset
24856 SDValue Offset = N->getOperand(OffsetPos);
24862 // Extend the unpacked offset vector to 64-bit lanes.
24865 SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
24866 // Replace the offset operand with the 64-bit one.
24869 return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
24881 if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes))
24885 SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
24893 return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
24922 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!");
24923 SDValue InsertVec = N->getOperand(0);
24924 SDValue InsertElt = N->getOperand(1);
24925 SDValue InsertIdx = N->getOperand(2);
24944 // If we get here we are effectively trying to zero lanes 1-N of a vector.
24947 if (N->getValueType(0) != ExtractVec.getValueType())
24968 SDValue N0 = N->getOperand(0);
24969 EVT VT = N->getValueType(0);
24972 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND)
24980 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
24984 N0.hasOneUse() && Subtarget->useSVEForFixedLengthVectors() &&
24986 VT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits()) {
24989 LN0->getChain(), LN0->getBasePtr(),
24990 N0.getValueType(), LN0->getMemOperand());
25005 EVT VT = N->getValueType(0);
25008 if (!VT.isScalableVector() || Subtarget->hasSVE2() || Subtarget->hasSME())
25013 SDValue Mask = N->getOperand(0);
25014 SDValue In1 = N->getOperand(1);
25015 SDValue In2 = N->getOperand(2);
25024 EVT VT = N->getValueType(0);
25026 SDValue Insert = N->getOperand(0);
25034 uint64_t IdxDupLane = N->getConstantOperandVal(1);
25052 DAG.getUNDEF(NewSubvecVT), Subvec, Insert->getOperand(2));
25054 NewInsert, N->getOperand(1));
25065 SDValue LHS = N->getOperand(0);
25066 SDValue RHS = N->getOperand(1);
25094 // You can see the regressions on test/CodeGen/AArch64/aarch64-smull.ll
25117 if (ExtractHighSrcVec->use_size() != 2)
25121 for (SDNode *User : ExtractHighSrcVec.getNode()->uses()) {
25125 if (User->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
25126 !isNullConstant(User->getOperand(1))) {
25134 if (!ExtractLow || !ExtractLow->hasOneUse())
25139 SDNode *ExtractLowUser = *ExtractLow.getNode()->use_begin();
25140 if (ExtractLowUser->getOpcode() != N->getOpcode()) {
25143 if (ExtractLowUser->getOperand(0) == ExtractLow) {
25144 if (ExtractLowUser->getOperand(1).getOpcode() == ISD::TRUNCATE)
25145 TruncLow = ExtractLowUser->getOperand(1);
25149 if (ExtractLowUser->getOperand(0).getOpcode() == ISD::TRUNCATE)
25150 TruncLow = ExtractLowUser->getOperand(0);
25159 // You can see the regressions on test/CodeGen/AArch64/aarch64-smull.ll
25222 EVT VT = N->getValueType(0);
25226 SDValue ZEXT = N->getOperand(0);
25258 switch (N->getOpcode()) {
25291 APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits());
25293 APInt::getAllOnes(N->getValueType(0).getVectorNumElements());
25415 switch (N->getConstantOperandVal(1)) {
25566 unsigned IntrinsicID = N->getConstantOperandVal(1);
25573 N->getOperand(0), DAG.getConstant(Register, DL, MVT::i64));
25583 DAG.getVTList(MVT::Other), N->getOperand(0),
25584 N->getOperand(2), N->getOperand(3));
25587 DAG.getVTList(MVT::Other), N->getOperand(0),
25588 N->getOperand(2), N->getOperand(3));
25604 // we can't perform a tail-call. In particular, we need to check for
25609 if (N->getNumValues() != 1)
25611 if (!N->hasNUsesOfValue(1, 0))
25615 SDNode *Copy = *N->use_begin();
25616 if (Copy->getOpcode() == ISD::CopyToReg) {
25619 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
25622 TCChain = Copy->getOperand(0);
25623 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
25627 for (SDNode *Node : Copy->uses()) {
25628 if (Node->getOpcode() != AArch64ISD::RET_GLUE)
25645 return CI->isTailCall();
25652 if (!CstOffset || CstOffset->isZero())
25658 return isInt<9>(CstOffset->getSExtValue());
25665 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
25668 // Non-null if there is exactly one user of the loaded value (ignoring chain).
25670 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
25677 ValOnlyUser = nullptr; // Multiple non-chain uses, bail out.
25688 if (ValOnlyUser && ValOnlyUser->getValueType(0).isScalableVector() &&
25689 (ValOnlyUser->getOpcode() == ISD::SPLAT_VECTOR ||
25690 (ValOnlyUser->getOpcode() == AArch64ISD::DUP_MERGE_PASSTHRU &&
25691 IsUndefOrZero(ValOnlyUser->getOperand(2)))))
25694 Base = Op->getOperand(0);
25697 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
25698 int64_t RHSC = RHS->getSExtValue();
25699 if (Op->getOpcode() == ISD::SUB)
25700 RHSC = -(uint64_t)RHSC;
25703 // Always emit pre-inc/post-inc addressing mode. Use negated constant offset
25705 Offset = DAG.getConstant(RHSC, SDLoc(N), RHS->getValueType(0));
25718 VT = LD->getMemoryVT();
25719 Ptr = LD->getBasePtr();
25721 VT = ST->getMemoryVT();
25722 Ptr = ST->getBasePtr();
25738 VT = LD->getMemoryVT();
25739 Ptr = LD->getBasePtr();
25741 VT = ST->getMemoryVT();
25742 Ptr = ST->getBasePtr();
25748 // Post-indexing updates the base, so it's not a valid transform
25760 SDValue Op = N->getOperand(0);
25761 EVT VT = N->getValueType(0);
25788 SDValue Op = N->getOperand(0);
25789 EVT VT = N->getValueType(0);
25802 SDValue Op = N->getOperand(0);
25803 EVT VT = N->getValueType(0);
25823 "Expected fp->int bitcast!");
25854 EVT VT = N->getValueType(0);
25857 !N->getFlags().hasAllowReassociation()) ||
25858 (VT.getScalarType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
25862 SDValue X = N->getOperand(0);
25863 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(1));
25865 Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
25866 X = N->getOperand(1);
25871 if (Shuf->getOperand(0) != X || !Shuf->getOperand(1)->isUndef())
25875 ArrayRef<int> Mask = Shuf->getMask();
25877 if (Mask[I] != (I % 2 == 0 ? I + 1 : I - 1))
25906 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
25915 SDValue In = N->getOperand(0);
25923 EVT VT = N->getValueType(0);
25932 auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
25936 unsigned Index = CIndex->getZExtValue();
25943 SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
25966 assert(N->getValueType(0) == MVT::i128 &&
25969 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
25970 if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
25971 // LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
25974 createGPRPairNode(DAG, N->getOperand(2)), // Compare value
25975 createGPRPairNode(DAG, N->getOperand(3)), // Store value
25976 N->getOperand(1), // Ptr
25977 N->getOperand(0), // Chain in
25981 switch (MemOp->getMergedOrdering()) {
26017 switch (MemOp->getMergedOrdering()) {
26036 auto Desired = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
26037 auto New = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
26038 SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
26039 New.first, New.second, N->getOperand(0)};
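// A minimal standalone sketch (not from this file) of what SplitScalar does to
// the i128 "Desired" and "New" values above on a little-endian target: the
// 128-bit quantity is handed to the CMP_SWAP_128 pseudo as two i64 halves.
// The use of unsigned __int128 is illustrative (a GCC/Clang extension).
#include <cstdint>
#include <utility>

static std::pair<std::uint64_t, std::uint64_t> splitU128(unsigned __int128 V) {
  std::uint64_t Lo = static_cast<std::uint64_t>(V);       // bits [63:0]
  std::uint64_t Hi = static_cast<std::uint64_t>(V >> 64); // bits [127:64]
  return {Lo, Hi};
}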
26053 // LowerATOMIC_LOAD_AND). We can't take that approach with 128-bit, because
26054 // the type is not legal. Therefore we shouldn't expect to see a 128-bit
26129 // LSE128 has 128-bit RMW ops, but i128 is not a legal type, so lower it
26137 assert(N->getValueType(0) == MVT::i128 &&
26140 if (!Subtarget->hasLSE128())
26143 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
26144 const SDValue &Chain = N->getOperand(0);
26145 const SDValue &Ptr = N->getOperand(1);
26146 const SDValue &Val128 = N->getOperand(2);
26150 const unsigned ISDOpcode = N->getOpcode();
26152 getAtomicLoad128Opcode(ISDOpcode, MemOp->getMergedOrdering());
26158 DAG.getConstant(-1ULL, dl, MVT::i64), Val2x64.first);
26161 DAG.getConstant(-1ULL, dl, MVT::i64), Val2x64.second);
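// A minimal standalone sketch (not from this file) of the identity behind the
// XOR-with-all-ones nodes above: LDCLRP atomically clears the bits set in its
// operand, so an atomic AND with V is expressed as a clear of ~V, since
// X & V == X & ~(~V). Plain integers stand in for the atomic memory word.
#include <cassert>
#include <cstdint>

static std::uint64_t clearBits(std::uint64_t X, std::uint64_t Mask) {
  return X & ~Mask; // What an atomic bit-clear leaves behind.
}

static void checkAndViaClear(std::uint64_t X, std::uint64_t V) {
  assert((X & V) == clearBits(X, ~V) && "AND equals CLR of inverted operand");
}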
26184 switch (N->getOpcode()) {
26239 assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
26246 assert(N->getValueType(0) != MVT::i128 &&
26247 "128-bit ATOMIC_LOAD_AND should be lowered directly to LDCLRP");
26252 assert(cast<AtomicSDNode>(N)->getVal().getValueType() == MVT::i128 &&
26253 "Expected 128-bit atomicrmw.");
26261 EVT MemVT = LoadNode->getMemoryVT();
26262 // Handle lowering 256 bit non-temporal loads into LDNP for little-endian
26264 if (LoadNode->isNonTemporal() && Subtarget->isLittleEndian() &&
26276 {LoadNode->getChain(), LoadNode->getBasePtr()},
26277 LoadNode->getMemoryVT(), LoadNode->getMemOperand());
26285 if ((!LoadNode->isVolatile() && !LoadNode->isAtomic()) ||
26286 LoadNode->getMemoryVT() != MVT::i128) {
26287 // Non-volatile or atomic loads are optimized later in AArch64's load/store
26295 AN && AN->getSuccessOrdering() == AtomicOrdering::Acquire;
26299 assert(Subtarget->hasFeature(AArch64::FeatureRCPC3));
26303 {LoadNode->getChain(), LoadNode->getBasePtr()},
26304 LoadNode->getMemoryVT(), LoadNode->getMemOperand());
26310 Result.getValue(FirstRes), Result.getValue(1 - FirstRes));
26321 // CONCAT_VECTORS -- but delegate to common code for result type
26325 EVT VT = N->getValueType(0);
26328 static_cast<Intrinsic::ID>(N->getConstantOperandVal(0));
26336 auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
26338 N->getOperand(1), Op2, N->getOperand(3));
26346 auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
26348 N->getOperand(1), Op2, N->getOperand(3));
26357 N->getOperand(1), N->getOperand(2));
26366 N->getOperand(1), N->getOperand(2));
26380 auto V = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, NewVT, N->ops());
26388 assert(N->getValueType(0) == MVT::i128 &&
26389 "READ_REGISTER custom lowering is only for 128-bit sysregs");
26390 SDValue Chain = N->getOperand(0);
26391 SDValue SysRegName = N->getOperand(1);
26398 // of the 128-bit System Register value.
26409 if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
26431 // In v8.4a, ldp and stp instructions are guaranteed to be single-copy atomic
26432 // provided the address is 16-byte aligned.
26434 if (!Subtarget->hasLSE2())
26438 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
26439 LI->getAlign() >= Align(16);
26442 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
26443 SI->getAlign() >= Align(16);
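// A minimal user-level sketch (not from this file) of the case the predicate
// above accepts: a 128-bit object with 16-byte alignment. Under LSE2 its
// atomic loads and stores can be left as plain LDP/STP; whether std::atomic
// reports the type lock-free still depends on the build target, so this is
// illustrative only.
#include <atomic>

struct alignas(16) PairOfPointers {
  void *First;
  void *Second;
};

// 128 bits, 16-byte aligned: eligible for the LDP/STP path described above.
std::atomic<PairOfPointers> GlobalPair;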
26449 if (!Subtarget->hasLSE128())
26455 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
26456 SI->getAlign() >= Align(16) &&
26457 (SI->getOrdering() == AtomicOrdering::Release ||
26458 SI->getOrdering() == AtomicOrdering::SequentiallyConsistent);
26461 return RMW->getValOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
26462 RMW->getAlign() >= Align(16) &&
26463 (RMW->getOperation() == AtomicRMWInst::Xchg ||
26464 RMW->getOperation() == AtomicRMWInst::And ||
26465 RMW->getOperation() == AtomicRMWInst::Or);
26471 if (!Subtarget->hasLSE2() || !Subtarget->hasRCPC3())
26475 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
26476 LI->getAlign() >= Align(16) &&
26477 LI->getOrdering() == AtomicOrdering::Acquire;
26480 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
26481 SI->getAlign() >= Align(16) &&
26482 SI->getOrdering() == AtomicOrdering::Release;
26500 // Store-Release instructions only provide seq_cst guarantees when paired with
26501 // Load-Acquire instructions. MSVC CRT does not use these instructions to
26504 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
26507 switch (I->getOpcode()) {
26511 return cast<AtomicCmpXchgInst>(I)->getSuccessOrdering() ==
26514 return cast<AtomicRMWInst>(I)->getOrdering() ==
26517 return cast<StoreInst>(I)->getOrdering() ==
26522 // Loads and stores less than 128 bits are already atomic; ones above that
26527 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
26539 // Loads and stores less than 128 bits are already atomic; ones above that
26544 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
26554 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
26558 // succeed. So at -O0 lower this operation to a CAS loop.
26564 return Subtarget->hasLSE() ? AtomicExpansionKind::CmpXChg
26569 // However, with the LSE instructions (or outline-atomics mode, which provides
26570 // library routines in place of the LSE instructions), we can directly emit many
26573 // Floating-point operations are always emitted to a cmpxchg loop, because they
26577 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
26580 if (AI->isFloatingPointOperation())
26583 bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
26584 (AI->getOperation() == AtomicRMWInst::Xchg ||
26585 AI->getOperation() == AtomicRMWInst::Or ||
26586 AI->getOperation() == AtomicRMWInst::And);
26592 if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
26593 if (Subtarget->hasLSE())
26595 if (Subtarget->outlineAtomics()) {
26598 // (1) high level <atomic> support approved:
26599 // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
26600 // (2) low level libgcc and compiler-rt support implemented by:
26602 if (AI->getOperation() != AtomicRMWInst::Min &&
26603 AI->getOperation() != AtomicRMWInst::Max &&
26604 AI->getOperation() != AtomicRMWInst::UMin &&
26605 AI->getOperation() != AtomicRMWInst::UMax) {
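// A minimal user-level sketch (not from this file) of the outline-atomics path
// discussed above: with -moutline-atomics the fetch_add below is compiled to a
// call into the runtime helpers provided by libgcc/compiler-rt rather than
// inline LSE or LL/SC code, so one binary can select LSE at run time. The
// exact helper symbol (an __aarch64_ldadd*-style routine) is an implementation
// detail of those runtimes, mentioned only as background.
#include <atomic>

static int addRelaxed(std::atomic<int> &Counter, int Delta) {
  return Counter.fetch_add(Delta, std::memory_order_relaxed);
}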
26611 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
26615 // succeed. So at -O0 lower this operation to a CAS loop. Also worthwhile if
26618 Subtarget->hasLSE())
26628 if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
26630 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
26634 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
26638 // 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand
26640 unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
26650 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
26653 // Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
26656 if (ValueTy->getPrimitiveSizeInBits() == 128) {
26671 Type *Tys[] = { Addr->getType() };
26676 const DataLayout &DL = M->getDataLayout();
26679 CI->addParamAttr(
26688 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
26695 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
26701 if (Val->getType()->getPrimitiveSizeInBits() == 128) {
26705 Type *Int64Ty = Type::getInt64Ty(M->getContext());
26714 Type *Tys[] = { Addr->getType() };
26717 const DataLayout &DL = M->getDataLayout();
26718 IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
26723 Val, Stxr->getFunctionType()->getParamType(0)),
26725 CI->addParamAttr(1, Attribute::get(Builder.getContext(),
26726 Attribute::ElementType, Val->getType()));
26733 if (!Ty->isArrayTy()) {
26734 const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
26750 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
26763 if (Subtarget->isTargetAndroid())
26768 if (Subtarget->isTargetFuchsia())
26769 return UseTlsOffset(IRB, -0x10);
26776 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
26783 M.getOrInsertFunction(Subtarget->getSecurityCheckCookieName(),
26787 F->setCallingConv(CallingConv::Win64);
26788 F->addParamAttr(0, Attribute::AttrKind::InReg);
26797 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
26804 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
26805 return M.getFunction(Subtarget->getSecurityCheckCookieName());
26814 if (Subtarget->isTargetAndroid())
26819 if (Subtarget->isTargetFuchsia())
26820 return UseTlsOffset(IRB, -0x8);
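// A minimal standalone sketch (not from this file) of the fixed-TLS-offset
// scheme used above: the protected value lives at a known byte offset from the
// thread pointer, so it can be loaded without going through a global symbol.
// The offset parameter is a placeholder, not one of the platform ABI values.
#include <cstddef>

static void *loadFromTlsSlot(std::ptrdiff_t ByteOffset) {
  // __builtin_thread_pointer() reads TPIDR_EL0 on AArch64 (GCC/Clang builtin).
  char *Tp = static_cast<char *>(__builtin_thread_pointer());
  return *reinterpret_cast<void **>(Tp + ByteOffset);
}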
26835 return Mask->getValue().isPowerOf2();
26855 !Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
26863 AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
26864 AFI->setIsSplitCSR(true);
26870 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
26871 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
26875 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
26876 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
26877 MachineBasicBlock::iterator MBBI = Entry->begin();
26887 Register NewVR = MRI->createVirtualRegister(RC);
26889 // FIXME: this currently does not emit CFI pseudo-instructions, it works
26890 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
26892 // CFI pseudo-instructions.
26893 assert(Entry->getParent()->getFunction().hasFnAttribute(
26896 Entry->addLiveIn(*I);
26897 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
26900 // Insert the copy-back instructions right before the terminator.
26902 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
26903 TII->get(TargetOpcode::COPY), *I)
26913 // integer division, leaving the division as-is is a loss even in terms of
26921 // We want inc-of-add for scalars and sub-of-not for vectors.
26929 if (FPVT == MVT::v8f16 && !Subtarget->hasFullFP16())
26940 assert(MBBI->isCall() && MBBI->getCFIType() &&
26943 switch (MBBI->getOpcode()) {
26955 MachineOperand &Target = MBBI->getOperand(0);
26959 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(AArch64::KCFI_CHECK))
26961 .addImm(MBBI->getCFIType())
26966 return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
26971 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
27029 if (GV.isThreadLocal() && Subtarget->isTargetMachO())
27046 auto APF = MI.getOperand(1).getFPImm()->getValueAPF();
27057 APInt Imm = CI->getValue();
27058 InstructionCost Cost = TTI->getIntImmCost(
27059 Imm, CI->getType(), TargetTransformInfo::TCK_CodeSize);
27068 --MaxUses;
27089 if (Inst.getType()->isScalableTy()) {
27094 if (Inst.getOperand(i)->getType()->isScalableTy())
27098 if (AI->getAllocatedType()->isScalableTy())
27234 EVT MemVT = Load->getMemoryVT();
27244 LoadVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), Pg,
27245 DAG.getUNDEF(LoadVT), MemVT, Load->getMemOperand(),
27246 Load->getAddressingMode(), Load->getExtensionType());
27249 if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) {
27251 Load->getMemoryVT().getVectorElementType());
27292 SDValue Mask = Load->getMask();
27296 assert(Load->getExtensionType() != ISD::NON_EXTLOAD &&
27305 if (Load->getPassThru()->isUndef()) {
27313 if (isZerosVector(Load->getPassThru().getNode()))
27318 ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
27319 Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(),
27320 Load->getAddressingMode(), Load->getExtensionType());
27325 convertToScalableVector(DAG, ContainerVT, Load->getPassThru());
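// A minimal scalar sketch (not from this file) of the passthru handling above:
// the lowered masked load is given an undef or zero passthru, so any other
// passthru value has to be reintroduced with a lane-wise select afterwards.
#include <cstddef>
#include <vector>

static std::vector<float>
maskedLoadWithPassThru(const std::vector<float> &Mem,
                       const std::vector<bool> &Mask,
                       const std::vector<float> &PassThru) {
  std::vector<float> Loaded(Mem.size(), 0.0f); // Inactive lanes load as zero.
  for (std::size_t I = 0; I != Mem.size(); ++I)
    if (Mask[I])
      Loaded[I] = Mem[I];
  for (std::size_t I = 0; I != Mem.size(); ++I) // select(Mask, Loaded, PassThru)
    if (!Mask[I])
      Loaded[I] = PassThru[I];
  return Loaded;
}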
27340 EVT VT = Store->getValue().getValueType();
27342 EVT MemVT = Store->getMemoryVT();
27345 auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
27347 if (VT.isFloatingPoint() && Store->isTruncatingStore()) {
27349 Store->getMemoryVT().getVectorElementType());
27362 return DAG.getMaskedStore(Store->getChain(), DL, NewValue,
27363 Store->getBasePtr(), Store->getOffset(), Pg, MemVT,
27364 Store->getMemOperand(), Store->getAddressingMode(),
27365 Store->isTruncatingStore());
27373 EVT VT = Store->getValue().getValueType();
27376 auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
27377 SDValue Mask = convertFixedMaskToScalableVector(Store->getMask(), DAG);
27380 Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
27381 Mask, Store->getMemoryVT(), Store->getMemOperand(),
27382 Store->getAddressingMode(), Store->isTruncatingStore());
27531 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
27544 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
27563 assert(isTypeLegal(VT) && "Expected only legal fixed-width types");
27568 for (const SDValue &V : Op->op_values()) {
27575 EVT VTArg = VTNode->getVT().getVectorElementType();
27582 "Expected only legal fixed-width types");
27596 for (const SDValue &V : Op->op_values()) {
27606 return DAG.getNode(NewOp, DL, VT, Operands, Op->getFlags());
27621 for (const SDValue &V : Op->op_values()) {
27624 // Pass through non-vector operands.
27721 /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
27753 SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
27754 SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
27812 unsigned NumOperands = Op->getNumOperands();
27827 Op->getOperand(I), Op->getOperand(I + 1)));
27969 SDValue Chain = HG->getChain();
27970 SDValue Inc = HG->getInc();
27971 SDValue Mask = HG->getMask();
27972 SDValue Ptr = HG->getBasePtr();
27973 SDValue Index = HG->getIndex();
27974 SDValue Scale = HG->getScale();
27975 SDValue IntID = HG->getIntID();
27980 assert(CID->getZExtValue() == Intrinsic::experimental_vector_histogram_add &&
27992 MachineMemOperand *MMO = HG->getMemOperand();
27995 MMO->getPointerInfo(), MachineMemOperand::MOLoad, MMO->getSize(),
27996 MMO->getAlign(), MMO->getAAInfo());
27997 ISD::MemIndexType IndexType = HG->getIndexType();
28013 MMO->getPointerInfo(), MachineMemOperand::MOStore, MMO->getSize(),
28014 MMO->getAlign(), MMO->getAAInfo());
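// A minimal scalar sketch (not from this file) of the semantics being lowered
// here: for every active lane, the histogram bucket addressed by that lane's
// index is incremented by Inc. The two MachineMemOperands built above describe
// the gather (MOLoad) and scatter (MOStore) halves of that read-modify-write.
#include <cstddef>
#include <cstdint>
#include <vector>

static void histogramAdd(std::vector<std::uint32_t> &Buckets,
                         const std::vector<std::size_t> &Indices,
                         const std::vector<bool> &Mask, std::uint32_t Inc) {
  for (std::size_t Lane = 0; Lane != Indices.size(); ++Lane)
    if (Mask[Lane])
      Buckets[Indices[Lane]] += Inc; // Repeated indices each contribute.
}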
28087 uint64_t MaxOffset = APInt(BitsPerElt, -1, false).getZExtValue();
28099 // Bail out for 8-bit element types, because with 2048-bit SVE register
28110 // is not known at compile-time, we need to maintain a mask with 'VL' values
28114 Index += IndexLen - ElementsPerVectorReg;
28116 Index = Index - ElementsPerVectorReg;
28121 // For 8-bit elements and 1024-bit SVE registers and MaxOffset equals
28129 // Choosing an out-of-range index leads to the lane being zeroed vs zero
28131 // index elements. For i8 elements an out-of-range index could be a valid
28132 // index for a 2048-bit vector register size.
28133 for (unsigned i = 0; i < IndexLen - ElementsPerVectorReg; ++i) {
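// A minimal standalone sketch (not from this file) of the padding done in the
// loop above: the fixed-length shuffle mask is widened to the SVE register
// length and the unused tail entries are given a deliberately out-of-range
// index, so TBL writes zero to those lanes instead of reading stale data.
// The value 255 is illustrative; the real code derives the out-of-range value
// from MaxOffset for the element width, and assumes IndexLen >= Mask.size().
#include <cstdint>
#include <vector>

static std::vector<std::uint8_t>
padTBLIndices(const std::vector<std::uint8_t> &Mask, unsigned IndexLen) {
  std::vector<std::uint8_t> TBLMask(Mask.begin(), Mask.end());
  TBLMask.resize(IndexLen, 255); // Out-of-range lanes become zero in TBL.
  return TBLMask;
}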
28183 auto ShuffleMask = SVN->getMask();
28193 auto MinLegalExtractEltScalarTy = [](EVT ScalarTy) -> EVT {
28199 if (SVN->isSplat()) {
28200 unsigned Lane = std::max(0, SVN->getSplatIndex());
28211 Imm == VT.getVectorNumElements() - 1) {
28217 DAG.getConstant(VT.getVectorNumElements() - 1, DL, MVT::i64));
28242 if (Subtarget->hasSVE2p1() && EltSize == 64 &&
28284 // are actually sub-vectors of a larger SVE register. When mapping
28292 // when converting from fixed-length to scalable vector types (i.e. the start
28294 unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
28295 unsigned MaxSVESize = Subtarget->getMaxSVEVectorSizeInBits();
28327 // 128-bits.
28328 if (MinSVESize || !Subtarget->isNeonAvailable())
28396 SDValue ShiftR = Op->getOperand(0);
28397 if (ShiftR->getOpcode() != AArch64ISD::VLSHR)
28403 unsigned ShiftLBits = ShiftL->getConstantOperandVal(1);
28404 unsigned ShiftRBits = ShiftR->getConstantOperandVal(1);
28421 // used - simplify to just Val.
28422 return TLO.CombineTo(Op, ShiftR->getOperand(0));
28430 uint64_t BitsToClear = Op->getConstantOperandVal(1)
28431 << Op->getConstantOperandVal(2);
28444 unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();
28456 Known.Zero.setHighBits(BitWidth - RequiredBits);
28475 return Subtarget->hasSVE() || Subtarget->hasSVE2() ||
28476 Subtarget->hasComplxNum();
28487 if (!VTy->isScalableTy() && !Subtarget->hasComplxNum())
28490 auto *ScalarTy = VTy->getScalarType();
28491 unsigned NumElements = VTy->getElementCount().getKnownMinValue();
28495 // power-of-2 size, as we later split them into the smallest supported size
28497 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
28498 if ((VTyWidth < 128 && (VTy->isScalableTy() || VTyWidth != 64)) ||
28502 if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2() && VTy->isScalableTy()) {
28503 unsigned ScalarWidth = ScalarTy->getScalarSizeInBits();
28507 return (ScalarTy->isHalfTy() && Subtarget->hasFullFP16()) ||
28508 ScalarTy->isFloatTy() || ScalarTy->isDoubleTy();
28515 VectorType *Ty = cast<VectorType>(InputA->getType());
28516 bool IsScalable = Ty->isScalableTy();
28517 bool IsInt = Ty->getElementType()->isIntegerTy();
28520 Ty->getScalarSizeInBits() * Ty->getElementCount().getKnownMinValue();
28526 int Stride = Ty->getElementCount().getKnownMinValue() / 2;
28561 auto *Mask = B.getAllOnesMask(Ty->getElementCount());
28586 auto *Mask = B.getAllOnesMask(Ty->getElementCount());
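// A minimal standalone sketch (not from this file) of the halving strategy
// behind the Stride computation above: operands wider than the smallest
// supported vector are cut at Stride = NumElements / 2 into a lower and an
// upper half, each half is processed on its own, and the results are
// concatenated again. Plain std::vector stands in for the IR vector values.
#include <cstddef>
#include <utility>
#include <vector>

static std::pair<std::vector<float>, std::vector<float>>
splitAtHalf(const std::vector<float> &V) {
  std::size_t Stride = V.size() / 2;
  return {std::vector<float>(V.begin(), V.begin() + Stride), // lower half
          std::vector<float>(V.begin() + Stride, V.end())};  // upper half
}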
28610 unsigned Opc = N->getOpcode();
28612 if (any_of(N->uses(),
28613 [&](SDNode *Use) { return Use->getOpcode() == ISD::MUL; }))
28620 return Subtarget->getMinimumJumpTableEntries();
28628 if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors())
28643 if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors())
28662 assert(Subtarget->useSVEForFixedLengthVectors() && "Unexpected mode!");
28721 return !Subtarget->isTargetWindows() &&
28722 MF.getInfo<AArch64FunctionInfo>()->hasStackProbing();
28727 switch (N->getOpcode()) {
28734 assert(N->getNumValues() == 1 && "Expected one result!");
28735 assert(N->getNumOperands() == 1 && "Expected one operand!");
28736 EVT VT = N->getValueType(0);
28737 EVT OpVT = N->getOperand(0).getValueType();
28753 assert(N->getNumValues() == 1 && "Expected one result!");
28754 assert(N->getNumOperands() == 2 && "Expected two operands!");
28755 EVT VT = N->getValueType(0);
28756 EVT Op0VT = N->getOperand(0).getValueType();
28757 EVT Op1VT = N->getOperand(1).getValueType();