Lines Matching +full:push +full:- +full:ci +full:- +full:container

1 //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation  ----===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
108 #define DEBUG_TYPE "aarch64-lower"
118 "aarch64-elf-ldtls-generation", cl::Hidden,
123 EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
133 EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
138 static cl::opt<bool> EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden,
145 static cl::opt<unsigned> MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden,
153 "aarch64-enable-gisel-sve", cl::Hidden,
349 // Otherwise, it's either a constant discriminator, or a non-blended
351 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
352 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
353 AddrDisc = Disc->getOperand(1);
354 ConstDisc = Disc->getOperand(2);
360 // discriminator value) isn't a 16-bit constant, bail out, and let the
363 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
364 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
369 AddrDisc = DAG->getRegister(AArch64::NoRegister, MVT::i64);
372 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
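
The fragment above splits a pointer-authentication discriminator into its address part and its integer part, bailing out unless the integer part fits in 16 bits. A rough scalar model of the blend being taken apart (an assumption-level sketch based on the usual MOVK-style lowering, where the 16-bit integer discriminator occupies the top 16 bits of the address discriminator; hypothetical helper, not code from the matched file):

#include <cstdint>

// Model of blend(addr, imm): keep the low 48 bits of the address discriminator
// and place the 16-bit integer discriminator in bits [48, 63].
static uint64_t ptrauthBlendModel(uint64_t AddrDisc, uint64_t IntDisc) {
  return (AddrDisc & 0x0000FFFFFFFFFFFFULL) | ((IntDisc & 0xFFFFULL) << 48);
}

This is why a constant discriminator wider than 16 bits cannot be split off; the code then falls back to treating the whole value as an address discriminator.
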
383 // vector to all-one or all-zero.
390 if (Subtarget->hasLS64()) {
396 if (Subtarget->hasFPARMv8()) {
404 if (Subtarget->hasNEON()) {
427 if (Subtarget->isSVEorStreamingSVEAvailable()) {
452 if (Subtarget->useSVEForFixedLengthVectors()) {
463 if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) {
473 computeRegisterProperties(Subtarget->getRegisterInfo());
562 // Lowering for many of the conversions is actually specified by the non-f128
588 if (Subtarget->hasFPARMv8()) {
594 if (Subtarget->hasFPARMv8()) {
612 // Variable-sized objects.
630 // AArch64 lacks both left-rotate and popcount instructions.
648 if (Subtarget->hasCSSC()) {
727 if (Subtarget->hasFullFP16()) {
810 // Round-to-integer operations need custom lowering for fp16, as Promote doesn't work
867 if (!Subtarget->hasFullFP16()) {
874 // AArch64 has implementations of a lot of rounding-like FP operations.
875 // clang-format off
890 if (Subtarget->hasFullFP16())
893 // clang-format on
900 if (Subtarget->hasFullFP16())
913 if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
925 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
974 if (Subtarget->hasLSE128()) {
982 // 128-bit loads and stores can be done without expanding
986 // Aligned 128-bit loads and stores are single-copy atomic according to the
987 // v8.4a spec. LRCPC3 introduces 128-bit STILP/LDIAPP but still requires LSE2.
988 if (Subtarget->hasLSE2()) {
993 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
994 // custom lowering, as there are no un-paired non-temporal stores and
1005 // 256 bit non-temporal loads can be lowered to LDNP. This is done using
1006 // custom lowering, as there are no un-paired non-temporal loads legalization
1030 // Make floating-point constants legal for the large code model, so they don't
1032 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
1037 // AArch64 does not have floating-point extending loads, i1 sign-extending
1038 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
1057 if (Subtarget->hasFPARMv8()) {
1098 // Vector add and sub nodes may conceal a high-half opportunity.
1148 Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32;
1153 Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16;
1157 Subtarget->requiresStrictAlign() ? MaxStoresPerMemmoveOptSize : 16;
1161 Subtarget->requiresStrictAlign() ? MaxLoadsPerMemcmpOptSize : 8;
1176 if (!Subtarget->isTargetWindows())
1192 if (Subtarget->hasSME())
1195 if (Subtarget->isNeonAvailable()) {
1198 // clang-format off
1221 // clang-format on
1230 // AArch64 doesn't have direct vector->f32 conversion instructions for
1235 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
1236 // Or a direct i32 -> f16 vector conversion. Set it to Custom, so the
1237 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
1243 if (Subtarget->hasFullFP16()) {
1283 // Custom handling for some quad-vector types to detect MULL.
1313 if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
1374 if (Subtarget->hasFullFP16())
1383 if (Subtarget->hasFullFP16())
1421 Subtarget->isLittleEndian() ? Legal : Expand);
1438 if (Subtarget->hasSME()) {
1444 if (Subtarget->isSVEorStreamingSVEAvailable()) {
1454 if (Subtarget->isSVEorStreamingSVEAvailable()) {
1517 if (!Subtarget->isLittleEndian())
1520 if (Subtarget->hasSVE2() ||
1521 (Subtarget->hasSME() && Subtarget->isStreaming()))
1598 // SVE supports truncating stores of 64 and 128-bit vectors
1694 if (Subtarget->hasSVEB16B16()) {
1714 if (!Subtarget->hasSVEB16B16()) {
1733 // NEON doesn't support 64-bit vector integer muls, but SVE does.
1739 if (Subtarget->useSVEForFixedLengthVectors()) {
1742 VT, /*OverrideNEON=*/!Subtarget->isNeonAvailable()))
1747 VT, /*OverrideNEON=*/!Subtarget->isNeonAvailable()))
1812 // Handle operations that are only available in non-streaming SVE mode.
1813 if (Subtarget->isSVEAvailable()) {
1843 if (Subtarget->hasSVE2()) {
1852 if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
1859 if (Subtarget->hasSVE()) {
1866 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
1871 // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
1873 if (Subtarget->isTargetWindows()) {
1885 if (Subtarget->isWindowsArm64EC()) {
1927 // But we do support custom-lowering for FCOPYSIGN.
1931 Subtarget->hasFullFP16()))
1980 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
2003 // * The lowering of the non-strict versions involves target-specific ISD
2009 if (Subtarget->isLittleEndian()) {
2017 if (Subtarget->hasD128()) {
2025 // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
2026 if (!Subtarget->hasSVE())
2030 // whilelo instruction for generating fixed-width predicates too.
2045 if (I->getIntrinsicID() != Intrinsic::experimental_vector_partial_reduce_add)
2048 EVT VT = EVT::getEVT(I->getType());
2049 auto Op1 = I->getOperand(1);
2050 EVT Op1VT = EVT::getEVT(Op1->getType());
2059 if (!Subtarget->isSVEorStreamingSVEAvailable())
2063 // also support fixed-width predicates.
2071 // MATCH is SVE2 and only available in non-streaming mode.
2072 if (!Subtarget->hasSVE2() || !Subtarget->isSVEAvailable())
2074 // Furthermore, we can only use it for 8-bit or 16-bit elements.
2117 // Mark floating-point truncating stores/extending loads as having custom
2130 bool PreferSVE = !PreferNEON && Subtarget->isSVEAvailable();
2228 if (Subtarget->isNeonAvailable())
2234 if (Subtarget->isNeonAvailable())
2247 // isIntImmediate - This method tests to see if the node is a constant
2251 Imm = C->getZExtValue();
2257 // isOpcWithIntImmediate - This method tests to see if the node is a specific
2262 return N->getOpcode() == Opc &&
2263 isIntImmediate(N->getOperand(1).getNode(), Imm);
2271 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
2286 // The goal here is to set the non-demanded bits in a way that minimizes
2288 // we set the non-demanded bits to the value of the preceding demanded bits.
2290 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
2296 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
2299 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
2304 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
2309 // We cannot shrink the element size any further if it is 2-bits.
2344 // If the new constant immediate is all-zeros or all-ones, let the target
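
The comments above describe how the non-demanded bits of a logical immediate are filled in: each one takes the value of the nearest demanded bit below it, which minimizes the number of 0/1 transitions and makes the value more likely to encode as a logical immediate. A naive standalone sketch of that bit-copy idea for a single 8-bit element (illustration only; the in-tree code works on full element sizes and uses a rotate to handle wrap-around):

#include <cstdint>

// Propagate each known (demanded) bit upward into the unknown positions
// directly above it until every bit is filled. Bits below the lowest demanded
// bit are simply left at zero in this sketch.
static uint8_t fillNonDemandedBits(uint8_t Imm, uint8_t DemandedBits) {
  uint8_t Filled = Imm & DemandedBits;
  uint8_t Known = DemandedBits;
  for (int I = 0; I < 8; ++I) {
    uint8_t Next = (uint8_t)(Known << 1) & (uint8_t)~Known; // fillable now
    Filled |= (uint8_t)(Filled << 1) & Next;                // copy bit below
    Known |= Next;
  }
  return Filled;
}

For example, with DemandedBits = 0b01100101 and demanded values 1, 0, 0, 1 in bits 6, 5, 2 and 0, the result is 0b11000011.
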
2401 uint64_t Imm = C->getZExtValue();
2405 /// computeKnownBitsForTargetNode - Determine which of the bits specified in
2425 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2426 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2433 ~(Op->getConstantOperandAPInt(1) << Op->getConstantOperandAPInt(2))
2435 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2441 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2442 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2448 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2449 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2455 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2456 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2462 APInt(Known.getBitWidth(), Op->getConstantOperandVal(0)));
2467 if (!Subtarget->isTargetILP32())
2469 // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
2474 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2480 static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
2486 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
2488 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
2506 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - Bound);
2515 // bits larger than the element datatype. 32-bit or larger doesn't need
2521 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
2525 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
2560 // Compares return either 0 or all-ones
2582 // even with +strict-align. Predicated SVE loads/stores (e.g. ld1/st1), used
2583 // for stores that come from IR, only require element-size alignment (even if
2585 // have 16-byte alignment with +strict-align (and fail to lower as we don't
2593 if (Subtarget->requiresStrictAlign())
2597 // Some CPUs are fine with unaligned stores except for 128-bit ones.
2598 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
2608 // them regresses performance on micro-benchmarks and olden/bh.
2618 if (Subtarget->requiresStrictAlign())
2622 // Some CPUs are fine with unaligned stores except for 128-bit ones.
2623 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
2634 // them regresses performance on micro-benchmarks and olden/bh.
2989 // We materialise the F128CSEL pseudo-instruction as some control flow and a
3001 MachineFunction *MF = MBB->getParent();
3002 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3003 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
3005 MachineFunction::iterator It = ++MBB->getIterator();
3013 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
3014 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
3015 MF->insert(It, TrueBB);
3016 MF->insert(It, EndBB);
3018 // Transfer rest of current basic-block to EndBB
3019 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
3020 MBB->end());
3021 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
3023 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
3024 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
3025 MBB->addSuccessor(TrueBB);
3026 MBB->addSuccessor(EndBB);
3029 TrueBB->addSuccessor(EndBB);
3032 TrueBB->addLiveIn(AArch64::NZCV);
3033 EndBB->addLiveIn(AArch64::NZCV);
3036 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
3049 BB->getParent()->getFunction().getPersonalityFn())) &&
3057 MachineFunction &MF = *MBB->getParent();
3059 DebugLoc DL = MBB->findDebugLoc(MBBI);
3067 return NextInst->getParent();
3074 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3075 MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
3090 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3092 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::LDR_ZA));
3108 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3111 MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opcode))
3124 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3125 MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
3156 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3158 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::ZERO_M));
3174 MachineFunction *MF = BB->getParent();
3175 MachineFrameInfo &MFI = MF->getFrameInfo();
3176 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
3177 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
3179 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3181 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRXui))
3185 // Set the reserved bytes (10-15) to zero
3186 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRHHui))
3190 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRWui))
3197 BB->remove_instr(&MI);
3204 MachineFunction *MF = BB->getParent();
3205 MachineFrameInfo &MFI = MF->getFrameInfo();
3206 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
3211 assert(!MF->getSubtarget<AArch64Subtarget>().isTargetWindows() &&
3214 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
3217 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3218 MachineRegisterInfo &MRI = MF->getRegInfo();
3223 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), SP)
3226 // Allocate a lazy-save buffer object of the size given, normally SVL * SVL
3229 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::MSUBXrrr), Dest)
3233 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
3241 BB->remove_instr(&MI);
3249 MachineFunction *MF = BB->getParent();
3250 MachineFrameInfo &MFI = MF->getFrameInfo();
3251 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
3252 assert(!MF->getSubtarget<AArch64Subtarget>().isTargetWindows() &&
3255 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3256 if (FuncInfo->isSMESaveBufferUsed()) {
3260 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::SUBXrx64), AArch64::SP)
3264 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), Dest)
3270 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
3273 BB->remove_instr(&MI);
3281 MachineFunction *MF = BB->getParent();
3282 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
3283 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3284 if (FuncInfo->isSMESaveBufferUsed()) {
3285 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3286 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::BL))
3289 .addRegMask(TRI->getCallPreservedMask(
3292 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
3296 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
3299 BB->remove_instr(&MI);
3307 if (SMEOrigInstr != -1) {
3308 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3310 TII->get(MI.getOpcode()).TSFlags & AArch64::SMEMatrixTypeMask;
3346 // has implicit def. This def is early-clobber as it will be set at
3404 //===----------------------------------------------------------------------===//
3406 //===----------------------------------------------------------------------===//
3408 //===----------------------------------------------------------------------===//
3410 //===----------------------------------------------------------------------===//
3422 /// isZerosVector - Check whether SDNode N is a zero-filled vector.
3425 while (N->getOpcode() == ISD::BITCAST)
3426 N = N->getOperand(0).getNode();
3431 if (N->getOpcode() != AArch64ISD::DUP)
3434 auto Opnd0 = N->getOperand(0);
3438 /// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
3467 /// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
3557 /// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
3583 // All of the compare-mask comparisons are ordered, but we can switch
3606 // the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
3612 // So, finally, the only LLVM-native comparisons that don't mention C or V
3709 /// - We can implement (NEG SETCC) i.e. negating a single comparison by
3711 /// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
3714 /// - Note that we can only ever negate all previously processed results.
3716 /// of two sub-trees (because the negation affects all sub-trees emitted so
3717 /// far, so the 2nd sub-tree we emit would also affect the first).
3719 /// - (OR (SETCC A) (SETCC B)) can be implemented via:
3721 /// - After transforming OR to NEG/AND combinations we may be able to use NEG
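
The bullet points above describe the conjunction/disjunction (CCMP/FCCMP) emission strategy; the OR case is just De Morgan's law applied so that only AND chains, plus a final negation, need to be materialized. A trivial scalar reminder of the identity (illustration only):

// (A || B) is emitted as the negation of (!A && !B), which fits a
// CMP/CCMP chain that can only AND conditions together.
static bool orViaNegatedAnd(bool A, bool B) { return !(!A && !B); }
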
3764 APInt Imm = Const->getAPIntValue();
3765 if (Imm.isNegative() && Imm.sgt(-32)) {
3767 RHS = DAG.getConstant(Imm.abs(), DL, Const->getValueType(0));
3791 /// \param CanNegate Set to true if we can negate the whole sub-tree just by
3794 /// Negate==true on this sub-tree)
3808 unsigned Opcode = Val->getOpcode();
3810 if (Val->getOperand(0).getValueType() == MVT::f128)
3821 SDValue O0 = Val->getOperand(0);
3822 SDValue O1 = Val->getOperand(1);
3841 // the leafs, then this sub-tree as a whole negates naturally.
3843 // If we cannot naturally negate the whole sub-tree, then this must be
3863 /// \p Negate is true if we want this sub-tree being negated just by changing
3869 unsigned Opcode = Val->getOpcode();
3871 SDValue LHS = Val->getOperand(0);
3872 SDValue RHS = Val->getOperand(1);
3873 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
3906 assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
3910 SDValue LHS = Val->getOperand(0);
3917 SDValue RHS = Val->getOperand(1);
3924 // Swap sub-tree that must come first to the right side.
3937 // Swap the sub-tree that we can negate naturally to the left.
3946 // Negate the left sub-tree if possible, otherwise negate the result.
3962 // Emit sub-trees.
3998 uint64_t Mask = MaskCst->getZExtValue();
4014 uint64_t Shift = ShiftCst->getZExtValue();
4030 uint64_t C = RHSC->getZExtValue();
4039 isLegalArithImmed((uint32_t)(C - 1))) ||
4041 isLegalArithImmed(C - 1ULL))) {
4043 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
4050 isLegalArithImmed((uint32_t)(C - 1))) ||
4051 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
4053 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
4094 !isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) {
4117 // -1 constant. For example,
4128 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
4129 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
4130 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
4131 LHS.getNode()->hasNUsesOfValue(1, 0)) {
4132 int16_t ValueofRHS = RHS->getAsZExtVal();
4133 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
4144 if (!Cmp && (RHSC->isZero() || RHSC->isOne())) {
4146 if ((CC == ISD::SETNE) ^ RHSC->isZero())
4194 // Extend to 64-bits, then perform a 64-bit multiply.
4201 // Check that the result fits into a 32-bit integer.
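
The two comment lines above describe the classic widening overflow check for a 32-bit multiply. A minimal scalar illustration of the same idea (a sketch, not the DAG lowering itself):

#include <cstdint>

// Widen both operands to 64 bits, multiply, and report overflow if the
// product does not survive a round trip through 32 bits.
static bool smulOverflows32(int32_t A, int32_t B, int32_t &Res) {
  int64_t Wide = (int64_t)A * (int64_t)B;
  Res = (int32_t)Wide;
  return Wide != (int64_t)Res;
}
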
4241 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
4252 !Subtarget->isNeonAvailable()))
4262 // -->
4268 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
4287 // (xor x, (select_cc a, b, cc, 0, -1) )
4288 // -->
4289 // (csel x, (xor x, -1), cc ...)
4293 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
4299 // FIXME: This could be generalized to non-integer comparisons.
4312 if (CTVal->isAllOnes() && CFVal->isZero()) {
4319 if (CTVal->isZero() && CFVal->isAllOnes()) {
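
The fold sketched in the surrounding comments works because XOR with 0 is the identity and XOR with -1 is bitwise NOT, so the select can be hoisted out of the XOR and emitted as one conditional-select instruction. A scalar model of the equivalence (illustration only):

#include <cstdint>

// Before: X ^ (Cond ? 0 : -1)        After: Cond ? X : ~X  (one CSEL/CSINV)
static int64_t xorSelectBefore(int64_t X, bool Cond) { return X ^ (Cond ? 0 : -1); }
static int64_t xorSelectAfter(int64_t X, bool Cond) { return Cond ? X : ~X; }
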
4438 // The front-end should have filtered out the out-of-range values
4439 assert(Locality <= 3 && "Prefetch locality out-of-range");
4443 Locality = 3 - Locality;
4456 // Converts SETCC (AND X Y) Z ULT -> SETCC (AND X (Y & ~(Z - 1)) 0 EQ when Y is
4457 // a power of 2. This is then lowered to ANDS X (Y & ~(Z - 1)) instead of SUBS
4461 if (CC == ISD::SETULT && LHS.getOpcode() == ISD::AND && LHS->hasOneUse()) {
4465 uint64_t LHSConstValue = LHSConstOp->getZExtValue();
4466 uint64_t RHSConstant = RHSConst->getZExtValue();
4468 uint64_t NewMaskValue = LHSConstValue & ~(RHSConstant - 1);
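
The rewrite above relies on the fact that when the AND mask is a power of two (and the compared constant is non-zero), the masked value is either 0 or the mask itself, so the unsigned comparison collapses into a zero test that a single ANDS can compute. A small standalone statement of the equivalence (illustration only):

#include <cstdint>

// For a power-of-two mask Y and non-zero Z:
//   (X & Y) u< Z        <=>        (X & (Y & ~(Z - 1))) == 0
// The second form needs only ANDS, instead of AND followed by SUBS.
static bool cmpViaSubs(uint64_t X, uint64_t Y, uint64_t Z) { return (X & Y) < Z; }
static bool cmpViaAnds(uint64_t X, uint64_t Y, uint64_t Z) {
  return (X & (Y & ~(Z - 1))) == 0;
}
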
4496 // and the second using native f32->VT instructions.
4505 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
4508 bool IsStrict = Op->isStrictFPOpcode();
4512 // FP16->FP32 extends are legal for v32 and v4f32.
4515 // Split bf16->f64 extends into two fpextends.
4530 // FP16->FP32 extends are legal for v32 and v4f32.
4563 bool IsStrict = Op->isStrictFPOpcode();
4574 auto ImmV = [&](int I) -> SDValue { return DAG.getConstant(I, DL, I32); };
4580 if (Subtarget->hasBF16())
4590 (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable())) {
4601 return DAG.getNode(Op.getOpcode(), DL, VT, NewOps, Op->getFlags());
4627 if (useSVEForFixedLengthVectorVT(SrcVT, !Subtarget->isNeonAvailable()))
4633 !((Subtarget->hasNEON() || Subtarget->hasSME()) &&
4634 Subtarget->hasBF16())) {
4704 bool IsStrict = Op->isStrictFPOpcode();
4715 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()) ||
4716 useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable()))
4722 if ((InVT.getVectorElementType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
4769 // Use a scalar operation for conversions between single-element vectors of
4789 bool IsStrict = Op->isStrictFPOpcode();
4796 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
4822 // AArch64 FP-to-int conversions saturate to the destination element size, so
4827 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
4847 (!Subtarget->hasFullFP16() || DstElementWidth > 16)) ||
4926 // AArch64 FP-to-int conversions saturate to the destination register size, so
4935 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
4941 if ((SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) || SrcVT == MVT::bf16) {
4951 (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
4992 // Round the floating-point value into a floating-point register with the
5006 bool IsStrict = Op->isStrictFPOpcode();
5028 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()) ||
5029 useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable()))
5075 // Use a scalar operation for conversions between single-element vectors of
5096 bool IsStrict = Op->isStrictFPOpcode();
5099 bool IsSigned = Op->getOpcode() == ISD::STRICT_SINT_TO_FP ||
5100 Op->getOpcode() == ISD::SINT_TO_FP;
5131 // We need to be careful about i64 -> bf16.
5154 // double-precision value or it is too big. If it is sufficiently small,
5155 // we should just go u64 -> double -> bf16 in a naive way. Otherwise, we
5156 // ensure that u64 -> double has no rounding error by only using the 52
5217 if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
5225 // Other conversions are legal, unless it's to the completely software-based
5282 "Expected int->fp bitcast!");
5324 // Returns lane if Op extracts from a two-element vector and lane is constant
5329 if (OpNode->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
5332 EVT VT = OpNode->getOperand(0).getValueType();
5333 ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpNode->getOperand(1));
5337 return C->getZExtValue();
5347 for (const SDValue &Elt : N->op_values()) {
5352 if (!isIntN(HalfSize, C->getSExtValue()))
5355 if (!isUIntN(HalfSize, C->getZExtValue()))
5393 return N0->hasOneUse() && N1->hasOneUse() &&
5404 return N0->hasOneUse() && N1->hasOneUse() &&
5413 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
5436 SDValue Chain = Op->getOperand(0);
5437 SDValue RMValue = Op->getOperand(1);
5441 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
5442 // ((arg - 1) & 3) << 22).
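
The two mappings quoted above are inverses of each other modulo 4. A tiny sketch of both directions, using only the field arithmetic (actually reading or changing the mode would of course go through FPCR):

// FPCR RMode -> FLT_ROUNDS value:   0->1, 1->2, 2->3, 3->0
static unsigned rmodeToFltRounds(unsigned RMode) { return (RMode + 1) & 3; }
// FLT_ROUNDS value -> FPCR RMode:   0->3, 1->0, 2->1, 3->2, placed at bit 22
static unsigned fltRoundsToRModeBits(unsigned Arg) { return ((Arg - 1) & 3) << 22; }
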
5480 SDValue Chain = Op->getOperand(0);
5499 SDValue Chain = Op->getOperand(0);
5500 SDValue Mode = Op->getOperand(1);
5514 SDValue Chain = Op->getOperand(0);
5595 bool OverrideNEON = !Subtarget->isNeonAvailable();
5599 // Multiplications are only custom-lowered for 128-bit and 64-bit vectors so
5602 "unexpected type for custom-lowering ISD::MUL");
5617 if (Subtarget->hasSVE())
5634 if (Subtarget->hasSVE())
5656 // isel lowering to take advantage of no-stall back to back s/umul + s/umla.
5657 // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57
5730 "Expected a predicate-to-predicate bitcast");
5752 // case (e.g. when casting from <vscale x 16 x i1> -> <vscale x 2 x i1>) then
5789 // ldr(%tileslice, %ptr, 11) -> ldr [%tileslice, 11], [%ptr, 11]
5793 // ->
5805 // ->
5811 // Case 4: If the vecnum is an add of an immediate, then the non-immediate
5815 // ->
5826 SDValue TileSlice = N->getOperand(2);
5827 SDValue Base = N->getOperand(3);
5828 SDValue VecNum = N->getOperand(4);
5835 ConstAddend = cast<ConstantSDNode>(VecNum.getOperand(1))->getSExtValue();
5838 ConstAddend = ImmNode->getSExtValue();
5843 if (int32_t C = (ConstAddend - ImmAddend)) {
5885 "Expected 8-bit or 16-bit characters.");
5888 // A single container is enough for both operands because ultimately the
5895 // If Op2 is a full 128-bit vector, wrap it trivially in a scalable vector.
5902 // If Op2 is not a full 128-bit vector, we always need to broadcast it.
5966 Op->getOperand(0), // Chain
5972 Op->getOperand(0), // Chain
5987 SDValue Chain = Node->getChain();
5992 auto Alignment = Node->getMemOperand()->getAlign();
5993 bool IsVol = Node->isVolatile();
5994 auto DstPtrInfo = Node->getPointerInfo();
6053 SelectionDAG &DAG) -> SDValue {
6056 // re-use the dag-combiner function with aarch64_neon_{pmull,smull,umull}.
6081 // the non-high version of PMULL instruction. Use v1i64 to represent i64.
6405 const auto *RegInfo = Subtarget->getRegisterInfo();
6406 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
6418 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
6491 // then extracting a fixed-width subvector from the scalable vector.
6529 // an SVE predicate register mask from the fixed-width vector.
6557 // SVE only supports implicit extension of 32-bit indices.
6558 if (!Subtarget->hasSVE() || IndexVT.getVectorElementType() != MVT::i32)
6565 // Scalable vectors with "vscale * 2" or fewer elements sit within a 64-bit
6566 // element container type, which would violate the previous clause.
6572 if (!ExtVT.isScalableVector() && !Subtarget->useSVEForFixedLengthVectors())
6580 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal->getOperand(0))) {
6581 if (!isLoadExtLegalOrCustom(ISD::ZEXTLOAD, ExtVT, Ld->getValueType(0))) {
6582 // Disable extending masked loads for fixed-width for now, since the code
6588 for (auto *U : Ld->getMask()->users())
6620 return AddrModes.find(Key)->second;
6650 SDValue Chain = MGT->getChain();
6651 SDValue PassThru = MGT->getPassThru();
6652 SDValue Mask = MGT->getMask();
6653 SDValue BasePtr = MGT->getBasePtr();
6654 SDValue Index = MGT->getIndex();
6655 SDValue Scale = MGT->getScale();
6657 EVT MemVT = MGT->getMemoryVT();
6658 ISD::LoadExtType ExtType = MGT->getExtensionType();
6659 ISD::MemIndexType IndexType = MGT->getIndexType();
6663 if (!PassThru->isUndef() && !isZerosVector(PassThru.getNode())) {
6666 DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
6667 MGT->getMemOperand(), IndexType, ExtType);
6672 bool IsScaled = MGT->isIndexScaled();
6673 bool IsSigned = MGT->isIndexSigned();
6677 uint64_t ScaleVal = Scale->getAsZExtVal();
6679 assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
6686 return DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
6687 MGT->getMemOperand(), IndexType, ExtType);
6692 assert(Subtarget->useSVEForFixedLengthVectors() &&
6695 // NOTE: Handle floating-point as if integer then bitcast the result.
6722 PassThru = PassThru->isUndef() ? DAG.getUNDEF(ContainerVT)
6729 Ops, MGT->getMemOperand(), IndexType, ExtType);
6749 SDValue Chain = MSC->getChain();
6750 SDValue StoreVal = MSC->getValue();
6751 SDValue Mask = MSC->getMask();
6752 SDValue BasePtr = MSC->getBasePtr();
6753 SDValue Index = MSC->getIndex();
6754 SDValue Scale = MSC->getScale();
6756 EVT MemVT = MSC->getMemoryVT();
6757 ISD::MemIndexType IndexType = MSC->getIndexType();
6758 bool Truncating = MSC->isTruncatingStore();
6760 bool IsScaled = MSC->isIndexScaled();
6761 bool IsSigned = MSC->isIndexSigned();
6765 uint64_t ScaleVal = Scale->getAsZExtVal();
6767 assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
6774 return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
6775 MSC->getMemOperand(), IndexType, Truncating);
6780 assert(Subtarget->useSVEForFixedLengthVectors() &&
6783 // Once bitcast we treat floating-point scatters as if integer.
6817 return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
6818 MSC->getMemOperand(), IndexType, Truncating);
6829 EVT VT = Op->getValueType(0);
6834 SDValue PassThru = LoadNode->getPassThru();
6835 SDValue Mask = LoadNode->getMask();
6837 if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
6841 VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(),
6842 LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(),
6843 LoadNode->getMemOperand(), LoadNode->getAddressingMode(),
6844 LoadNode->getExtensionType());
6858 SDValue Value = ST->getValue();
6879 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
6880 ST->getBasePtr(), ST->getMemOperand());
6892 SDValue Value = StoreNode->getValue();
6895 EVT MemVT = StoreNode->getMemoryVT();
6900 /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
6903 unsigned AS = StoreNode->getAddressSpace();
6904 Align Alignment = StoreNode->getAlign();
6907 StoreNode->getMemOperand()->getFlags(),
6912 if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
6916 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
6917 // the custom lowering, as there are no un-paired non-temporal stores and
6920 if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
6929 StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
6933 StoreNode->getValue(),
6937 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
6938 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
6941 } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
6944 SDValue Value = StoreNode->getValue();
6945 assert(Value->getValueType(0) == MVT::i64x8);
6946 SDValue Chain = StoreNode->getChain();
6947 SDValue Base = StoreNode->getBasePtr();
6954 Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
6955 StoreNode->getOriginalAlign());
6963 /// Lower atomic or volatile 128-bit stores to a single STP instruction.
6967 assert(StoreNode->getMemoryVT() == MVT::i128);
6968 assert(StoreNode->isVolatile() || StoreNode->isAtomic());
6971 StoreNode->getMergedOrdering() == AtomicOrdering::Release;
6972 if (StoreNode->isAtomic())
6973 assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
6974 Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
6975 StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
6976 StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic);
6978 SDValue Value = (StoreNode->getOpcode() == ISD::STORE ||
6979 StoreNode->getOpcode() == ISD::ATOMIC_STORE)
6980 ? StoreNode->getOperand(1)
6981 : StoreNode->getOperand(2);
6989 {StoreNode->getChain(), StoreValue.first, StoreValue.second,
6990 StoreNode->getBasePtr()},
6991 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
7001 if (LoadNode->getMemoryVT() == MVT::i64x8) {
7003 SDValue Base = LoadNode->getBasePtr();
7004 SDValue Chain = LoadNode->getChain();
7010 LoadNode->getPointerInfo(),
7011 LoadNode->getOriginalAlign());
7020 EVT VT = Op->getValueType(0);
7023 if (LoadNode->getMemoryVT() != MVT::v4i8)
7027 if (Subtarget->requiresStrictAlign() && LoadNode->getAlign() < Align(4))
7031 if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
7033 else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
7034 LoadNode->getExtensionType() == ISD::EXTLOAD)
7039 SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
7040 LoadNode->getBasePtr(), MachinePointerInfo());
7068 if (!Subtarget->isSVEAvailable())
7197 VT.getFixedSizeInBits() - ShiftNo->getZExtValue();
7256 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
7267 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
7290 DAG.getConstant(MFI.getObjectSize(FI->getIndex()), dl, MVT::i64);
7503 if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
7504 assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
7546 EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
7561 !Subtarget->isNeonAvailable()))
7635 "WRITE_REGISTER custom lowering is only for 128-bit sysregs");
7660 return !Subtarget->useSVEForFixedLengthVectors();
7685 // NEON-sized vectors can be emulated using SVE instructions.
7687 return Subtarget->isSVEorStreamingSVEAvailable();
7694 if (!Subtarget->useSVEForFixedLengthVectors())
7698 if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
7709 //===----------------------------------------------------------------------===//
7711 //===----------------------------------------------------------------------===//
7714 unsigned Opcode = N->getOpcode();
7719 unsigned IID = N->getConstantOperandVal(0);
7755 // The non-vararg case is handled in the CC function itself.
7768 if (Subtarget->isTargetWindows()) {
7770 if (Subtarget->isWindowsArm64EC())
7776 if (!Subtarget->isTargetDarwin())
7780 return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
7784 if (Subtarget->isWindowsArm64EC())
7790 if (Subtarget->isWindowsArm64EC())
7814 if (Subtarget->isWindowsArm64EC())
7833 Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg());
7835 (isVarArg && Subtarget->isWindowsArm64EC());
7842 FuncInfo->setIsSVECC(true);
7861 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
7865 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
7893 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
7897 // non-compliant manner for larger structs.
7902 // FIXME: This works on big-endian for composite byvals, which are the common
7913 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
7935 FuncInfo->setIsSVECC(true);
7938 FuncInfo->setIsSVECC(true);
7941 FuncInfo->setIsSVECC(true);
7956 // tn: res,ch,glue = CopyFromReg t(n-1), ..
7972 // If this is an 8, 16 or 32-bit value, it is really passed promoted
7982 (VA.getValVT().isScalableVT() || Subtarget->isWindowsArm64EC()) &&
8006 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
8008 BEAlign = 8 - ArgSize;
8046 Subtarget->isWindowsArm64EC()) &&
8067 Subtarget->isWindowsArm64EC()) &&
8073 while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
8085 NumParts--;
8104 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
8108 // i1 arguments are zero-extended to i8 by the caller. Emit a
8112 if (OrigArg->getType()->isIntegerTy(1)) {
8133 FuncInfo->setPStateSMReg(Reg);
8154 if (!Subtarget->isTargetDarwin() || IsWin64) {
8155 // The AAPCS variadic function ABI is identical to the non-variadic
8165 // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
8166 VarArgsOffset = alignTo(VarArgsOffset, Subtarget->isTargetILP32() ? 4 : 8);
8167 FuncInfo->setVarArgsStackOffset(VarArgsOffset);
8168 FuncInfo->setVarArgsStackIndex(
8177 FuncInfo->getForwardedMustTailRegParms();
8197 assert(!FuncInfo->getSRetReturnReg());
8202 FuncInfo->setSRetReturnReg(Reg);
8214 // This is a non-standard ABI so by fiat I say we're allowed to make full
8221 FuncInfo->setArgumentStackToRestore(StackArgSize);
8229 FuncInfo->setBytesInStackArgArea(StackArgSize);
8231 if (Subtarget->hasCustomCallingConv())
8232 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
8237 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
8243 if (!Subtarget->isTargetWindows() && !hasInlineStackProbe(MF)) {
8264 if (!Subtarget->isTargetWindows() && !hasInlineStackProbe(MF)) {
8279 FuncInfo->setSMESaveBufferAddr(BufferPtr);
8288 DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
8310 Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg());
8316 if (Subtarget->isWindowsArm64EC()) {
8317 // In the ARM64EC ABI, only x0-x3 are used to pass arguments to varargs
8323 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
8327 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
8330 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
8335 if (Subtarget->isWindowsArm64EC()) {
8337 // compute its address relative to x4. For a normal AArch64->AArch64
8354 MF, GPRIdx, (i - FirstVariadicGPR) * 8)
8361 FuncInfo->setVarArgsGPRIndex(GPRIdx);
8362 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
8364 if (Subtarget->hasFPARMv8() && !IsWin64) {
8369 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
8387 FuncInfo->setVarArgsFPRIndex(FPRIdx);
8388 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
8396 /// LowerCallResult - Lower the result values of a call into the
8504 bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC, IsVarArg);
8575 // The check for matching callee-saved regs will determine whether it is
8578 MF.getInfo<AArch64FunctionInfo>()->isSVECC())
8583 // When using the Windows calling convention on a non-windows OS, we want
8586 if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
8596 if (i->hasByValAttr())
8599 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
8605 if (i->hasInRegAttr())
8612 // Externally-defined functions with weak linkage should not be
8613 // tail-called on AArch64 when the OS does not support dynamic
8614 // pre-emption of symbols, as the AAELF spec requires normal calls
8617 // situation (as used for tail calls) is implementation-defined, so we
8620 const GlobalValue *GV = G->getGlobal();
8622 if (GV->hasExternalWeakLinkage() &&
8643 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
8644 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8646 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8647 if (Subtarget->hasCustomCallingConv()) {
8648 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
8649 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
8651 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8664 if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
8686 Subtarget->isWindowsArm64EC()) &&
8694 if (CCInfo.getStackSize() > FuncInfo->getBytesInStackArgArea())
8710 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
8718 for (SDNode *U : DAG.getEntryNode().getNode()->users())
8720 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
8721 if (FI->getIndex() < 0) {
8722 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
8724 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
8741 // Check if the value is zero-extended from i1 to i8
8766 MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
8786 if (!Def || !Def->getParent()->isCopy())
8789 const MachineOperand &CopySrc = Def->getParent()->getOperand(1);
8795 if (!CopySrcOp || !CopySrcOp->isReg() || OpSubReg != SubReg ||
8796 MRI.getRegClass(CopySrcOp->getReg()) != RegClass)
8805 // Live-in physreg copies that are glued to SMSTART are applied as
8806 // implicit-def's in the InstrEmitter. Here we remove them, allowing the
8808 // copies to avoid these fake clobbers of actually-preserved GPRs.
8811 for (unsigned I = MI.getNumOperands() - 1; I > 0; --I)
8835 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
8837 TII->get(TargetOpcode::REG_SEQUENCE),
8842 MIB.addImm(AArch64::zsub0 + (I - 1));
8851 // frame-address. If they contain a frame-index to a scalable vector, this
8855 if (MF.getInfo<AArch64FunctionInfo>()->hasStreamingModeChanges() &&
8873 FuncInfo->setHasStreamingModeChanges(true);
8875 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
8876 SDValue RegMask = DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask());
8901 FuncInfo->setSMESaveBufferUsed();
8907 DAG.getCopyFromReg(Chain, DL, Info->getSMESaveBufferAddr(), MVT::i64);
8934 /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
8956 bool IsCFICall = CLI.CB && CLI.CB->isIndirectCall() && CLI.CFIType;
8960 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
8961 !Subtarget->noBTIAtReturnTwice()) {
8962 GuardWithBTI = FuncInfo->branchTargetEnforcement();
9015 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
9032 // arguments to begin at SP+0. Completely unused for non-tail calls.
9036 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
9039 // popped size 16-byte aligned.
9045 FPDiff = NumReusableBytes - NumBytes;
9049 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
9050 FuncInfo->setTailCallReservedStack(-FPDiff);
9052 // The stack pointer must be 16-byte aligned at all times it's used for a
9055 // a 16-byte aligned SP and the delta applied for the tail call should
9065 CalleeAttrs = SMEAttrs(ES->getSymbol());
9068 [&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & {
9071 R << ore::NV("Callee", ES->getSymbol());
9072 else if (CLI.CB && CLI.CB->getCalledFunction())
9073 R << ore::NV("Callee", CLI.CB->getCalledFunction()->getName());
9084 const TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
9153 // PSTATE.ZA before the call if there is no lazy-save active.
9156 "Lazy-save should have PSTATE.SM=1 on entry to the function");
9177 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
9178 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
9188 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
9206 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
9209 // already be zero-extended.
9211 // We cannot just emit a (zext i8 (trunc (assert-zext i8)))
9215 // (ext (zext x)) -> (zext x)
9227 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
9243 assert((isScalable || Subtarget->isWindowsArm64EC()) &&
9250 while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
9273 NumParts--;
9308 // parts of an [N x i32] into an X-register. The extension type will
9316 ->second;
9344 // FIXME: This works on big-endian for composite byvals, which are the
9355 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
9358 BEAlign = 8 - OpSize;
9390 /*CI=*/nullptr, std::nullopt, DstInfo, MachinePointerInfo());
9407 if (IsVarArg && Subtarget->isWindowsArm64EC()) {
9430 if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
9443 // Build a sequence of copy-to-reg nodes chained together with token chain
9457 CalledGlobal = G->getGlobal();
9458 OpFlags = Subtarget->classifyGlobalFunctionReference(CalledGlobal,
9464 const GlobalValue *GV = G->getGlobal();
9469 Subtarget->isTargetMachO()) ||
9470 MF.getFunction().getParent()->getRtLibUseGOT();
9471 const char *Sym = S->getSymbol();
9480 // We don't usually want to end the call-sequence here because we would tidy
9481 // the frame up *after* the call, however in the ABI-changing tail-call case
9522 const uint64_t Key = CLI.PAI->Key;
9529 extractPtrauthBlendDiscriminators(CLI.PAI->Discriminator, &DAG);
9546 // Add a register mask operand representing the call-preserved registers.
9548 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
9550 // For 'this' returns, use the X0-preserving mask if applicable
9551 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
9554 Mask = TRI->getCallPreservedMask(MF, CallConv);
9557 Mask = TRI->getCallPreservedMask(MF, CallConv);
9559 if (Subtarget->hasCustomCallingConv())
9560 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
9562 if (TRI->isAnyArgRegReserved(MF))
9563 TRI->emitReservedArgRegCallError(MF);
9577 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
9589 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
9610 InGlue = Result.getValue(Result->getNumValues() - 1);
9618 if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
9640 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
9642 TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
9679 // a vreg -> vreg copy.
9693 DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
9744 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
9759 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
9770 })->second;
9778 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
9784 Register Reg = FuncInfo->getPStateSMReg();
9812 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
9826 const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&MF);
9862 //===----------------------------------------------------------------------===//
9864 //===----------------------------------------------------------------------===//
9869 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
9870 N->getOffset(), Flag);
9876 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
9882 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
9883 N->getOffset(), Flag);
9889 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
9895 return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag);
9910 ->hasELFSignedGOT())
9960 const GlobalValue *GV = GN->getGlobal();
9961 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
9964 assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
10021 assert(Subtarget->isTargetDarwin() &&
10027 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
10052 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
10053 const uint32_t *Mask = TRI->getTLSCallPreservedMask();
10054 if (Subtarget->hasCustomCallingConv())
10055 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
10067 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
10068 if (DAG.getMachineFunction().getFunction().hasFnAttribute("ptrauth-calls")) {
10082 /// Convert a thread-local variable reference into a sequence of instructions to
10173 /// When accessing thread-local variables under either the general-dynamic or
10174 /// local-dynamic system, we make a "TLS-descriptor" call. The variable will
10175 /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
10188 /// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
10200 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>()->hasELFSignedGOT()
10212 assert(Subtarget->isTargetELF() && "This function expects an ELF target");
10218 TLSModel::Model Model = MFI->hasELFSignedGOT()
10220 : getTargetMachine().getTLSModel(GA->getGlobal());
10241 const GlobalValue *GV = GA->getGlobal();
10251 // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
10257 MFI->incNumLocalDynamicTLSAccesses();
10266 // thread-local area.
10270 // in its thread-storage area.
10301 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
10341 const GlobalValue *GV = GA->getGlobal();
10363 if (Subtarget->isTargetDarwin())
10365 if (Subtarget->isTargetELF())
10367 if (Subtarget->isTargetWindows())
10373 //===----------------------------------------------------------------------===//
10377 // - MOVaddrPAC: similar to MOVaddr, with added PAC.
10381 // - LOADgotPAC: similar to LOADgot, with added PAC.
10384 // section is assumed to be read-only (for example, via relro mechanism). See
10387 // - LOADauthptrstatic: similar to LOADgot, but use a
10395 // provide integrity guarantees on the to-be-signed intermediate values.
10398 // with often similarly-signed pointers, making it a good harvesting target.
10406 assert(TGN->getGlobal()->hasExternalWeakLinkage());
10412 if (TGN->getOffset() != 0)
10414 "unsupported non-zero offset in weak ptrauth global reference");
10417 report_fatal_error("unsupported weak addr-div ptrauth global");
10439 // Blend only works if the integer discriminator is 16-bit wide.
10444 // Choosing between 3 lowering alternatives is target-specific.
10445 if (!Subtarget->isTargetELF() && !Subtarget->isTargetMachO())
10454 const GlobalValue *PtrGV = PtrN->getGlobal();
10458 Subtarget->ClassifyGlobalReference(PtrGV, getTargetMachine());
10461 "unsupported non-GOT op flags on ptrauth global reference");
10464 PtrOffsetC += PtrN->getOffset();
10467 assert(PtrN->getTargetFlags() == 0 &&
10476 // No GOT load needed -> MOVaddrPAC
10478 assert(!PtrGV->hasExternalWeakLinkage() && "extern_weak should use GOT");
10485 // GOT load -> LOADgotPAC
10487 if (!PtrGV->hasExternalWeakLinkage())
10493 // extern_weak ref -> LOADauthptrstatic
10505 cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
10510 Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1};
10512 return {Val, Val.getValueSizeInBits() - 1};
10517 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
10549 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
10572 if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
10576 // out of bounds, a late MI-layer pass rewrites branches.
10592 // out of bounds, a late MI-layer pass rewrites branches.
10615 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
10654 if (!Subtarget->isNeonAvailable() &&
10655 !Subtarget->useSVEForFixedLengthVectors())
10674 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) {
10693 auto SetVecVal = [&](int Idx = -1) {
10725 // 64-bit elements. Instead, materialize all bits set and then negate that.
10753 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
10756 if (!Subtarget->isNeonAvailable())
10772 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
10773 // CNT V0.8B, V0.8B // 8xbyte pop-counts
10774 // ADDV B0, V0.8B // sum 8xbyte pop-counts
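
The three-instruction sequence sketched in the comments above maps directly onto the ACLE NEON intrinsics. A hedged C++ equivalent (assuming an AArch64 target with <arm_neon.h>; not code from the matched file):

#include <arm_neon.h>
#include <cstdint>

// Move the 64-bit integer into a D register, take per-byte population counts,
// then horizontally add the eight byte counts: FMOV + CNT + ADDV.
static unsigned popcount64(uint64_t X) {
  uint8x8_t PerByte = vcnt_u8(vcreate_u8(X));
  return vaddv_u8(PerByte);
}
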
10811 if (Subtarget->hasDotProd() && VT.getScalarSizeInBits() != 16 &&
10848 VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()));
10881 VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
10908 VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
10959 // Skip the one-use zext
10960 if (N->getOpcode() == ISD::ZERO_EXTEND && N->hasOneUse())
10961 N = N->getOperand(0);
10964 if (N->getOpcode() == ISD::XOR) {
10965 WorkList.push_back(std::make_pair(N->getOperand(0), N->getOperand(1)));
10970 // All the non-leaf nodes must be OR.
10971 if (N->getOpcode() != ISD::OR || !N->hasOneUse())
10974 if (isOrXorChain(N->getOperand(0), Num, WorkList) &&
10975 isOrXorChain(N->getOperand(1), Num, WorkList))
10982 SDValue LHS = N->getOperand(0);
10983 SDValue RHS = N->getOperand(1);
10985 EVT VT = N->getValueType(0);
10989 if (N->getOpcode() != ISD::SETCC)
10992 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
10997 LHS->getOpcode() == ISD::OR && LHS->hasOneUse() &&
11021 bool IsStrict = Op->isStrictFPOpcode();
11029 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
11127 ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(3))->get();
11155 if ((LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
11169 // Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform
11170 // into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
11172 if (CC == ISD::SETGT && RHSC && RHSC->isAllOnes() && CTVal && CFVal &&
11173 CTVal->isOne() && CFVal->isAllOnes() &&
11178 DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
11183 // (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1))
11184 // (SELECT_CC setlt, lhs, 0, lhs, 0) -> (AND lhs, (SRA lhs, typesize-1))
11187 RHSC && RHSC->isZero() && CFVal && CFVal->isZero() &&
11192 DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
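
The folds in the comments above all turn a compare-against-a-constant select into shift-and-mask arithmetic, exploiting the fact that an arithmetic right shift by the type width minus one smears the sign bit across the whole register. Scalar models for 32-bit values (illustrations only; they assume arithmetic right shift of negative values, as AArch64 compilers provide):

#include <cstdint>

// x > -1 ? 1 : -1          becomes   (x >> 31) | 1
static int32_t signOrOne(int32_t X) { return (X >> 31) | 1; }
// x > 0 ? x : 0  (max 0)   becomes   x & ~(x >> 31), i.e. BIC with the sign mask
static int32_t clampToNonNegative(int32_t X) { return X & ~(X >> 31); }
// x < 0 ? x : 0  (min 0)   becomes   x & (x >> 31), i.e. AND with the sign mask
static int32_t clampToNonPositive(int32_t X) { return X & (X >> 31); }
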
11204 if (CTVal && CFVal && CTVal->isAllOnes() && CFVal->isZero()) {
11208 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isZero()) {
11229 const int64_t TrueVal = CTVal->getSExtValue();
11230 const int64_t FalseVal = CFVal->getSExtValue();
11239 TrueVal == -FalseVal) {
11242 // If our operands are only 32-bit wide, make sure we use 32-bit
11246 // 64-bit arithmetic).
11247 const uint32_t TrueVal32 = CTVal->getZExtValue();
11248 const uint32_t FalseVal32 = CFVal->getZExtValue();
11258 // 64-bit check whether we can use CSINC.
11291 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
11292 !RHSVal->isZero() && !RHSVal->isAllOnes()) {
11300 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
11302 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
11334 if (RHSVal && RHSVal->isZero()) {
11339 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
11342 CFVal && CFVal->isZero() &&
11374 // -1 -> vl1, -2 -> vl2, etc. The predicate will then be reversed to get the
11384 // Create a predicate where all but the last -IdxVal elements are false.
11404 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
11415 SDValue CCVal = Op->getOperand(0);
11416 SDValue TVal = Op->getOperand(1);
11417 SDValue FVal = Op->getOperand(2);
11435 if (useSVEForFixedLengthVectorVT(Ty, !Subtarget->isNeonAvailable())) {
11450 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
11468 CC = cast<CondCodeSDNode>(CCVal.getOperand(2))->get();
11477 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
11486 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
11501 !Subtarget->isTargetMachO())
11515 int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
11518 AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
11520 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
11523 "aarch64-jump-table-hardening")) {
11525 if (Subtarget->isTargetMachO()) {
11527 report_fatal_error("Unsupported code-model for hardened jump-table");
11530 assert(Subtarget->isTargetELF() &&
11533 report_fatal_error("Unsupported code-model for hardened jump-table");
11556 // Skip over the jump-table BRINDs, where the destination is JumpTableDest32.
11557 if (Dest->isMachineOpcode() &&
11558 Dest->getMachineOpcode() == AArch64::JumpTableDest32)
11563 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(MF.getFunction());
11584 if (Subtarget->isTargetMachO()) {
11598 const BlockAddress *BA = BAN->getBlockAddress();
11601 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(
11602 *BA->getFunction())) {
11606 SDValue TargetBA = DAG.getTargetBlockAddress(BA, BAN->getValueType(0));
11621 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) {
11636 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
11639 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
11651 if (Subtarget->isWindowsArm64EC()) {
11653 // relative to x4. For a normal AArch64->AArch64 call, x4 == sp on entry,
11658 if (FuncInfo->getVarArgsGPRSize() > 0)
11659 StackOffset = -(uint64_t)FuncInfo->getVarArgsGPRSize();
11661 StackOffset = FuncInfo->getVarArgsStackOffset();
11665 FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
11666 ? FuncInfo->getVarArgsGPRIndex()
11667 : FuncInfo->getVarArgsStackIndex(),
11670 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
11681 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
11688 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
11693 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
11700 int GPRSize = FuncInfo->getVarArgsGPRSize();
11707 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
11719 int FPRSize = FuncInfo->getVarArgsFPRSize();
11725 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
11740 DAG.getStore(Chain, DL, DAG.getSignedConstant(-GPRSize, DL, MVT::i32),
11748 DAG.getStore(Chain, DL, DAG.getSignedConstant(-FPRSize, DL, MVT::i32),
11759 if (Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg()))
11761 else if (Subtarget->isTargetDarwin())
11772 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
11774 (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
11776 : Subtarget->isTargetILP32() ? 20 : 32;
11777 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
11778 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
11782 Align(PtrSize), false, false, /*CI=*/nullptr,
11788 assert(Subtarget->isTargetDarwin() &&
11791 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
11797 unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
11811 DAG.getConstant(Align->value() - 1, DL, PtrVT));
11813 DAG.getConstant(-(int64_t)Align->value(), DL, PtrVT));
11867 while (Depth--)
11871 if (Subtarget->isTargetILP32())
11897 const AArch64RegisterInfo *MRI = Subtarget->getRegisterInfo();
11898 unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
11899 if (!Subtarget->isXRegisterReserved(DwarfRegNum) &&
11900 !MRI->isReservedReg(MF, Reg))
11941 // live-in.
11946 // The XPACLRI instruction assembles to a hint-space instruction before
11947 // Armv8.3-A, so it can be safely used on any pre-Armv8.3-A
11948 // architecture. On Armv8.3-A and onwards XPACI is available, so use
11951 if (Subtarget->hasPAuth()) {
11962 /// LowerShiftParts - Lower SHL_PARTS/SRA_PARTS/SRL_PARTS, which returns two
11981 // We can materialize #0.0 as fmov $Rd, XZR for the 64-bit and 32-bit cases,
11982 // and for the 16-bit case when the target has full fp16 support.
11991 IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
11993 IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
11996 (Subtarget->hasFullFP16() && AArch64_AM::getFP16Imm(ImmInt) != -1) ||
12014 unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 4 : 2));
12023 //===----------------------------------------------------------------------===//
12025 //===----------------------------------------------------------------------===//
12031 if ((ST->hasNEON() &&
12035 (ST->hasSVE() &&
12040 // the initial estimate is 2^-8. Thus the number of extra steps to refine
12047 : Log2_64_Ceil(DesiredBits) - Log2_64_Ceil(AccurateBits);
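// For example (illustrative): refining an 8-bit-accurate initial estimate up
// to the 24 bits needed for f32 takes Log2Ceil(24) - Log2Ceil(8) = 5 - 3 = 2
// extra steps, since each refinement step roughly doubles the accurate bits.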
12078 (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
12086 // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
12087 // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
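// (Assuming the usual FRSQRTE/FRSQRTS pairing, one refinement step computes
//  E' = E * FRSQRTS(X * E, E) = E * 0.5 * (3 - X * E^2).)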
12088 for (int i = ExtraSteps; i > 0; --i) {
12115 // Newton reciprocal iteration: E * (2 - X * E)
12116 // AArch64 reciprocal iteration instruction: (2 - M * N)
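// (Assuming the usual FRECPE/FRECPS pairing, one refinement step computes
//  E' = E * FRECPS(X, E) = E * (2 - X * E).)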
12117 for (int i = ExtraSteps; i > 0; --i) {
12130 //===----------------------------------------------------------------------===//
12132 //===----------------------------------------------------------------------===//
12138 // r - A general register
12139 // w - An FP/SIMD register of some size in the range v0-v31
12140 // x - An FP/SIMD register of some size in the range v0-v15
12141 // I - Constant that can be used with an ADD instruction
12142 // J - Constant that can be used with a SUB instruction
12143 // K - Constant that can be used with a 32-bit logical instruction
12144 // L - Constant that can be used with a 64-bit logical instruction
12145 // M - Constant that can be used as a 32-bit MOV immediate
12146 // N - Constant that can be used as a 64-bit MOV immediate
12147 // Q - A memory reference with base register and no offset
12148 // S - A symbolic address
12149 // Y - Floating point constant zero
12150 // Z - Integer constant zero
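// A hypothetical snippet using a couple of these constraints from C:
//   asm("add %w0, %w1, %2" : "=r"(Res) : "r"(A), "I"(255));
// Here 'r' selects general registers (printed as w0/w1 via the %w modifier
// described just below) and 'I' requires an ADD-encodable immediate.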
12152 // Note that general register operands will be output using their 64-bit x
12154 // is prefixed by the %w modifier. Floating-point and SIMD register operands
12165 if (!Subtarget->hasFPARMv8())
12188 // not what we want. The code here pre-empts this by matching the register
12196 Constraint = Constraint.substr(2, Constraint.size() - 3);
12267 // https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Flag-Output-Operands
12332 /// getConstraintType - Given a constraint letter, return the type of
12382 Type *type = CallOperandVal->getType();
12391 if (type->isFloatingPointTy() || type->isVectorTy())
12414 if (Subtarget->hasLS64() && VT.getSizeInBits() == 512)
12420 if (!Subtarget->hasFPARMv8())
12441 // only take 128-bit registers so just use that regclass.
12443 if (!Subtarget->hasFPARMv8())
12451 if (!Subtarget->hasFPARMv8())
12489 tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
12491 bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
12493 // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
12494 // By default we'll emit v0-v31 for this unless there's a modifier where
12507 if (Res.second && !Subtarget->hasFPARMv8() &&
12518 if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
12524 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
12571 uint64_t CVal = C->getZExtValue();
12577 // instruction [or vice versa], in other words -1 to -4095 with optional
12584 uint64_t NVal = -C->getSExtValue();
12586 CVal = C->getSExtValue();
12594 // distinguish between bit patterns that are valid 32-bit or 64-bit
12608 // also match 32 or 64-bit immediates that can be loaded either using a
12609 // *single* MOVZ or MOVN , such as 32-bit 0x12340000, 0x00001234, 0xffffedca
12610 // (M) or 64-bit 0x1234000000000000 (N) etc.
12654 // All assembler immediates are 64-bit integers.
12667 //===----------------------------------------------------------------------===//
12669 //===----------------------------------------------------------------------===//
12671 /// WidenVector - Given a value in the V64 register class, produce the
12684 /// getExtFactor - Determine the adjustment factor for the position when
12734 MaskSource = MaskSource->getOperand(0);
12750 !cast<ConstantSDNode>(MaskIdx)->getConstantIntValue()->equalsInt(i))
12756 MaskSourceVec = MaskSource->getOperand(0);
12759 } else if (MaskSourceVec != MaskSource->getOperand(0)) {
12767 // of elements in the source, or we would have an out-of-bounds access.
12830 "various elements of other fixed-width vectors, provided "
12843 Source->MinElt = std::min(Source->MinElt, EltNo);
12844 Source->MaxElt = std::max(Source->MaxElt, EltNo);
12858 Mask.push_back(-1);
12963 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
12974 Src.WindowBase = -NumSrcElts;
13000 Src.WindowBase = -Src.MinElt;
13030 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
13038 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
13052 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
13053 ExtractBase += NumElts * (Src - Sources.begin());
13189 SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
13206 // NumEltsPerBlock with some values possibly replaced by undef-s.
13208 // Find first non-undef element
13211 "Shuffle with all-undefs must have been caught by previous cases, "
13219 size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
13224 size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
13243 // Look for the first non-undef element.
13254 return Elt != ExpectedElt++ && Elt != -1;
13262 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
13263 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
13269 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
13270 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
13276 Imm -= NumElts;
13281 /// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
13300 /// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
13319 /// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
13341 int LastLHSMismatch = -1, LastRHSMismatch = -1;
13344 if (M[i] == -1) {
13361 if (NumLHSMatch == NumInputElements - 1) {
13365 } else if (NumRHSMatch == NumInputElements - 1) {
13399 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
13422 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
13423 /// the specified operations to build the shuffle. ID is the perfect-shuffle
13432 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
13433 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
13463 auto getPFIDLane = [](unsigned ID, int Elt) -> int {
13465 Elt = 3 - Elt;
13468 Elt--;
13470 return (ID % 9 == 8) ? -1 : ID % 9;
13487 if (MaskElt == -1)
13488 MaskElt = (getPFIDLane(ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
13490 ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
13504 ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
13536 // vrev <4 x i16> -> REV32
13541 // vrev <4 x i8> -> REV16
13563 SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
13569 unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
13605 // out of range values with 0s. We do need to make sure that any out-of-range
13606 // values are really out-of-range for a v16i8 vector.
13620 Offset = Offset < IndexLen ? Offset + IndexLen : Offset - IndexLen;
13694 // Can't handle cases where vector size is not 128-bit
13702 // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
13704 // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
13705 // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
13718 // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
13723 // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
13725 Lane -= Idx * VT.getVectorNumElements() / 2;
13728 // Widen the operand to 128-bit register with undef.
13751 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
13753 // If combining adjacent elements, like two i16's -> i32, two i32's -> i64 ...
13780 SDValue Tbl1 = Op->getOperand(0);
13781 SDValue Tbl2 = Op->getOperand(1);
13787 if (Tbl1->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13788 Tbl1->getOperand(0) != Tbl2ID ||
13789 Tbl2->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13790 Tbl2->getOperand(0) != Tbl2ID)
13793 if (Tbl1->getValueType(0) != MVT::v16i8 ||
13794 Tbl2->getValueType(0) != MVT::v16i8)
13797 SDValue Mask1 = Tbl1->getOperand(3);
13798 SDValue Mask2 = Tbl2->getOperand(3);
13802 TBLMaskParts[I] = Mask1->getOperand(ShuffleMask[I]);
13805 dyn_cast<ConstantSDNode>(Mask2->getOperand(ShuffleMask[I] - 16));
13808 TBLMaskParts[I] = DAG.getConstant(C->getSExtValue() + 32, dl, MVT::i32);
13817 {ID, Tbl1->getOperand(1), Tbl1->getOperand(2),
13818 Tbl2->getOperand(1), Tbl2->getOperand(2), TBLMask});
13821 // Baseline legalization for ZERO_EXTEND_VECTOR_INREG will blend-in zeros,
13823 // so custom-lower it as ZIP1-with-zeros.
13834 // FIXME: support multi-step zipping?
13849 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
13852 // Convert shuffles that are directly supported on NEON to target-specific
13856 ArrayRef<int> ShuffleMask = SVN->getMask();
13868 if (SVN->isSplat()) {
13869 int Lane = SVN->getSplatIndex();
13871 if (Lane == -1)
13877 // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
13931 } else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
13978 SrcLane -= NumElts;
13997 // the PerfectShuffle-generated table to synthesize it from other shuffles.
14039 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
14074 // Current lowering only supports the SVE-ACLE types.
14083 if (CIdx && (CIdx->getZExtValue() <= 3)) {
14084 SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
14085 return DAG.getNode(AArch64ISD::DUPLANE128, DL, VT, Op.getOperand(1), CI);
14114 EVT VT = BVN->getValueType(0);
14118 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14134 // Try 64-bit splatted SIMD immediate.
14155 // Try 32-bit splatted SIMD immediate.
14208 // Try 16-bit splatted SIMD immediate.
14253 // Try 32-bit splatted SIMD immediate with shifted ones.
14284 // Try 8-bit splatted SIMD immediate.
14344 ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
14347 EVT VT = Bvec->getValueType(0);
14350 if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
14352 ConstVal = FirstElt->getZExtValue();
14386 // If we're compiling for a specific vector-length, we can check if the
14406 // - for the SLI case: C1 == ~(Ones(ElemSizeInBits) << C2)
14407 // - for the SRI case: C1 == ~(Ones(ElemSizeInBits) >> C2)
14410 EVT VT = N->getValueType(0);
14420 SDValue FirstOp = N->getOperand(0);
14422 SDValue SecondOp = N->getOperand(1);
14463 C2 = C2node->getZExtValue();
14478 C1AsAPInt = ~(C1nodeImm->getAPIntValue() << C1nodeShift->getAPIntValue());
14501 LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
14502 LLVM_DEBUG(N->dump(&DAG));
14504 LLVM_DEBUG(ResultSLI->dump(&DAG));
14513 !Subtarget->isNeonAvailable()))
14553 // We can always fall back to a non-immediate OR.
14570 for (SDValue Lane : Op->ops()) {
14572 // operands already. Otherwise, if Op is a floating-point splat
14577 CstLane->getAPIntValue().trunc(EltTy.getSizeInBits()).getZExtValue(),
14579 } else if (Lane.getNode()->isUndef()) {
14631 // FNegate each sub-element of the constant
14656 (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
14670 if (auto SeqInfo = BVN->isConstantSequence()) {
14671 SDValue Start = DAG.getConstant(SeqInfo->first, DL, ContainerVT);
14672 SDValue Steps = DAG.getStepVector(DL, ContainerVT, SeqInfo->second);
14679 NumElems <= 1 || BVN->isConstant())
14689 NumElems - count_if(Op->op_values(), IsExtractElt) > 4)
14692 // Lower (pow2) BUILD_VECTORS that are <= 128-bit to a sequence of ZIP1s.
14695 Op->op_values(), [&, Undef = DAG.getUNDEF(ContainerVT)](SDValue Op) {
14726 bool OverrideNEON = !Subtarget->isNeonAvailable() ||
14727 cast<BuildVectorSDNode>(Op)->isConstantSequence();
14733 // Though this might return a non-BUILD_VECTOR (e.g. CONCAT_VECTORS); if so,
14743 if (BVN->isConstant()) {
14744 if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
14747 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
14751 if (ConstantFPSDNode *Const = BVN->getConstantFPSplatNode())
14752 if (Const->isZero() && !Const->isNegative())
14763 // 3) if only one constant value is used (w/ some non-constant lanes),
14765 // in the non-constant lanes.
14767 // select the values we'll be overwriting for the non-constant
14825 // ------------------------------------------------------------------
14840 // SCALAR_TO_VECTOR, except for when we have a single-element constant vector
14857 if (!isa<ConstantSDNode>(N->getOperand(1))) {
14862 SDValue N0 = N->getOperand(0);
14880 uint64_t Val = N->getConstantOperandVal(1);
14885 if (Val - 1 == 2 * i) {
14910 // Use DUP for non-constant splats. For f32 constant splats, reduce to
14917 dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
14927 dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
14940 EltTy == MVT::f64) && "Unsupported floating-point vector type");
14957 // If we need to insert a small number of different non-constant elements and
14963 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
14967 // start by splatting that value, then replace the non-constant lanes. This
14976 ConstantValueAPInt = C->getAPIntValue().zextOrTrunc(BitSize);
14985 // Now insert the non-constant lanes.
15112 // worse. For a vector with one or two non-undef values, that's
15132 // vector element types. After type-legalization the inserted value is
15163 !Subtarget->isNeonAvailable()))
15171 unsigned NumOperands = Op->getNumOperands();
15179 SmallVector<SDValue> ConcatOps(Op->ops());
15202 !Subtarget->isNeonAvailable()))
15223 // Check for non-constant or out of range lane.
15224 ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
15225 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
15250 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
15253 // Check for non-constant or out of range lane.
15254 ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
15255 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
15303 // If this is extracting the upper 64-bits of a 128-bit vector, we match
15305 if (Idx * InVT.getScalarSizeInBits() == 64 && Subtarget->isNeonAvailable())
15310 useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable())) {
15318 SDValue Container = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PackedVT,
15321 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Container, Idx);
15369 DAG.getVectorIdxConstant(Idx - (NumElts / 2), DL));
15444 !isa<ConstantSDNode>(Op->getOperand(0)))
15447 SplatVal = Op->getConstantOperandVal(0);
15456 if (isPowerOf2_64(-SplatVal)) {
15457 SplatVal = -SplatVal;
15481 DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, VT, Pg, Op->getOperand(0),
15492 // SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
15517 if (!Subtarget->isNeonAvailable())
15524 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
15561 /// getVShiftImm - Check if this is a valid build_vector for the immediate
15572 if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
15580 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
15589 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
15592 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
15621 !Subtarget->isNeonAvailable()))
15634 if (Shift->getOpcode() != ISD::SRL)
15641 dyn_cast_or_null<ConstantSDNode>(DAG.getSplatValue(Shift->getOperand(1)));
15645 ShiftValue = ShiftOp1->getZExtValue();
15649 SDValue Add = Shift->getOperand(0);
15650 if (Add->getOpcode() != ISD::ADD || !Add->hasOneUse())
15656 uint64_t ExtraBits = VT.getScalarSizeInBits() - ResVT.getScalarSizeInBits();
15657 if (ShiftValue > ExtraBits && !Add->getFlags().hasNoUnsignedWrap())
15661 dyn_cast_or_null<ConstantSDNode>(DAG.getSplatValue(Add->getOperand(1)));
15664 uint64_t AddValue = AddOp1->getZExtValue();
15665 if (AddValue != 1ULL << (ShiftValue - 1))
15668 RShOperand = Add->getOperand(0);
15685 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
15698 (Subtarget->hasSVE2() ||
15699 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
15709 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) {
15720 DAG.getConstant(Cnt, DL, MVT::i32), Op->getFlags());
15754 bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef,
15859 !Subtarget->isNeonAvailable()))
15862 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
15876 // Lower isnan(x) | isnan(never-nan) to x != x.
15877 // Lower !isnan(x) & !isnan(never-nan) to x == x.
15920 bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs();
15971 "Expected power-of-2 length vector");
16072 bool OverrideNEON = !Subtarget->isNeonAvailable() ||
16081 SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) {
16148 // LSE has an atomic load-clear instruction, but not a load-and.
16155 return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(),
16157 AN->getMemOperand());
16170 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
16171 EVT VT = Node->getValueType(0);
16174 "no-stack-arg-probe")) {
16180 DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
16189 SDValue Callee = DAG.getTargetExternalSymbol(Subtarget->getChkStkName(),
16192 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
16193 const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
16194 if (Subtarget->hasCustomCallingConv())
16195 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
16206 // from X15 here doesn't work at -O0, since it thinks that X15 is undefined
16217 DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
16235 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
16237 EVT VT = Node->getValueType(0);
16245 DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
16258 if (Subtarget->isTargetWindows())
16268 if (Subtarget->hasSVE2())
16290 AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
16293 const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
16298 assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
16302 Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
16304 Info.ptrVal = CI.getArgOperand(CI.arg_size() - 1);
16311 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
16334 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
16335 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
16352 unsigned NumElts = StructTy->getNumElements();
16353 Type *VecTy = StructTy->getElementType(0);
16355 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), EleVT, NumElts);
16356 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
16372 Type *ArgTy = Arg->getType();
16373 if (!ArgTy->isVectorTy())
16377 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
16378 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
16391 Type *VecTy = I.getArgOperand(0)->getType();
16395 Type *ArgTy = Arg->getType();
16396 if (!ArgTy->isVectorTy())
16401 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), EleVT, NumElts);
16402 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
16450 Type *ElTy = cast<VectorType>(I.getType())->getElementType();
16461 cast<VectorType>(I.getArgOperand(0)->getType())->getElementType();
16463 Info.memVT = MVT::getVT(I.getOperand(0)->getType());
16474 Info.memVT = MVT::getVT(Val->getType());
16505 const SDValue &Base = Mem->getBasePtr();
16510 // It's unknown whether a scalable vector has a power-of-2 bitwidth.
16511 if (Mem->getMemoryVT().isScalableVector())
16516 uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8;
16527 if ((VT == MVT::i64 || VT == MVT::i32) && Extend->use_size()) {
16540 // Truncations from 64-bit GPR to 32-bit GPR are free.
16542 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
16544 uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedValue();
16545 uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedValue();
16560 if (I->getOpcode() != Instruction::FMul)
16563 if (!I->hasOneUse())
16566 Instruction *User = I->user_back();
16568 if (!(User->getOpcode() == Instruction::FSub ||
16569 User->getOpcode() == Instruction::FAdd))
16573 const Function *F = I->getFunction();
16574 const DataLayout &DL = F->getDataLayout();
16575 Type *Ty = User->getOperand(0)->getType();
16583 // All 32-bit GPR operations implicitly zero the high-half of the corresponding
16584 // 64-bit GPR.
16586 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
16588 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
16589 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
16609 // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
16620 if (Ext->getType()->isVectorTy())
16623 for (const Use &U : Ext->uses()) {
16631 switch (Instr->getOpcode()) {
16633 if (!isa<ConstantInt>(Instr->getOperand(1)))
16638 auto &DL = Ext->getDataLayout();
16639 std::advance(GTI, U.getOperandNo()-1);
16642 // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
16644 // log2(sizeof(IdxTy)) - log2(8).
16645 if (IdxTy->isScalableTy())
16648 llvm::countr_zero(DL.getTypeStoreSizeInBits(IdxTy).getFixedValue()) -
16659 if (Instr->getType() == Ext->getOperand(0)->getType())
16689 for (unsigned I = IsLittleEndian ? 0 : Factor - 1; I < MaskLen; I += Factor)
16699 auto *SrcTy = cast<FixedVectorType>(Op->getType());
16700 unsigned NumElts = SrcTy->getNumElements();
16701 auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
16702 auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
16720 auto *SrcTy = cast<FixedVectorType>(Op->getType());
16721 auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
16722 auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
16725 if (!createTblShuffleMask(SrcWidth, DstWidth, SrcTy->getNumElements(),
16738 int NumElements = cast<FixedVectorType>(TI->getType())->getNumElements();
16739 auto *SrcTy = cast<FixedVectorType>(TI->getOperand(0)->getType());
16740 auto *DstTy = cast<FixedVectorType>(TI->getType());
16741 assert(SrcTy->getElementType()->isIntegerTy() &&
16742 "Non-integer type source vector element is not supported");
16743 assert(DstTy->getElementType()->isIntegerTy(8) &&
16746 cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
16748 cast<IntegerType>(DstTy->getElementType())->getBitWidth();
16760 // 0,8,16,..Y*8th bytes for the little-endian format
16766 : Itr * TruncFactor + (TruncFactor - 1)));
16789 Builder.CreateShuffleVector(TI->getOperand(0), ShuffleLanes), VecTy));
16848 TI->replaceAllUsesWith(FinalResult);
16849 TI->eraseFromParent();
16856 if (!EnableExtToTBL || Subtarget->useSVEForFixedLengthVectors())
16863 Function *F = I->getParent()->getParent();
16864 if (!L || L->getHeader() != I->getParent() || F->hasMinSize() ||
16865 F->hasOptSize())
16868 auto *SrcTy = dyn_cast<FixedVectorType>(I->getOperand(0)->getType());
16869 auto *DstTy = dyn_cast<FixedVectorType>(I->getType());
16877 if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) {
16878 auto DstWidth = DstTy->getElementType()->getScalarSizeInBits();
16884 // If the ZExt can be lowered to a single ZExt to the next power-of-2 and
16886 auto SrcWidth = SrcTy->getElementType()->getScalarSizeInBits();
16887 if (TTI.getCastInstrCost(I->getOpcode(), DstTy, TruncDstType,
16890 if (SrcWidth * 2 >= TruncDstType->getElementType()->getScalarSizeInBits())
16899 if (SrcWidth * 4 <= DstWidth && I->hasOneUser()) {
16900 auto *SingleUser = cast<Instruction>(*I->user_begin());
16905 if (DstTy->getScalarSizeInBits() >= 64)
16910 Builder, ZExt->getOperand(0), cast<FixedVectorType>(ZExt->getType()),
16911 DstTy, Subtarget->isLittleEndian());
16914 ZExt->replaceAllUsesWith(Result);
16915 ZExt->eraseFromParent();
16920 if (UIToFP && ((SrcTy->getElementType()->isIntegerTy(8) &&
16921 DstTy->getElementType()->isFloatTy()) ||
16922 (SrcTy->getElementType()->isIntegerTy(16) &&
16923 DstTy->getElementType()->isDoubleTy()))) {
16926 Builder, I->getOperand(0), FixedVectorType::getInteger(DstTy),
16927 FixedVectorType::getInteger(DstTy), Subtarget->isLittleEndian());
16930 I->replaceAllUsesWith(UI);
16931 I->eraseFromParent();
16936 if (SIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
16937 DstTy->getElementType()->isFloatTy()) {
16939 auto *Shuffle = createTblShuffleForSExt(Builder, I->getOperand(0),
16941 Subtarget->isLittleEndian());
16946 I->replaceAllUsesWith(SI);
16947 I->eraseFromParent();
16955 (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
16956 SrcTy->getElementType()->isFloatTy() &&
16957 DstTy->getElementType()->isIntegerTy(8)) {
16959 auto *WideConv = Builder.CreateFPToUI(FPToUI->getOperand(0),
16962 I->replaceAllUsesWith(TruncI);
16963 I->eraseFromParent();
16964 createTblForTrunc(cast<TruncInst>(TruncI), Subtarget->isLittleEndian());
16970 // per lane of the input that is represented using 1,2,3 or 4 128-bit table
16973 if (TI && DstTy->getElementType()->isIntegerTy(8) &&
16974 ((SrcTy->getElementType()->isIntegerTy(32) ||
16975 SrcTy->getElementType()->isIntegerTy(64)) &&
16976 (SrcTy->getNumElements() == 16 || SrcTy->getNumElements() == 8))) {
16977 createTblForTrunc(TI, Subtarget->isLittleEndian());
17000 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
17001 unsigned MinElts = VecTy->getElementCount().getKnownMinValue();
17003 VecSize = std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
17009 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
17017 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
17018 auto EC = VecTy->getElementCount();
17023 if (isa<FixedVectorType>(VecTy) && !Subtarget->isNeonAvailable() &&
17024 (!Subtarget->useSVEForFixedLengthVectors() ||
17029 !Subtarget->isSVEorStreamingSVEAvailable())
17046 if (Subtarget->useSVEForFixedLengthVectors()) {
17048 std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
17051 (!Subtarget->isNeonAvailable() || VecSize > 128))) {
17059 return Subtarget->isNeonAvailable() && (VecSize == 64 || VecSize % 128 == 0);
17063 if (VTy->getElementType() == Type::getDoubleTy(VTy->getContext()))
17064 return ScalableVectorType::get(VTy->getElementType(), 2);
17066 if (VTy->getElementType() == Type::getFloatTy(VTy->getContext()))
17067 return ScalableVectorType::get(VTy->getElementType(), 4);
17069 if (VTy->getElementType() == Type::getBFloatTy(VTy->getContext()))
17070 return ScalableVectorType::get(VTy->getElementType(), 8);
17072 if (VTy->getElementType() == Type::getHalfTy(VTy->getContext()))
17073 return ScalableVectorType::get(VTy->getElementType(), 8);
17075 if (VTy->getElementType() == Type::getInt64Ty(VTy->getContext()))
17076 return ScalableVectorType::get(VTy->getElementType(), 2);
17078 if (VTy->getElementType() == Type::getInt32Ty(VTy->getContext()))
17079 return ScalableVectorType::get(VTy->getElementType(), 4);
17081 if (VTy->getElementType() == Type::getInt16Ty(VTy->getContext()))
17082 return ScalableVectorType::get(VTy->getElementType(), 8);
17084 if (VTy->getElementType() == Type::getInt8Ty(VTy->getContext()))
17085 return ScalableVectorType::get(VTy->getElementType(), 16);
17101 return Intrinsic::getOrInsertDeclaration(M, SVELoads[Factor - 2], {LDVTy});
17103 return Intrinsic::getOrInsertDeclaration(M, NEONLoads[Factor - 2],
17118 return Intrinsic::getOrInsertDeclaration(M, SVEStores[Factor - 2], {STVTy});
17120 return Intrinsic::getOrInsertDeclaration(M, NEONStores[Factor - 2],
17144 const DataLayout &DL = LI->getDataLayout();
17146 VectorType *VTy = Shuffles[0]->getType();
17159 return SI->hasOneUse() && match(SI->user_back(), m_UIToFP(m_Value())) &&
17160 SI->getType()->getScalarSizeInBits() * 4 ==
17161 SI->user_back()->getType()->getScalarSizeInBits();
17171 Type *EltTy = FVTy->getElementType();
17172 if (EltTy->isPointerTy())
17174 FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
17176 // If we're going to generate more than one load, reset the sub-vector type
17178 FVTy = FixedVectorType::get(FVTy->getElementType(),
17179 FVTy->getNumElements() / NumLoads);
17187 Value *BaseAddr = LI->getPointerOperand();
17189 Type *PtrTy = LI->getPointerOperandType();
17190 Type *PredTy = VectorType::get(Type::getInt1Ty(LDVTy->getContext()),
17191 LDVTy->getElementCount());
17193 Function *LdNFunc = getStructuredLoadFunction(LI->getModule(), Factor,
17196 // Holds sub-vectors extracted from the load intrinsic return values. The
17197 // sub-vectors are associated with the shufflevector instructions they will
17204 getSVEPredPatternFromNumElements(FVTy->getNumElements());
17205 if (Subtarget->getMinSVEVectorSizeInBits() ==
17206 Subtarget->getMaxSVEVectorSizeInBits() &&
17207 Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy))
17211 ConstantInt::get(Type::getInt32Ty(LDVTy->getContext()), *PgPattern);
17221 BaseAddr = Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr,
17222 FVTy->getNumElements() * Factor);
17230 // Extract and store the sub-vectors returned by the load intrinsic.
17240 ConstantInt::get(Type::getInt64Ty(VTy->getContext()), 0));
17243 if (EltTy->isPointerTy())
17245 SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
17246 FVTy->getNumElements()));
17252 // Replace uses of the shufflevector instructions with the sub-vectors
17254 // associated with more than one sub-vector, those sub-vectors will be
17260 SVI->replaceAllUsesWith(WideVec);
17272 Ptr->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
17275 if (It->isDebugOrPseudoInst())
17277 if (MaxLookupDist-- == 0)
17281 SI->getPointerOperand()->stripAndAccumulateInBoundsConstantOffsets(
17284 (OffsetA.sextOrTrunc(IdxWidth) - OffsetB.sextOrTrunc(IdxWidth))
17326 auto *VecTy = cast<FixedVectorType>(SVI->getType());
17327 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
17329 unsigned LaneLen = VecTy->getNumElements() / Factor;
17330 Type *EltTy = VecTy->getElementType();
17333 const DataLayout &DL = SI->getDataLayout();
17344 Value *Op0 = SVI->getOperand(0);
17345 Value *Op1 = SVI->getOperand(1);
17350 if (EltTy->isPointerTy()) {
17353 cast<FixedVectorType>(Op0->getType())->getNumElements();
17364 // and sub-vector type to something legal.
17366 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
17372 Value *BaseAddr = SI->getPointerOperand();
17374 auto Mask = SVI->getShuffleMask();
17377 // If mask is `poison`, `Mask` may be a vector of -1s.
17384 // that points to BaseAddr+16 or BaseAddr-16 then it can be better left as a
17386 if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 &&
17388 hasNearbyPairedStore(SI->getIterator(), SI->getParent()->end(), BaseAddr,
17390 hasNearbyPairedStore(SI->getReverseIterator(), SI->getParent()->rend(),
17394 Type *PtrTy = SI->getPointerOperandType();
17395 Type *PredTy = VectorType::get(Type::getInt1Ty(STVTy->getContext()),
17396 STVTy->getElementCount());
17398 Function *StNFunc = getStructuredStoreFunction(SI->getModule(), Factor,
17404 getSVEPredPatternFromNumElements(SubVecTy->getNumElements());
17405 if (Subtarget->getMinSVEVectorSizeInBits() ==
17406 Subtarget->getMaxSVEVectorSizeInBits() &&
17407 Subtarget->getMinSVEVectorSizeInBits() ==
17412 ConstantInt::get(Type::getInt32Ty(STVTy->getContext()), *PgPattern);
17433 StartMask = Mask[IdxJ] - j;
17449 ConstantInt::get(Type::getInt64Ty(STVTy->getContext()), 0));
17460 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
17477 VectorType *VTy = cast<VectorType>(DeinterleavedValues[0]->getType());
17479 const DataLayout &DL = LI->getModule()->getDataLayout();
17485 // the code from lowerInterleavedLoad to obtain the correct container type.
17486 if (UseScalable && !VTy->isScalableTy())
17491 VectorType::get(VTy->getElementType(),
17492 VTy->getElementCount().divideCoefficientBy(NumLoads));
17494 Type *PtrTy = LI->getPointerOperandType();
17495 Function *LdNFunc = getStructuredLoadFunction(LI->getModule(), Factor,
17502 Builder.CreateVectorSplat(LdTy->getElementCount(), Builder.getTrue());
17504 Value *BaseAddr = LI->getPointerOperand();
17518 Builder.getInt64(I * LdTy->getElementCount().getKnownMinValue());
17523 LLVM_DEBUG(dbgs() << "LdN4 res: "; LdN->dump());
17527 DeinterleavedValues[J]->replaceAllUsesWith(ExtractedLdValues[J]);
17537 DeinterleavedValues[I]->replaceAllUsesWith(NewExtract);
17551 VectorType *VTy = cast<VectorType>(InterleavedValues[0]->getType());
17552 const DataLayout &DL = SI->getModule()->getDataLayout();
17559 // the code from lowerInterleavedStore to obtain the correct container type.
17560 if (UseScalable && !VTy->isScalableTy())
17566 VectorType::get(VTy->getElementType(),
17567 VTy->getElementCount().divideCoefficientBy(NumStores));
17569 Type *PtrTy = SI->getPointerOperandType();
17570 Function *StNFunc = getStructuredStoreFunction(SI->getModule(), Factor,
17575 Value *BaseAddr = SI->getPointerOperand();
17580 Builder.CreateVectorSplat(StTy->getElementCount(), Builder.getTrue());
17594 Builder.getInt64(I * StTy->getElementCount().getKnownMinValue());
17600 StoreOperands[StoreOperands.size() - 1] = Address;
17610 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
17611 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
17612 // Only use AdvSIMD to implement memsets of 32 bytes and above. It would have
17640 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
17641 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
17642 // Only use AdvSIMD to implement memsets of 32 bytes and above. It would have
17667 // 12-bit optionally shifted immediates are legal for adds.
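// E.g. #0xfff and #0xfff000 (0xfff shifted left by 12) are legal add
// immediates, whereas #0x1001 is not, since it needs bits from both halves.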
17685 if (!Subtarget->hasSVE2())
17716 // (mul (add x, c1), c2) -> (add (mul x, c2), c2*c1) in DAGCombine,
17729 const int64_t C1 = C1Node->getSExtValue();
17730 const APInt C1C2 = C1Node->getAPIntValue() * C2Node->getAPIntValue();
17750 /// isLegalAddressingMode - Return true if the addressing mode represented
17757 // reg + 9-bit signed offset
17758 // reg + SIZE_IN_BYTES * 12-bit unsigned offset
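// E.g. for an 8-byte access this permits [reg, #-256] .. [reg, #255] via the
// signed form and [reg, #0] .. [reg, #32760] (multiples of 8) via the scaled
// unsigned form.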
17789 if (Ty->isScalableTy()) {
17791 // See if we have a foldable vscale-based offset, for vector types which
17802 DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
17810 // No scalable offsets allowed for non-scalable types.
17817 if (Ty->isSized()) {
17824 return Subtarget->getInstrInfo()->isLegalAddressingMode(NumBytes, AM.BaseOffs,
17856 return Subtarget->hasFullFP16();
17869 switch (Ty->getScalarType()->getTypeID()) {
17886 // LR is a callee-save register, but we must treat it as clobbered by any call
17888 // as implicit-defs for stackmaps and patchpoints.
17903 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
17904 N->getOpcode() == ISD::SRL) &&
17907 SDValue ShiftLHS = N->getOperand(0);
17908 EVT VT = N->getValueType(0);
17910 if (!ShiftLHS->hasOneUse())
17914 !ShiftLHS.getOperand(0)->hasOneUse())
17927 if (N->getOpcode() == ISD::SHL)
17928 if (auto *SHLC = dyn_cast<ConstantSDNode>(N->getOperand(1)))
17929 return SRLC->getZExtValue() == SHLC->getZExtValue();
17940 assert(N->getOpcode() == ISD::XOR &&
17941 (N->getOperand(0).getOpcode() == ISD::SHL ||
17942 N->getOperand(0).getOpcode() == ISD::SRL) &&
17946 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
17947 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
17950 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
17951 unsigned ShiftAmt = ShiftC->getZExtValue();
17952 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17953 if (N->getOperand(0).getOpcode() == ISD::SHL)
17954 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
17955 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
17964 assert(((N->getOpcode() == ISD::SHL &&
17965 N->getOperand(0).getOpcode() == ISD::SRL) ||
17966 (N->getOpcode() == ISD::SRL &&
17967 N->getOperand(0).getOpcode() == ISD::SHL)) &&
17968 "Expected shift-shift mask");
17970 if (!N->getOperand(0)->hasOneUse())
17974 EVT VT = N->getValueType(0);
17975 if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
17976 auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
17977 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17978 return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
17983 if (N->getOpcode() == ISD::SHL && N->hasOneUse()) {
17984 if (auto C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
17985 unsigned ShlAmt = C2->getZExtValue();
17986 if (auto ShouldADD = *N->user_begin();
17987 ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
17988 if (auto ShouldLOAD = dyn_cast<LoadSDNode>(*ShouldADD->user_begin())) {
17989 unsigned ByteVT = ShouldLOAD->getMemoryVT().getSizeInBits() / 8;
17991 isIndexedLoadLegal(ISD::PRE_INC, ShouldLOAD->getMemoryVT()))
18008 assert(Ty->isIntegerTy());
18010 unsigned BitSize = Ty->getPrimitiveSizeInBits();
18021 Val &= (1LL << 32) - 1;
18037 /// xor (sra X, elt_size(X)-1), -1
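/// (The sra yields an all-ones lane for negative elements and all-zeros
/// otherwise, so xor-ing with -1 produces the "element is non-negative" mask.)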
18042 EVT VT = N->getValueType(0);
18043 if (!Subtarget->hasNEON() || !VT.isVector())
18048 SDValue Shift = N->getOperand(0);
18049 SDValue Ones = N->getOperand(1);
18057 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
18081 if (N->getValueType(0) != MVT::i32)
18084 SDValue VecReduceOp0 = N->getOperand(0);
18087 if (Opcode != ISD::ABS || VecReduceOp0->getValueType(0) != MVT::v16i32)
18092 if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
18093 ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
18096 SDValue SUB = ABS->getOperand(0);
18097 unsigned Opcode0 = SUB->getOperand(0).getOpcode();
18098 unsigned Opcode1 = SUB->getOperand(1).getOpcode();
18100 if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
18101 SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
18113 SDValue EXT0 = SUB->getOperand(0);
18114 SDValue EXT1 = SUB->getOperand(1);
18116 if (EXT0->getOperand(0)->getValueType(0) != MVT::v16i8 ||
18117 EXT1->getOperand(0)->getValueType(0) != MVT::v16i8)
18125 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
18128 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
18136 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
18139 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
18161 if (!ST->isNeonAvailable())
18164 if (!ST->hasDotProd())
18167 SDValue Op0 = N->getOperand(0);
18168 if (N->getValueType(0) != MVT::i32 || Op0.getValueType().isScalableVT() ||
18194 if (!ST->hasMatMulInt8())
18215 // For non-mla reductions B can be set to 1. For MLA we take the operand of
18237 return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
18260 DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), ConcatSDot16);
18276 DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
18277 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), VecReduceAdd16,
18332 // We can convert a UADDV(add(zext(64-bit source), zext(64-bit source))) into
18333 // UADDLV(concat), where the concat represents the 64-bit zext sources.
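// E.g. (assuming v8i8 inputs A and B):
//   uaddv(add(zext A to v8i16, zext B to v8i16))
//     --> uaddlv(concat_vectors(A, B) : v16i8)
// since summing the two zero-extended halves element-wise and then reducing
// equals one widening add-across-lanes over all sixteen bytes.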
18335 // Look for add(zext(64-bit source), zext(64-bit source)), returning
18349 // Check zext VTs are the same and 64-bit length.
18373 SDValue A = N->getOperand(0);
18376 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), R);
18397 if (isIntDivCheap(N->getValueType(0), Attr))
18400 EVT VT = N->getValueType(0);
18405 (VT.isFixedLengthVector() && Subtarget->useSVEForFixedLengthVectors()))
18413 // If the divisor is 2 or -2, the default expansion is better. It will add
18414 // (N->getValueType(0) >> (BitWidth - 1)) to it before shifting right.
18416 Divisor == APInt(Divisor.getBitWidth(), -2, /*isSigned*/ true))
18427 if (isIntDivCheap(N->getValueType(0), Attr))
18430 EVT VT = N->getValueType(0);
18434 if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
18447 SDValue N0 = N->getOperand(0);
18448 SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
18492 /// Calculates what the pre-extend type is, based on the extension
18496 /// pre-extend type is pulled directly from the operand, while other extend
18515 return TypeNode->getVT();
18523 uint32_t Mask = Constant->getZExtValue();
18550 SDValue Extend = BV->getOperand(0);
18565 // Restrict valid pre-extend data type
18573 for (SDValue Op : drop_begin(BV->ops())) {
18601 for (SDValue Op : BV->ops())
18612 cast<ShuffleVectorSDNode>(BV)->getMask());
18624 EVT VT = Mul->getValueType(0);
18628 SDValue Op0 = performBuildShuffleExtendCombine(Mul->getOperand(0), DAG);
18629 SDValue Op1 = performBuildShuffleExtendCombine(Mul->getOperand(1), DAG);
18636 return DAG.getNode(Mul->getOpcode(), DL, VT, Op0 ? Op0 : Mul->getOperand(0),
18637 Op1 ? Op1 : Mul->getOperand(1));
18640 // Combine v4i32 Mul(And(Srl(X, 15), 0x10001), 0xffff) -> v8i16 CMLTz
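// (The srl/and pair leaves each i16 half's sign bit at bit 0 / bit 16 of its
//  i32 lane, and multiplying by 0xffff fans each of those bits out into a full
//  0xffff / 0x0000 half, which is exactly "compare less than zero" on v8i16.)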
18643 EVT VT = N->getValueType(0);
18647 if (N->getOperand(0).getOpcode() != ISD::AND ||
18648 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
18651 SDValue And = N->getOperand(0);
18655 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
18662 V3 != (HalfSize - 1))
18680 EVT VT = N->getValueType(0);
18682 (N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
18683 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND) ||
18684 (N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
18685 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND) ||
18686 N->getOperand(0).getOperand(0).getValueType() !=
18687 N->getOperand(1).getOperand(0).getValueType())
18690 if (N->getOpcode() == ISD::MUL &&
18691 N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode())
18694 SDValue N0 = N->getOperand(0).getOperand(0);
18695 SDValue N1 = N->getOperand(1).getOperand(0);
18706 SDValue NewN0 = DAG.getNode(N->getOperand(0).getOpcode(), DL, HalfVT, N0);
18707 SDValue NewN1 = DAG.getNode(N->getOperand(1).getOpcode(), DL, HalfVT, N1);
18708 SDValue NewOp = DAG.getNode(N->getOpcode(), DL, HalfVT, NewN0, NewN1);
18709 return DAG.getNode(N->getOpcode() == ISD::MUL ? N->getOperand(0).getOpcode()
18730 // Canonicalize X*(Y+1) -> X*Y+X and (X+1)*Y -> X*Y+Y,
18732 // Similarly, X*(1-Y) -> X - X*Y and (1-Y)*X -> X - Y*X.
18734 EVT VT = N->getValueType(0);
18735 SDValue N0 = N->getOperand(0);
18736 SDValue N1 = N->getOperand(1);
18740 auto IsAddSubWith1 = [&](SDValue V) -> bool {
18741 AddSubOpc = V->getOpcode();
18742 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
18743 SDValue Opnd = V->getOperand(1);
18744 MulOper = V->getOperand(0);
18748 return C->isOne();
18768 const APInt &ConstValue = C->getAPIntValue();
18773 (N0->getOpcode() == ISD::TRUNCATE &&
18774 (IsSVECntIntrinsic(N0->getOperand(0)))))
18781 // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
18782 // 64-bit is 5 cycles, so this is always a win.
18794 if (N0->hasOneUse() && (isSignExtended(N0, DAG) ||
18799 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ADD ||
18800 N->user_begin()->getOpcode() == ISD::SUB))
18835 // C = 45 is equal to (1+4)*(1+8); we don't decompose it into (1+2)*(16-1) as
18836 // the (2^N - 1) can't be executed via a single instruction.
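// (With the (1+4)*(1+8) split, an illustrative expansion is
//    tmp = add(x, shl(x, 2))     ; 5 * x
//    res = add(tmp, shl(tmp, 3)) ; 45 * x
//  i.e. two cheap add-with-shifted-operand operations.)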
18843 APInt NVMinus1 = N - 1;
18853 // C = 11 is equal to (1+4)*2+1; we don't decompose it into (1+2)*4-1 as
18854 // the (2^N - 1) can't be executed via a single instruction.
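// (Arithmetically, 11 * x = x + ((x + (x << 2)) << 1): build 5 * x, shift to
//  get 10 * x, then add x once more.)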
18856 APInt CVMinus1 = C - 1;
18860 APInt SCVMinus1 = CVMinus1.ashr(TrailingZeroes) - 1;
18870 // Can the const C be decomposed into (1 - (1 - 2^M) * 2^N), eg:
18871 // C = 29 is equal to 1 - (1 - 2^3) * 2^2.
18873 APInt CVMinus1 = C - 1;
18889 // (mul x, 2^N - 1) => (sub (shl x, N), x)
18890 // (mul x, (2^(N-M) - 1) * 2^M) => (sub (shl x, N), (shl x, M))
18895 // (mul x, 1 - (1 - 2^M) * 2^N))
18896 // => MV = sub (x - (shl x, M)); sub (x - (shl MV, N))
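// E.g. for C = 29 (M = 3, N = 2): MV = x - (x << 3) = -7 * x, and then
// x - (MV << 2) = x + 28 * x = 29 * x.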
18897 APInt SCVMinus1 = ShiftedConstValue - 1;
18911 if (Subtarget->hasALULSLFast() &&
18913 APInt CVMMinus1 = CVM - 1;
18914 APInt CVNMinus1 = CVN - 1;
18923 if (Subtarget->hasALULSLFast() &&
18934 if (Subtarget->hasALULSLFast() &&
18945 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
18946 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
18947 // (mul x, -(2^(N-M) - 1) * 2^M) => (sub (shl x, M), (shl x, N))
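// E.g. (mul x, -7) becomes (sub x, (shl x, 3)) = x - 8 * x = -7 * x.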
18948 APInt SCVPlus1 = -ShiftedConstValue + 1;
18949 APInt CVNegPlus1 = -ConstValue + 1;
18950 APInt CVNegMinus1 = -ConstValue - 1;
18968 // Take advantage of vector comparisons producing 0 or -1 in each lane to
18972 // UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
18979 EVT VT = N->getValueType(0);
18980 if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
18981 N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
18982 VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
18986 // make the transformation for non-constant splats as well, but it's unclear
18990 dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
18992 if (!BV->isConstant())
18997 EVT IntVT = BV->getValueType(0);
19000 SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
19004 N->getOperand(0)->getOperand(0), MaskConst);
19012 /// Tries to replace scalar FP <-> INT conversions with SVE in streaming
19018 if (N->isStrictFPOpcode())
19024 if (!Subtarget->isSVEorStreamingSVEAvailable() ||
19025 (!Subtarget->isStreaming() && !Subtarget->isStreamingCompatible()))
19032 SDValue SrcVal = N->getOperand(0);
19034 EVT DestTy = N->getValueType(0);
19057 SDValue Convert = DAG.getNode(N->getOpcode(), DL, DestVecTy, Vec);
19073 EVT VT = N->getValueType(0);
19078 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
19081 // If the result of an integer load is only used by an integer-to-float
19083 // This eliminates an "integer-to-vector-move" UOP and improves throughput.
19084 SDValue N0 = N->getOperand(0);
19085 if (Subtarget->isNeonAvailable() && ISD::isNormalLoad(N0.getNode()) &&
19088 !cast<LoadSDNode>(N0)->isVolatile()) {
19090 SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
19091 LN0->getPointerInfo(), LN0->getAlign(),
19092 LN0->getMemOperand()->getFlags());
19099 (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
19106 /// Fold a floating-point multiply by power of two into floating-point to
19107 /// fixed-point conversion.
19115 if (!Subtarget->isNeonAvailable())
19118 if (!N->getValueType(0).isSimple())
19121 SDValue Op = N->getOperand(0);
19128 SDValue ConstVec = Op->getOperand(1);
19135 (FloatBits != 16 || !Subtarget->hasFullFP16()))
19138 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
19143 // Avoid conversions where iN is larger than the float (e.g., float -> i64).
19150 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
19151 if (C == -1 || C == 0 || C > Bits)
19158 if (N->getOpcode() == ISD::FP_TO_SINT_SAT ||
19159 N->getOpcode() == ISD::FP_TO_UINT_SAT) {
19160 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
19166 bool IsSigned = (N->getOpcode() == ISD::FP_TO_SINT ||
19167 N->getOpcode() == ISD::FP_TO_SINT_SAT);
19173 Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
19176 FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
19183 EVT VT = N->getValueType(0);
19198 SDValue N0 = N->getOperand(0);
19202 SDValue N1 = N->getOperand(1);
19206 // InstCombine does (not (neg a)) => (add a -1).
19207 // Try: (or (and (neg a) b) (and (add a -1) c)) => (bsl (neg a) b c)
19209 for (int i = 1; i >= 0; --i) {
19210 for (int j = 1; j >= 0; --j) {
19211 SDValue O0 = N0->getOperand(i);
19212 SDValue O1 = N1->getOperand(j);
19219 SubSibling = N0->getOperand(1 - i);
19220 AddSibling = N1->getOperand(1 - j);
19224 AddSibling = N0->getOperand(1 - i);
19225 SubSibling = N1->getOperand(1 - j);
19247 uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
19248 for (int i = 1; i >= 0; --i)
19249 for (int j = 1; j >= 0; --j) {
19252 if (ISD::isConstantSplatVector(N0->getOperand(i).getNode(), Val1) &&
19253 ISD::isConstantSplatVector(N1->getOperand(j).getNode(), Val2) &&
19255 return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i),
19256 N0->getOperand(1 - i), N1->getOperand(1 - j));
19258 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
19259 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
19265 ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
19266 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
19268 CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
19274 return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i),
19275 N0->getOperand(1 - i), N1->getOperand(1 - j));
19292 EVT VT = N->getValueType(0);
19293 SDValue CSel0 = N->getOperand(0);
19294 SDValue CSel1 = N->getOperand(1);
19300 if (!CSel0->hasOneUse() || !CSel1->hasOneUse())
19313 if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
19328 if (N->getOpcode() == ISD::AND) {
19341 if (Op1 && Op1->getAPIntValue().isNegative() &&
19342 Op1->getAPIntValue().sgt(-32)) {
19344 // if the Op1 is a constant in the range [-31, -1], we
19347 DAG.getConstant(Op1->getAPIntValue().abs(), DL, Op1->getValueType(0));
19363 EVT VT = N->getValueType(0);
19397 if (N->getOpcode() == AArch64ISD::DUP || N->getOpcode() == ISD::SPLAT_VECTOR)
19398 if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0)))
19399 return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
19406 SDValue Op = N->getOperand(0);
19409 Op = Op->getOperand(0);
19418 SDValue Src = N->getOperand(0);
19419 unsigned Opc = Src->getOpcode();
19423 SDValue UnpkOp = Src->getOperand(0);
19424 SDValue Dup = N->getOperand(1);
19430 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
19434 uint64_t ExtVal = C->getZExtValue();
19436 auto MaskAndTypeMatch = [ExtVal](EVT VT) -> bool {
19442 // If the mask is fully covered by the unpack, we don't need to push
19444 EVT EltTy = UnpkOp->getValueType(0).getVectorElementType();
19448 // If this is 'and (uunpklo/hi (extload MemTy -> ExtTy)), mask', then check
19449 // to see if the mask is all-ones of size MemTy.
19451 if (MaskedLoadOp && (MaskedLoadOp->getExtensionType() == ISD::ZEXTLOAD ||
19452 MaskedLoadOp->getExtensionType() == ISD::EXTLOAD)) {
19453 EVT EltTy = MaskedLoadOp->getMemoryVT().getVectorElementType();
19459 APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits());
19463 Dup = DAG.getNode(ISD::SPLAT_VECTOR, DL, UnpkOp->getValueType(0),
19467 UnpkOp->getValueType(0), UnpkOp, Dup);
19469 return DAG.getNode(Opc, DL, N->getValueType(0), And);
19477 if (isAllActivePredicate(DAG, N->getOperand(0)))
19478 return N->getOperand(1);
19479 if (isAllActivePredicate(DAG, N->getOperand(1)))
19480 return N->getOperand(0);
19485 SDValue Mask = N->getOperand(1);
19492 // SVE load instructions perform an implicit zero-extend, which makes them
19498 MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
19515 MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
19534 SDValue SetCC = N->getOperand(0);
19535 EVT VT = N->getValueType(0);
19541 for (auto U : N->users())
19542 if (U->getOpcode() == ISD::SELECT)
19545 // Check if the operand is a SETCC node with floating-point comparison
19570 SDValue LHS = N->getOperand(0);
19571 SDValue RHS = N->getOperand(1);
19572 EVT VT = N->getValueType(0);
19633 SDValue LHS = N->getOperand(0);
19634 SDValue RHS = N->getOperand(1);
19635 EVT VT = N->getValueType(0);
19638 if (!N->getFlags().hasAllowReassociation())
19641 // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
19653 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(1), B, N->getFlags()),
19655 VCMLA->setFlags(A->getFlags());
19705 assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
19707 if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
19710 SDValue N0 = N->getOperand(0);
19714 !isNullConstant(N->getOperand(1)))
19718 // flag-setting operation.
19725 return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::FIRST_ACTIVE);
19728 // Materialize : Idx = (add (mul vscale, NumEls), -1)
19735 assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
19737 if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
19740 SDValue N0 = N->getOperand(0);
19746 // Idx == (add (mul vscale, NumEls), -1)
19747 SDValue Idx = N->getOperand(1);
19759 // Extracts of lane EC-1 for SVE can be expressed as PTEST(Op, LAST) ? 1 : 0
19762 return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::LAST_ACTIVE);
19768 assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
19775 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
19777 EVT VT = N->getValueType(0);
19779 bool IsStrict = N0->isStrictFPOpcode();
19781 // extract(dup x) -> x
19790 // ->
19795 if (isNullConstant(N1) && hasPairwiseAdd(N0->getOpcode(), VT, FullFP16) &&
19798 SDValue N00 = N0->getOperand(IsStrict ? 1 : 0);
19799 SDValue N01 = N0->getOperand(IsStrict ? 2 : 1);
19810 if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
19811 Other == Shuffle->getOperand(0)) {
19817 return DAG.getNode(N0->getOpcode(), DL, VT, Extract1, Extract2);
19823 SDValue Ret = DAG.getNode(N0->getOpcode(), DL,
19825 {N0->getOperand(0), Extract1, Extract2});
19839 EVT VT = N->getValueType(0);
19840 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
19841 unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
19846 if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
19848 SDValue N00 = N0->getOperand(0);
19849 SDValue N10 = N1->getOperand(0);
19857 // ->
19861 // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
19863 // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
19892 N00->getOperand(1) == N10->getOperand(1)) {
19893 SDValue N000 = N00->getOperand(0);
19894 SDValue N100 = N10->getOperand(0);
19895 uint64_t N001ConstVal = N00->getConstantOperandVal(1),
19896 N101ConstVal = N10->getConstantOperandVal(1),
19897 NScalarSize = N->getValueType(0).getScalarSizeInBits();
19904 DAG.getConstant(N001ConstVal - NScalarSize, dl, MVT::i32);
19911 if (N->getOperand(0).getValueType() == MVT::v4i8 ||
19912 N->getOperand(0).getValueType() == MVT::v2i16 ||
19913 N->getOperand(0).getValueType() == MVT::v2i8) {
19914 EVT SrcVT = N->getOperand(0).getValueType();
19918 if (N->getNumOperands() % 2 == 0 &&
19919 all_of(N->op_values(), [SrcVT](SDValue V) {
19925 return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() &&
19926 LD->getExtensionType() == ISD::NON_EXTLOAD;
19929 EVT NVT = EVT::getVectorVT(*DAG.getContext(), FVT, N->getNumOperands());
19932 for (unsigned i = 0; i < N->getNumOperands(); i++) {
19933 SDValue V = N->getOperand(i);
19938 SDValue NewLoad = DAG.getLoad(FVT, dl, LD->getChain(),
19939 LD->getBasePtr(), LD->getMemOperand());
19944 return DAG.getBitcast(N->getValueType(0),
19954 // ->
19957 if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
19958 N1Opc == ISD::TRUNCATE && N->isOnlyUserOf(N0.getNode()) &&
19959 N->isOnlyUserOf(N1.getNode())) {
19961 return V->getOpcode() == ISD::XOR &&
19964 SDValue N00 = N0->getOperand(0);
19965 SDValue N10 = N1->getOperand(0);
19966 if (isBitwiseVectorNegate(N00) && N0->isOnlyUserOf(N00.getNode()) &&
19967 isBitwiseVectorNegate(N10) && N1->isOnlyUserOf(N10.getNode())) {
19972 N00->getOperand(0)),
19974 N10->getOperand(0))),
19984 // Optimise concat_vectors of two identical binops with a 128-bit destination
19986 // concat(uhadd(a,b), uhadd(c, d)) -> uhadd(concat(a, c), concat(b, d))
19987 if (N->getNumOperands() == 2 && N0Opc == N1Opc && VT.is128BitVector() &&
19988 DAG.getTargetLoweringInfo().isBinOp(N0Opc) && N0->hasOneUse() &&
19989 N1->hasOneUse()) {
19990 SDValue N00 = N0->getOperand(0);
19991 SDValue N01 = N0->getOperand(1);
19992 SDValue N10 = N1->getOperand(0);
19993 SDValue N11 = N1->getOperand(1);
20023 if (Imm != 1ULL << (ShtAmt - 1))
20028 // concat(rshrn(x), rshrn(y)) -> rshrn(concat(x, y))
20029 if (N->getNumOperands() == 2 && IsRSHRN(N0) &&
20041 DAG.getConstant(1ULL << (N0.getConstantOperandVal(1) - 1), dl, BVT));
20048 if (N->getNumOperands() == 2 && N0Opc == AArch64ISD::ZIP1 &&
20061 if (N->getNumOperands() == 2 && N0 == N1 && VT.getVectorNumElements() == 2) {
20067 // Canonicalise concat_vectors so that the right-hand vector has as few
20068 // bit-casts as possible before its real operation. The primary matching
20070 // which depend on the operation being performed on this right-hand vector.
20076 if (N->getNumOperands() != 2 || N1Opc != ISD::BITCAST)
20078 SDValue RHS = N1->getOperand(0);
20085 dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
20101 EVT VT = N->getValueType(0);
20105 SDValue V = N->getOperand(0);
20108 // blocks this combine because the non-const case requires custom lowering.
20110 // ty1 extract_vector(ty2 splat(const))) -> ty1 splat(const)
20122 SDValue Vec = N->getOperand(0);
20123 SDValue SubVec = N->getOperand(1);
20124 uint64_t IdxVal = N->getConstantOperandVal(2);
20144 // Fold insert_subvector -> concat_vectors
20145 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
20146 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
20173 // register allocator to avoid cross-class register copies that aren't
20177 SDValue Op1 = N->getOperand(1);
20182 SDValue IID = N->getOperand(0);
20183 SDValue Shift = N->getOperand(2);
20186 EVT ResTy = N->getValueType(0);
20208 // AArch64 high-vector "long" operations are formed by performing the non-high
20217 // (dupv64 scalar) --> (extract_high (dup128 scalar))
20221 // It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
20257 N = DAG.getNode(N->getOpcode(), DL, NewVT, N->ops());
20313 SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
20319 // - csel 1, 0, cc
20320 // - csel 0, 1, !cc
20341 if (!TValue->isOne()) {
20347 return TValue->isOne() && FValue->isZero();
20355 isSetCC(Op->getOperand(0), Info));
20360 // -->
20365 assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
20366 SDValue LHS = Op->getOperand(0);
20367 SDValue RHS = Op->getOperand(1);
20386 ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
20387 : InfoAndKind.Info.Generic.Opnd0->getValueType();
20405 EVT VT = Op->getValueType(0);
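// Illustrative sketch, not part of AArch64ISelLowering.cpp: a standalone scalar
// model of the identity the setcc+add folding above relies on. A materialised
// setcc is csel(1, 0, cc), and x + csel(1, 0, cc) equals csel(x, x + 1, !cc),
// which is what a CSINC computes. All names below are hypothetical.
#include <cassert>
#include <cstdint>
static int64_t csel(int64_t T, int64_t F, bool CC) { return CC ? T : F; }
int main() {
  for (bool CC : {false, true})
    for (int64_t X : {-5, 0, 41})
      assert(X + csel(1, 0, CC) == csel(X, X + 1, !CC)); // csinc x, x, !cc
}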
20410 // ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
20412 EVT VT = N->getValueType(0);
20414 if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
20417 SDValue LHS = N->getOperand(0);
20418 SDValue RHS = N->getOperand(1);
20423 auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
20424 auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
20425 if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero())
20428 SDValue Op1 = LHS->getOperand(0);
20429 SDValue Op2 = RHS->getOperand(0);
20439 EVT ValVT = Val1->getValueType(0);
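// Illustrative sketch, not part of AArch64ISelLowering.cpp: the
// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b) rewrite rests on unsigned,
// wrapping sums commuting with lanewise addition: sum(a) + sum(b) == sum(a + b).
#include <array>
#include <cassert>
#include <cstdint>
#include <numeric>
int main() {
  std::array<uint32_t, 4> A{1, 2, 3, 0xffffffffu}, B{5, 6, 7, 8}, C{};
  for (size_t I = 0; I < A.size(); ++I)
    C[I] = A[I] + B[I]; // lanewise ADD, wraps exactly like the vector op
  uint32_t SumA = std::accumulate(A.begin(), A.end(), 0u);
  uint32_t SumB = std::accumulate(B.begin(), B.end(), 0u);
  uint32_t SumC = std::accumulate(C.begin(), C.end(), 0u);
  assert(SumA + SumB == SumC); // ADD(UADDV a, UADDV b) == UADDV(ADD a, b)
}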
20449 /// CSNEG(c, -1, cc) + b => CSINC(b+c, b, cc)
20451 EVT VT = N->getValueType(0);
20452 if (!VT.isScalarInteger() || N->getOpcode() != ISD::ADD)
20455 SDValue LHS = N->getOperand(0);
20456 SDValue RHS = N->getOperand(1);
20482 (CTVal->isOne() || CFVal->isOne())) &&
20484 (CTVal->isOne() || CFVal->isAllOnes())))
20488 if (LHS.getOpcode() == AArch64ISD::CSEL && CTVal->isOne() &&
20489 !CFVal->isOne()) {
20495 // Switch CSNEG(1, c, cc) to CSNEG(-c, -1, !cc)
20496 if (LHS.getOpcode() == AArch64ISD::CSNEG && CTVal->isOne() &&
20497 !CFVal->isAllOnes()) {
20498 APInt C = -1 * CFVal->getAPIntValue();
20506 APInt ADDC = CTVal->getAPIntValue();
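// Illustrative sketch, not part of AArch64ISelLowering.cpp: scalar check of the
// identities behind this fold. CSNEG(c, -1, cc) selects c or -(-1) == 1 and
// CSEL(c, 1, cc) selects c or 1, so adding b gives cc ? b + c : b + 1, which is
// exactly CSINC(b + c, b, cc).
#include <cassert>
#include <cstdint>
static int64_t csel(int64_t T, int64_t F, bool CC) { return CC ? T : F; }
static int64_t csneg(int64_t T, int64_t F, bool CC) { return CC ? T : -F; }
static int64_t csinc(int64_t T, int64_t F, bool CC) { return CC ? T : F + 1; }
int main() {
  for (bool CC : {false, true})
    for (int64_t B : {-7, 0, 42})
      for (int64_t C : {-3, 0, 5}) {
        assert(csneg(C, -1, CC) + B == csinc(B + C, B, CC));
        assert(csel(C, 1, CC) + B == csinc(B + C, B, CC));
      }
}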
20511 assert(((LHS.getOpcode() == AArch64ISD::CSEL && CFVal->isOne()) ||
20512 (LHS.getOpcode() == AArch64ISD::CSNEG && CFVal->isAllOnes())) &&
20522 // ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
20524 EVT VT = N->getValueType(0);
20525 if (N->getOpcode() != ISD::ADD)
20528 SDValue Dot = N->getOperand(0);
20529 SDValue A = N->getOperand(1);
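// Illustrative sketch, not part of AArch64ISelLowering.cpp: per-lane model of
// why the zero accumulator can be replaced by A. A dot-product lane adds four
// byte products into its 32-bit accumulator, so UDOT(0, x, y) + A == UDOT(A, x, y).
#include <cassert>
#include <cstdint>
static uint32_t udotLane(uint32_t Acc, const uint8_t X[4], const uint8_t Y[4]) {
  for (int J = 0; J < 4; ++J)
    Acc += uint32_t(X[J]) * uint32_t(Y[J]);
  return Acc;
}
int main() {
  const uint8_t X[4] = {1, 2, 3, 250}, Y[4] = {7, 0, 255, 9};
  for (uint32_t A : {0u, 5u, 0xffffff00u})
    assert(udotLane(0, X, Y) + A == udotLane(A, X, Y));
}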
20558 // (neg (csel X, Y)) -> (csel (neg X), (neg Y))
20567 SDValue CSel = N->getOperand(1);
20568 if (CSel.getOpcode() != AArch64ISD::CSEL || !CSel->hasOneUse())
20589 // which act on the high-half of their inputs. They are normally matched by
20594 // -> uaddl2 vD, vN, vM
20605 MVT VT = N->getSimpleValueType(0);
20607 if (N->getOpcode() == ISD::ADD)
20613 SDValue LHS = N->getOperand(0);
20614 SDValue RHS = N->getOperand(1);
20638 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
20643 !Op.getNode()->hasAnyUseOfValue(0);
20667 SDValue CmpOp = Op->getOperand(2);
20679 SDValue CsetOp = CmpOp->getOperand(IsAdd ? 0 : 1);
20684 return DAG.getNode(Op->getOpcode(), SDLoc(Op), Op->getVTList(),
20685 Op->getOperand(0), Op->getOperand(1),
20691 SDValue LHS = N->getOperand(0);
20692 SDValue RHS = N->getOperand(1);
20693 SDValue Cond = N->getOperand(2);
20698 EVT VT = N->getValueType(0);
20710 EVT VT = N->getValueType(0);
20714 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1),
20715 Elt2 = N->getOperand(2), Elt3 = N->getOperand(3);
20716 if (Elt0->getOpcode() == ISD::FP_ROUND &&
20717 Elt1->getOpcode() == ISD::FP_ROUND &&
20718 isa<ConstantSDNode>(Elt0->getOperand(1)) &&
20719 isa<ConstantSDNode>(Elt1->getOperand(1)) &&
20720 Elt0->getConstantOperandVal(1) == Elt1->getConstantOperandVal(1) &&
20721 Elt0->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20722 Elt1->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20724 isa<ConstantSDNode>(Elt0->getOperand(0)->getOperand(1)) &&
20725 isa<ConstantSDNode>(Elt1->getOperand(0)->getOperand(1)) &&
20726 Elt0->getOperand(0)->getOperand(0) ==
20727 Elt1->getOperand(0)->getOperand(0) &&
20728 Elt0->getOperand(0)->getConstantOperandVal(1) == 0 &&
20729 Elt1->getOperand(0)->getConstantOperandVal(1) == 1) {
20730 SDValue LowLanesSrcVec = Elt0->getOperand(0)->getOperand(0);
20733 if (Elt2->getOpcode() == ISD::UNDEF &&
20734 Elt3->getOpcode() == ISD::UNDEF) {
20736 } else if (Elt2->getOpcode() == ISD::FP_ROUND &&
20737 Elt3->getOpcode() == ISD::FP_ROUND &&
20738 isa<ConstantSDNode>(Elt2->getOperand(1)) &&
20739 isa<ConstantSDNode>(Elt3->getOperand(1)) &&
20740 Elt2->getConstantOperandVal(1) ==
20741 Elt3->getConstantOperandVal(1) &&
20742 Elt2->getOperand(0)->getOpcode() ==
20744 Elt3->getOperand(0)->getOpcode() ==
20747 isa<ConstantSDNode>(Elt2->getOperand(0)->getOperand(1)) &&
20748 isa<ConstantSDNode>(Elt3->getOperand(0)->getOperand(1)) &&
20749 Elt2->getOperand(0)->getOperand(0) ==
20750 Elt3->getOperand(0)->getOperand(0) &&
20751 Elt2->getOperand(0)->getConstantOperandVal(1) == 0 &&
20752 Elt3->getOperand(0)->getConstantOperandVal(1) == 1) {
20753 SDValue HighLanesSrcVec = Elt2->getOperand(0)->getOperand(0);
20763 Elt0->getOperand(1));
20770 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1);
20771 if (Elt0->getOpcode() == ISD::FP_EXTEND &&
20772 Elt1->getOpcode() == ISD::FP_EXTEND &&
20773 Elt0->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20774 Elt1->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20775 Elt0->getOperand(0)->getOperand(0) ==
20776 Elt1->getOperand(0)->getOperand(0) &&
20778 isa<ConstantSDNode>(Elt0->getOperand(0)->getOperand(1)) &&
20779 isa<ConstantSDNode>(Elt1->getOperand(0)->getOperand(1)) &&
20780 Elt0->getOperand(0)->getConstantOperandVal(1) + 1 ==
20781 Elt1->getOperand(0)->getConstantOperandVal(1) &&
20784 Elt0->getOperand(0)->getConstantOperandVal(1) %
20787 SDValue SrcVec = Elt0->getOperand(0)->getOperand(0);
20792 SDValue SubvectorIdx = Elt0->getOperand(0)->getOperand(1);
20802 // extract subvector where the inner vector is any-extended to the
20813 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1);
20814 // Reminder, EXTRACT_VECTOR_ELT has the effect of any-extending to its VT.
20815 if (Elt0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20816 Elt1->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20818 isa<ConstantSDNode>(Elt0->getOperand(1)) &&
20819 isa<ConstantSDNode>(Elt1->getOperand(1)) &&
20821 Elt0->getOperand(0) == Elt1->getOperand(0) &&
20823 Elt0->getConstantOperandVal(1) + 1 == Elt1->getConstantOperandVal(1) &&
20826 Elt0->getConstantOperandVal(1) % VT.getVectorMinNumElements() == 0) {
20827 SDValue VecToExtend = Elt0->getOperand(0);
20832 SDValue SubvectorIdx = DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL);
20845 EVT VT = N->getValueType(0);
20846 SDValue N0 = N->getOperand(0);
20857 // i32 (trunc (extract Vi64, idx)) -> i32 (extract (nvcast Vi32), idx*2))
20877 cast<ConstantSDNode>(ExtractIndexNode)->getZExtValue();
20894 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
20903 uint64_t AndMask = CSD->getZExtValue();
20912 // (N - Y) + Z --> (Z - Y) + N
20935 EVT VT = N->getValueType(0);
20946 if (N->getOpcode() != ISD::ADD)
20951 EVT VT = N->getValueType(0);
20956 SDValue LHS = N->getOperand(0);
20957 SDValue RHS = N->getOperand(1);
20971   // with LSL (shift > 4). For the rest of the processors, this is a no-op for
20984 if (N->getOpcode() != ISD::SUB)
20987 SDValue Add = N->getOperand(1);
20988 SDValue X = N->getOperand(0);
21006 EVT VT = N->getValueType(0);
21028 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::SUB)
21031 if (!N->getValueType(0).isFixedLengthVector())
21034 auto performOpt = [&DAG, &N](SDValue Op0, SDValue Op1) -> SDValue {
21038 if (!cast<ConstantSDNode>(Op1->getOperand(1))->isZero())
21041 SDValue MulValue = Op1->getOperand(0);
21054 DAG.getNode(N->getOpcode(), SDLoc(N), ScalableVT, {ScaledOp, MulValue});
21055 return convertFromScalableVector(DAG, N->getValueType(0), NewValue);
21058 if (SDValue res = performOpt(N->getOperand(0), N->getOperand(1)))
21060 else if (N->getOpcode() == ISD::ADD)
21061 return performOpt(N->getOperand(1), N->getOperand(0));
21069 EVT VT = N->getValueType(0);
21071 DAG.getTargetLoweringInfo().isOperationExpand(N->getOpcode(), MVT::v1i64))
21073 SDValue Op0 = N->getOperand(0);
21074 SDValue Op1 = N->getOperand(1);
21098 DAG.getNode(N->getOpcode(), DL, MVT::v1i64, Op0, Op1),
21104 if (!BV->hasOneUse())
21107 if (!Ld || !Ld->isSimple())
21115 if (!Ld || !Ld->isSimple() || !BV.getOperand(Op).hasOneUse())
21146 if (SV1->getMaskElt(I) != I ||
21147 SV1->getMaskElt(I + NumSubElts) != I + NumSubElts ||
21148 SV1->getMaskElt(I + NumSubElts * 2) != I + NumSubElts * 2 ||
21149 SV1->getMaskElt(I + NumSubElts * 3) != I + NumElts)
21152 if (SV2->getMaskElt(I) != I ||
21153 SV2->getMaskElt(I + NumSubElts) != I + NumSubElts ||
21154 SV2->getMaskElt(I + NumSubElts * 2) != I + NumElts)
21157 auto *Ld0 = dyn_cast<LoadSDNode>(SV2->getOperand(0).getOperand(0));
21158 auto *Ld1 = dyn_cast<LoadSDNode>(SV2->getOperand(0).getOperand(1));
21159 auto *Ld2 = dyn_cast<LoadSDNode>(SV2->getOperand(1).getOperand(0));
21161 if (!Ld0 || !Ld1 || !Ld2 || !Ld3 || !Ld0->isSimple() || !Ld1->isSimple() ||
21162 !Ld2->isSimple() || !Ld3->isSimple())
21187 unsigned Size = get<0>(L)->getValueType(0).getSizeInBits();
21188 return Size == get<1>(L)->getValueType(0).getSizeInBits() &&
21229 EVT VT = N->getValueType(0);
21235 SDValue Other = N->getOperand(0);
21236 SDValue Shift = N->getOperand(1);
21237 if (Shift.getOpcode() != ISD::SHL && N->getOpcode() != ISD::SUB)
21286 SDValue Load = DAG.getLoad(DLoadVT, SDLoc(L0), L0->getChain(),
21287 L0->getBasePtr(), L0->getPointerInfo(),
21288 L0->getOriginalAlign());
21297 for (const auto &[O0, O1] : zip(Op0->op_values(), Op1->op_values()))
21343 return DAG.getNode(N->getOpcode(), DL, VT, Ext0, NShift);
21374 // Massage DAGs which we can use the high-half "long" operations on into
21377 // (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
21387 SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
21388 SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
21394 // just as well use the non-high version) so look for a corresponding extract
21408 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);
21410 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
21411 N->getOperand(0), LHS, RHS);
21415 MVT ElemTy = N->getSimpleValueType(0).getScalarType();
21419 if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
21423 if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
21429 } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
21430 ShiftAmount = CVN->getSExtValue();
21436 return N->getOperand(1);
21471 ShiftAmount = -ShiftAmount;
21478 EVT VT = N->getValueType(0);
21479 SDValue Op = N->getOperand(1);
21486 if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
21488 DAG.getConstant(-ShiftAmount, dl, MVT::i32));
21489 if (N->getValueType(0) == MVT::i64)
21496 if (N->getValueType(0) == MVT::i64)
21509 SDValue AndN = N->getOperand(2);
21514 if (!CMask || CMask->getZExtValue() != Mask)
21518 N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
21524 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
21526 N->getOperand(1).getSimpleValueType(),
21527 N->getOperand(1)),
21533 SDValue Op1 = N->getOperand(1);
21534 SDValue Op2 = N->getOperand(2);
21540 SDValue StepVector = DAG.getStepVector(DL, N->getValueType(0));
21541 SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2);
21542 SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step);
21543 SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1);
21544 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base);
21549 SDValue Scalar = N->getOperand(3);
21555 SDValue Passthru = N->getOperand(1);
21556 SDValue Pred = N->getOperand(2);
21557 return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0),
21564 EVT VT = N->getValueType(0);
21568 // Current lowering only supports the SVE-ACLE types.
21578 SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
21579 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2));
21580 SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3),
21593 SDValue Comparator = N->getOperand(3);
21597 EVT VT = N->getValueType(0);
21598 EVT CmpVT = N->getOperand(2).getValueType();
21599 SDValue Pred = N->getOperand(1);
21616 int64_t ImmVal = CN->getSExtValue();
21617 if (ImmVal >= -16 && ImmVal <= 15)
21630 uint64_t ImmVal = CN->getZExtValue();
21645 N->getOperand(2), Splat, DAG.getCondCode(CC));
21693 SDValue Pred = N->getOperand(1);
21694 SDValue VecToReduce = N->getOperand(2);
21698 EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
21704 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
21712 SDValue Pred = N->getOperand(1);
21713 SDValue VecToReduce = N->getOperand(2);
21721 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
21729 SDValue Pred = N->getOperand(1);
21730 SDValue InitVal = N->getOperand(2);
21731 SDValue VecToReduce = N->getOperand(3);
21744 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
21754 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
21755 assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
21756 SDValue Pg = N->getOperand(1);
21757 SDValue Op1 = N->getOperand(SwapOperands ? 3 : 2);
21758 SDValue Op2 = N->getOperand(SwapOperands ? 2 : 3);
21763 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op1, Op2);
21765 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg, Op1, Op2);
21778 if (!Subtarget->hasSVE2p1())
21781 if (!N->hasNUsesOfValue(2, 0))
21784 const uint64_t HalfSize = N->getValueType(0).getVectorMinNumElements() / 2;
21788 auto It = N->user_begin();
21792 if (Lo->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
21793 Hi->getOpcode() != ISD::EXTRACT_SUBVECTOR)
21796 uint64_t OffLo = Lo->getConstantOperandVal(1);
21797 uint64_t OffHi = Hi->getConstantOperandVal(1);
21807 EVT HalfVec = Lo->getValueType(0);
21808 if (HalfVec != Hi->getValueType(0) ||
21816 SDValue Idx = N->getOperand(1);
21817 SDValue TC = N->getOperand(2);
21824 {Lo->getValueType(0), Hi->getValueType(0)}, {ID, Idx, TC});
21836 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
21841 bool Scalable = N->getValueType(0).isScalableVector();
21842 if (Scalable && !Subtarget->isSVEorStreamingSVEAvailable())
21844 if (!Scalable && (!Subtarget->isNeonAvailable() || !Subtarget->hasDotProd()))
21849 SDValue Op2 = N->getOperand(2);
21850 unsigned Op2Opcode = Op2->getOpcode();
21855 MulOpLHS = Op2->getOperand(0);
21858 SDValue ExtMulOpLHS = Op2->getOperand(0);
21859 SDValue ExtMulOpRHS = Op2->getOperand(1);
21861 unsigned ExtMulOpLHSOpcode = ExtMulOpLHS->getOpcode();
21862 unsigned ExtMulOpRHSOpcode = ExtMulOpRHS->getOpcode();
21870 MulOpLHS = ExtMulOpLHS->getOperand(0);
21871 MulOpRHS = ExtMulOpRHS->getOperand(0);
21878 SDValue Acc = N->getOperand(1);
21879 EVT ReducedVT = N->getValueType(0);
21895 if (!Subtarget->hasMatMulInt8())
21898 bool Scalable = N->getValueType(0).isScalableVT();
21931 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
21936 if (!Subtarget->hasSVE2() && !Subtarget->isStreamingSVEAvailable())
21941 if (!ISD::isExtOpcode(N->getOperand(2).getOpcode()))
21943 SDValue Acc = N->getOperand(1);
21944 SDValue Ext = N->getOperand(2);
21950 SDValue ExtOp = Ext->getOperand(0);
21979 return DAG.getPartialReduceAdd(SDLoc(N), N->getValueType(0),
21980 N->getOperand(1), N->getOperand(2));
21998 return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
21999 N->getOperand(1), N->getOperand(2));
22001 return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
22002 N->getOperand(1), N->getOperand(2));
22004 return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
22005 N->getOperand(1), N->getOperand(2));
22007 return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
22008 N->getOperand(1), N->getOperand(2));
22010 return DAG.getNode(AArch64ISD::SMULL, SDLoc(N), N->getValueType(0),
22011 N->getOperand(1), N->getOperand(2));
22013 return DAG.getNode(AArch64ISD::UMULL, SDLoc(N), N->getValueType(0),
22014 N->getOperand(1), N->getOperand(2));
22016 return DAG.getNode(AArch64ISD::PMULL, SDLoc(N), N->getValueType(0),
22017 N->getOperand(1), N->getOperand(2));
22029 return DAG.getNode(ISD::ABDS, SDLoc(N), N->getValueType(0),
22030 N->getOperand(1), N->getOperand(2));
22032 return DAG.getNode(ISD::ABDU, SDLoc(N), N->getValueType(0),
22033 N->getOperand(1), N->getOperand(2));
22042 if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
22067 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
22068 N->getOperand(1));
22072 return DAG.getNode(AArch64ISD::MUL_PRED, SDLoc(N), N->getValueType(0),
22073 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22075 return DAG.getNode(AArch64ISD::MULHS_PRED, SDLoc(N), N->getValueType(0),
22076 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22078 return DAG.getNode(AArch64ISD::MULHU_PRED, SDLoc(N), N->getValueType(0),
22079 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22081 return DAG.getNode(AArch64ISD::SMIN_PRED, SDLoc(N), N->getValueType(0),
22082 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22084 return DAG.getNode(AArch64ISD::UMIN_PRED, SDLoc(N), N->getValueType(0),
22085 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22087 return DAG.getNode(AArch64ISD::SMAX_PRED, SDLoc(N), N->getValueType(0),
22088 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22090 return DAG.getNode(AArch64ISD::UMAX_PRED, SDLoc(N), N->getValueType(0),
22091 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22093 return DAG.getNode(AArch64ISD::SHL_PRED, SDLoc(N), N->getValueType(0),
22094 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22096 return DAG.getNode(AArch64ISD::SRL_PRED, SDLoc(N), N->getValueType(0),
22097 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22099 return DAG.getNode(AArch64ISD::SRA_PRED, SDLoc(N), N->getValueType(0),
22100 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22102 return DAG.getNode(AArch64ISD::FADD_PRED, SDLoc(N), N->getValueType(0),
22103 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22105 return DAG.getNode(AArch64ISD::FDIV_PRED, SDLoc(N), N->getValueType(0),
22106 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22108 return DAG.getNode(AArch64ISD::FMAX_PRED, SDLoc(N), N->getValueType(0),
22109 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22111 return DAG.getNode(AArch64ISD::FMAXNM_PRED, SDLoc(N), N->getValueType(0),
22112 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22114 return DAG.getNode(AArch64ISD::FMA_PRED, SDLoc(N), N->getValueType(0),
22115 N->getOperand(1), N->getOperand(3), N->getOperand(4),
22116 N->getOperand(2));
22118 return DAG.getNode(AArch64ISD::FMIN_PRED, SDLoc(N), N->getValueType(0),
22119 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22121 return DAG.getNode(AArch64ISD::FMINNM_PRED, SDLoc(N), N->getValueType(0),
22122 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22124 return DAG.getNode(AArch64ISD::FMUL_PRED, SDLoc(N), N->getValueType(0),
22125 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22127 return DAG.getNode(AArch64ISD::FSUB_PRED, SDLoc(N), N->getValueType(0),
22128 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22130 return DAG.getNode(ISD::ADD, SDLoc(N), N->getValueType(0), N->getOperand(2),
22131 N->getOperand(3));
22133 return DAG.getNode(ISD::SUB, SDLoc(N), N->getValueType(0), N->getOperand(2),
22134 N->getOperand(3));
22138 return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0), N->getOperand(2),
22139 N->getOperand(3));
22141 return DAG.getNode(AArch64ISD::BIC, SDLoc(N), N->getValueType(0),
22142 N->getOperand(2), N->getOperand(3));
22144 return DAG.getNode(AArch64ISD::SADDWB, SDLoc(N), N->getValueType(0),
22145 N->getOperand(1), N->getOperand(2));
22147 return DAG.getNode(AArch64ISD::SADDWT, SDLoc(N), N->getValueType(0),
22148 N->getOperand(1), N->getOperand(2));
22150 return DAG.getNode(AArch64ISD::UADDWB, SDLoc(N), N->getValueType(0),
22151 N->getOperand(1), N->getOperand(2));
22153 return DAG.getNode(AArch64ISD::UADDWT, SDLoc(N), N->getValueType(0),
22154 N->getOperand(1), N->getOperand(2));
22156 return DAG.getNode(ISD::XOR, SDLoc(N), N->getValueType(0), N->getOperand(2),
22157 N->getOperand(3));
22159 return DAG.getNode(ISD::OR, SDLoc(N), N->getValueType(0), N->getOperand(2),
22160 N->getOperand(3));
22162 return DAG.getNode(ISD::ABDS, SDLoc(N), N->getValueType(0),
22163 N->getOperand(2), N->getOperand(3));
22165 return DAG.getNode(ISD::ABDU, SDLoc(N), N->getValueType(0),
22166 N->getOperand(2), N->getOperand(3));
22168 return DAG.getNode(AArch64ISD::SDIV_PRED, SDLoc(N), N->getValueType(0),
22169 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22171 return DAG.getNode(AArch64ISD::UDIV_PRED, SDLoc(N), N->getValueType(0),
22172 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22176 return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
22177 N->getOperand(2), N->getOperand(3));
22181 return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
22182 N->getOperand(2), N->getOperand(3));
22184 return DAG.getNode(ISD::SADDSAT, SDLoc(N), N->getValueType(0),
22185 N->getOperand(1), N->getOperand(2));
22187 return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
22188 N->getOperand(1), N->getOperand(2));
22190 return DAG.getNode(ISD::UADDSAT, SDLoc(N), N->getValueType(0),
22191 N->getOperand(1), N->getOperand(2));
22193 return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
22194 N->getOperand(1), N->getOperand(2));
22196 return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0),
22197 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22199 if (!N->getOperand(2).getValueType().isFloatingPoint())
22201 N->getValueType(0), N->getOperand(1), N->getOperand(2),
22202 N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
22205 if (!N->getOperand(2).getValueType().isFloatingPoint())
22207 N->getValueType(0), N->getOperand(1), N->getOperand(2),
22208 N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
22213 N->getValueType(0), N->getOperand(1), N->getOperand(2),
22214 N->getOperand(3), DAG.getCondCode(ISD::SETGE));
22219 N->getValueType(0), N->getOperand(1), N->getOperand(2),
22220 N->getOperand(3), DAG.getCondCode(ISD::SETGT));
22225 N->getValueType(0), N->getOperand(1), N->getOperand(2),
22226 N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
22231 N->getValueType(0), N->getOperand(1), N->getOperand(2),
22232 N->getOperand(3), DAG.getCondCode(ISD::SETNE));
22236 N->getValueType(0), N->getOperand(1), N->getOperand(2),
22237 N->getOperand(3), DAG.getCondCode(ISD::SETUO));
22252 return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
22253 N->getOperand(1), N->getOperand(2), N->getOperand(3));
22275 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
22278 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
22281 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
22290 unsigned OC = N->getOpcode();
22304 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
22305 N->getOperand(0)->getOpcode() == ISD::SETCC);
22306 const SDValue SetCC = N->getOperand(0);
22310 if (!CCOp0->getValueType(0).isInteger() ||
22311 !CCOp1->getValueType(0).isInteger())
22315 cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get();
22323 DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp0);
22325 DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp1);
22328 SDLoc(SetCC), N->getValueType(0), Ext1, Ext2,
22329 cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get());
22335 // Convert zext(extract(shuffle a, b, [0,4,8,12])) -> and(uzp1(a, b), 255)
22340 EVT VT = N->getValueType(0);
22342 N->getOpcode() != ISD::ZERO_EXTEND ||
22343 N->getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
22346 unsigned ExtOffset = N->getOperand(0).getConstantOperandVal(1);
22350 EVT InVT = N->getOperand(0).getOperand(0).getValueType();
22351 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0).getOperand(0));
22359 Shuffle->getMask().slice(ExtOffset, VT.getVectorNumElements()), 4, Idx);
22366 Shuffle->getOperand(1).isUndef() &&
22368 Shuffle->getMask().slice(ExtOffset + VT.getVectorNumElements() / 2,
22375 Shuffle->getOperand(IsUndefDeInterleave ? 1 : 0));
22377 Shuffle->getOperand(IsUndefDeInterleave ? 0 : 1));
22385 DAG.getConstant((1 << InVT.getScalarSizeInBits()) - 1, DL, VT));
22396 EVT VT = N->getValueType(0);
22397 if (N->getOpcode() != ISD::ZERO_EXTEND ||
22401 SDValue Op = N->getOperand(0);
22402 unsigned ExtOffset = (unsigned)-1;
22428 if (ExtOffset == (unsigned)-1) {
22459 if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
22460 N->getOperand(0).getValueType().is64BitVector() &&
22461 (N->getOperand(0).getOpcode() == ISD::ABDU ||
22462 N->getOperand(0).getOpcode() == ISD::ABDS)) {
22463 SDNode *ABDNode = N->getOperand(0).getNode();
22469 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
22477 if (N->getValueType(0).isFixedLengthVector() &&
22478 N->getOpcode() == ISD::SIGN_EXTEND &&
22479 N->getOperand(0)->getOpcode() == ISD::SETCC)
22490 SDValue Bswap = N->getOperand(0);
22491 if (N->getOpcode() == ISD::ANY_EXTEND && Bswap.getOpcode() == ISD::BSWAP &&
22493 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64)) {
22495 SDValue NewAnyExtend = DAG.getNode(ISD::ANY_EXTEND, DL, N->getValueType(0),
22496 Bswap->getOperand(0));
22497 return DAG.getNode(AArch64ISD::REV16, SDLoc(N), N->getValueType(0),
22521 OrigAlignment, St.getMemOperand()->getFlags());
22524 if (BasePtr->getOpcode() == ISD::ADD &&
22525 isa<ConstantSDNode>(BasePtr->getOperand(1))) {
22526 BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
22527 BasePtr = BasePtr->getOperand(0);
22531 while (--NumVecElts) {
22538 St.getMemOperand()->getFlags());
22551 llvm_unreachable("No known SVE container for this MVT type");
22576 EVT VT = N->getValueType(0);
22586 SDValue Ops[] = { N->getOperand(0), // Chain
22587 N->getOperand(2), // Pg
22588 N->getOperand(3), // Base
22602 EVT VT = N->getValueType(0);
22603 EVT PtrTy = N->getOperand(3).getValueType();
22611 SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
22612 MINode->getOperand(3), DAG.getUNDEF(PtrTy),
22613 MINode->getOperand(2), PassThru,
22614 MINode->getMemoryVT(), MINode->getMemOperand(),
22631 EVT VT = N->getValueType(0);
22637 SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
22649 SDValue Data = N->getOperand(2);
22663 SDValue Ops[] = { N->getOperand(0), // Chain
22665 N->getOperand(4), // Base
22666 N->getOperand(3), // Pg
22670 return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
22676 SDValue Data = N->getOperand(2);
22678 EVT PtrTy = N->getOperand(4).getValueType();
22684 return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
22685 DAG.getUNDEF(PtrTy), MINode->getOperand(3),
22686 MINode->getMemoryVT(), MINode->getMemOperand(),
22693 /// if the zero constant is not re-used, since one instruction and one register
22739 int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
22740 if (Offset < -512 || Offset > 504)
22794 std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
22811 uint64_t IndexVal = CIndex->getZExtValue();
22830 if (S->isVolatile() || S->isIndexed())
22833 SDValue StVal = S->getValue();
22849 if (!Subtarget->isMisaligned128StoreSlow())
22852 // Don't split at -Oz.
22857 // those up regresses performance on micro-benchmarks and olden/bh.
22866 if (VT.getSizeInBits() != 128 || S->getAlign() >= Align(16) ||
22867 S->getAlign() <= Align(2))
22885 SDValue BasePtr = S->getBasePtr();
22887 DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
22888 S->getAlign(), S->getMemOperand()->getFlags());
22892 S->getPointerInfo(), S->getAlign(),
22893 S->getMemOperand()->getFlags());
22897   assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexpected Opcode!");
22899 // splice(pg, op1, undef) -> op1
22900 if (N->getOperand(2).isUndef())
22901 return N->getOperand(1);
22908 assert((N->getOpcode() == AArch64ISD::UUNPKHI ||
22909 N->getOpcode() == AArch64ISD::UUNPKLO) &&
22912 // uunpklo/hi undef -> undef
22913 if (N->getOperand(0).isUndef())
22914 return DAG.getUNDEF(N->getValueType(0));
22919 if (N->getOperand(0).getOpcode() == ISD::MLOAD &&
22920 N->getOpcode() == AArch64ISD::UUNPKLO) {
22921 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N->getOperand(0));
22922 SDValue Mask = MLD->getMask();
22925 if (MLD->isUnindexed() && MLD->getExtensionType() != ISD::SEXTLOAD &&
22926 SDValue(MLD, 0).hasOneUse() && Mask->getOpcode() == AArch64ISD::PTRUE &&
22927 (MLD->getPassThru()->isUndef() ||
22928 isZerosVector(MLD->getPassThru().getNode()))) {
22929 unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
22930 unsigned PgPattern = Mask->getConstantOperandVal(0);
22931 EVT VT = N->getValueType(0);
22941 VT, DL, MLD->getChain(), MLD->getBasePtr(), MLD->getOffset(), Mask,
22942 PassThru, MLD->getMemoryVT(), MLD->getMemOperand(),
22943 MLD->getAddressingMode(), ISD::ZEXTLOAD);
22956 if (N->getOpcode() != AArch64ISD::UZP1)
22958 SDValue Op0 = N->getOperand(0);
22959 EVT SrcVT = Op0->getValueType(0);
22960 EVT DstVT = N->getValueType(0);
22968 // uzp1(rshrnb(uunpklo(X),C), rshrnb(uunpkhi(X), C)) -> urshr(X, C)
22970 assert(N->getOpcode() == AArch64ISD::UZP1 && "Only UZP1 expected.");
22971 SDValue Op0 = N->getOperand(0);
22972 SDValue Op1 = N->getOperand(1);
22973 EVT ResVT = N->getValueType(0);
23001 // t1 = nxv8i16 add(X, 1 << (ShiftValue - 1))
23011 EVT VT = Srl->getValueType(0);
23012 if (!VT.isScalableVector() || !Subtarget->hasSVE2())
23051 SDValue Op0 = N->getOperand(0);
23052 SDValue Op1 = N->getOperand(1);
23053 EVT ResVT = N->getValueType(0);
23055 // uzp(extract_lo(x), extract_hi(x)) -> extract_lo(uzp x, x)
23067 SDValue Uzp = DAG.getNode(N->getOpcode(), DL, WidenedResVT, SourceVec,
23075 if (N->getOpcode() == AArch64ISD::UZP2)
23078 // uzp1(x, undef) -> concat(truncate(x), undef)
23146 // uzp1(bitcast(x), bitcast(y)) -> uzp1(x, y)
23165 // truncating uzp1(x, y) -> xtn(concat (x, y))
23178 // uzp1(xtn x, xtn y) -> xtn(uzp1 (x, y))
23231 unsigned Opc = N->getOpcode();
23249 SDValue Chain = N->getOperand(0);
23250 SDValue Pg = N->getOperand(1);
23251 SDValue Base = N->getOperand(2);
23252 SDValue Offset = N->getOperand(3);
23253 SDValue Ty = N->getOperand(4);
23255 EVT ResVT = N->getValueType(0);
23267 EVT ExtFromEVT = ExtFrom->getVT().getVectorElementType();
23269 // If the predicate for the sign- or zero-extended offset is the
23270 // same as the predicate used for this load and the sign-/zero-extension
23271 // was from a 32-bits...
23292 assert(N->getOpcode() == AArch64ISD::VASHR ||
23293 N->getOpcode() == AArch64ISD::VLSHR);
23295 SDValue Op = N->getOperand(0);
23298 unsigned ShiftImm = N->getConstantOperandVal(1);
23302 if (N->getOpcode() == AArch64ISD::VASHR &&
23304 N->getOperand(1) == Op.getOperand(1))
23309 if (N->getFlags().hasExact())
23322 // sunpklo(sext(pred)) -> sext(extract_low_half(pred))
23325 if (N->getOperand(0).getOpcode() == ISD::SIGN_EXTEND &&
23326 N->getOperand(0)->getOperand(0)->getValueType(0).getScalarType() ==
23328 SDValue CC = N->getOperand(0)->getOperand(0);
23329 auto VT = CC->getValueType(0).getHalfNumVectorElementsVT(*DAG.getContext());
23332 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), N->getValueType(0), Unpk);
23338 /// Target-specific DAG combine function for post-increment LD1 (lane) and
23339 /// post-increment LD1R.
23347 EVT VT = N->getValueType(0);
23354 LoadSDNode *LD = dyn_cast<LoadSDNode>(N->getOperand(LoadIdx).getNode());
23358 // If the Generic combiner already helped form a pre- or post-indexed load,
23360 if (LD->isIndexed())
23366 Lane = N->getOperand(2);
23368 if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
23373 EVT MemVT = LoadSDN->getMemoryVT();
23380 for (SDUse &U : LD->uses()) {
23390 if (N->hasOneUse()) {
23391 unsigned UseOpc = N->user_begin()->getOpcode();
23396 SDValue Addr = LD->getOperand(1);
23397 SDValue Vector = N->getOperand(0);
23399 for (SDUse &Use : Addr->uses()) {
23401 if (User->getOpcode() != ISD::ADD || Use.getResNo() != Addr.getResNo())
23405 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
23407 uint32_t IncVal = CInc->getZExtValue();
23427 Ops.push_back(LD->getOperand(0)); // Chain
23440 LoadSDN->getMemOperand());
23474 assert((N->getOpcode() == ISD::STORE || N->getOpcode() == ISD::MSTORE) &&
23478 if (!Store->isTruncatingStore() || Store->isIndexed())
23480 SDValue Ext = Store->getValue();
23485 SDValue Orig = Ext->getOperand(0);
23486 if (Store->getMemoryVT() != Orig.getValueType())
23488 return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
23489 Store->getBasePtr(), Store->getMemOperand());
23512 EVT MemVT = LD->getMemoryVT();
23514 LD->getOriginalAlign() >= 4)
23519 SDValue Chain = LD->getChain();
23520 SDValue BasePtr = LD->getBasePtr();
23521 MachineMemOperand *MMO = LD->getMemOperand();
23522 assert(LD->getOffset().isUndef() && "undef offset expected");
23551   // nontemporal loads larger than 256 bits for odd types so LDNPQ 256-bit
23557 if (Subtarget->supportsAddressTopByteIgnored())
23558 performTBISimplification(N->getOperand(1), DCI, DAG);
23561 if (LD->isVolatile() || !Subtarget->isLittleEndian())
23567 if (!LD->isNonTemporal())
23570 EVT MemVT = LD->getMemoryVT();
23577 SDValue Chain = LD->getChain();
23578 SDValue BasePtr = LD->getBasePtr();
23579 SDNodeFlags Flags = LD->getFlags();
23582   // Replace any non-temporal load over 256 bits with a series of 256-bit loads
23583   // and a scalar/vector load of less than 256 bits. This way we can utilize 256-bit
23589   // Create all 256-bit loads starting from offset 0 and up to (Num256Loads - 1) * 32.
23594 Align NewAlign = commonAlignment(LD->getAlign(), PtrOffset);
23596 NewVT, DL, Chain, NewPtr, LD->getPointerInfo().getWithOffset(PtrOffset),
23597 NewAlign, LD->getMemOperand()->getFlags(), LD->getAAInfo());
23604 // 256-bit loads and inserting the remaining load to it. We extract the
23607 unsigned PtrOffset = (MemVT.getSizeInBits() - BitsRemaining) / 8;
23613 Align NewAlign = commonAlignment(LD->getAlign(), PtrOffset);
23616 LD->getPointerInfo().getWithOffset(PtrOffset), NewAlign,
23617 LD->getMemOperand()->getFlags(), LD->getAAInfo());
23653 for (SDValue Operand : Op->op_values()) {
23690 unsigned BitsPerElement = std::max(64 / NumElts, 8u); // >= 64-bit vector
23710   // create 8x 16-bit values, and then perform the vector reduce.
23730 unsigned MaxBitMask = 1u << (VecVT.getVectorNumElements() - 1);
23745 if (!Store->isTruncatingStore())
23749 SDValue VecOp = Store->getValue();
23751 EVT MemVT = Store->getMemoryVT();
23770 return DAG.getStore(Store->getChain(), DL, ExtendedBits, Store->getBasePtr(),
23771 Store->getMemOperand());
23783 SDValue Value = ST->getValue();
23786 if (ST->isVolatile() || !Subtarget->isLittleEndian() ||
23791 assert(ST->getOffset().isUndef() && "undef offset expected");
23795 Value->getOperand(0).getValueType().getVectorElementType(), 4);
23799 {UndefVector, Value->getOperand(0), DAG.getVectorIdxConstant(0, DL)});
23805 SDValue Chain = ST->getChain();
23806 MachineMemOperand *MMO = ST->getMemOperand();
23811 SDValue Ptr2 = DAG.getMemBasePlusOffset(ST->getBasePtr(), Offset2, DL);
23817 SDValue Ptr1 = DAG.getMemBasePlusOffset(ST->getBasePtr(), Offset1, DL);
23822 Chain = DAG.getStore(Chain, DL, E0, ST->getBasePtr(),
23832 SDValue Chain = ST->getChain();
23833 SDValue Value = ST->getValue();
23834 SDValue Ptr = ST->getBasePtr();
23850 Value.getNode()->hasOneUse() && ST->isUnindexed() &&
23851 Subtarget->useSVEForFixedLengthVectors() &&
23853 ValueVT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits() &&
23856 ST->getMemoryVT(), ST->getMemOperand());
23861 if (Subtarget->supportsAddressTopByteIgnored() &&
23862 performTBISimplification(N->getOperand(2), DCI, DAG))
23871 if (ST->isTruncatingStore()) {
23872 EVT StoreVT = ST->getMemoryVT();
23876 trySimplifySrlAddToRshrnb(ST->getOperand(1), DAG, Subtarget)) {
23877 return DAG.getTruncStore(ST->getChain(), ST, Rshrnb, ST->getBasePtr(),
23878 StoreVT, ST->getMemOperand());
23890 SDValue Value = MST->getValue();
23891 SDValue Mask = MST->getMask();
23897 if (Value.getOpcode() == AArch64ISD::UZP1 && Value->hasOneUse() &&
23898 MST->isUnindexed() && Mask->getOpcode() == AArch64ISD::PTRUE &&
23907 unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
23908 unsigned PgPattern = Mask->getConstantOperandVal(0);
23916 return DAG.getMaskedStore(MST->getChain(), DL, Value.getOperand(0),
23917 MST->getBasePtr(), MST->getOffset(), Mask,
23918 MST->getMemoryVT(), MST->getMemOperand(),
23919 MST->getAddressingMode(),
23926 if (MST->isTruncatingStore()) {
23927 EVT ValueVT = Value->getValueType(0);
23928 EVT MemVT = MST->getMemoryVT();
23932 return DAG.getMaskedStore(MST->getChain(), DL, Rshrnb, MST->getBasePtr(),
23933 MST->getOffset(), MST->getMask(),
23934 MST->getMemoryVT(), MST->getMemOperand(),
23935 MST->getAddressingMode(), true);
23953 // ->
23968 // ->
23999 while (foldIndexIntoBase(BasePtr, Index, N->getScale(), SDLoc(N), DAG))
24009 EVT DataVT = N->getOperand(1).getValueType();
24011 // will later be re-extended to 64 bits in legalization
24014 if (ISD::isVectorShrinkable(Index.getNode(), 32, N->isIndexSigned())) {
24024 Stride = cast<ConstantSDNode>(Index.getOperand(0))->getSExtValue();
24034 Stride = Step << Shift->getZExtValue();
24070 SDValue Chain = MGS->getChain();
24071 SDValue Scale = MGS->getScale();
24072 SDValue Index = MGS->getIndex();
24073 SDValue Mask = MGS->getMask();
24074 SDValue BasePtr = MGS->getBasePtr();
24075 ISD::MemIndexType IndexType = MGS->getIndexType();
24083 SDValue PassThru = MGT->getPassThru();
24086 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
24087 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
24090 SDValue Data = MSC->getValue();
24092 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
24093 DL, Ops, MSC->getMemOperand(), IndexType,
24094 MSC->isTruncatingStore());
24097 SDValue Ops[] = {Chain, HG->getInc(), Mask, BasePtr,
24098 Index, Scale, HG->getIntID()};
24099 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), HG->getMemoryVT(),
24100 DL, Ops, HG->getMemOperand(), IndexType);
24103 /// Target-specific DAG combine function for NEON load/store intrinsics
24111 unsigned AddrOpIdx = N->getNumOperands() - 1;
24112 SDValue Addr = N->getOperand(AddrOpIdx);
24115 for (SDUse &Use : Addr->uses()) {
24117 if (User->getOpcode() != ISD::ADD || Use.getResNo() != Addr.getResNo())
24137 unsigned IntNo = N->getConstantOperandVal(1);
24186 VecTy = N->getOperand(2).getValueType();
24188 VecTy = N->getValueType(0);
24191 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
24193 uint32_t IncVal = CInc->getZExtValue();
24202 Ops.push_back(N->getOperand(0)); // Incoming chain
24206 Ops.push_back(N->getOperand(i));
24222 MemInt->getMemoryVT(),
24223 MemInt->getMemOperand());
24244 switch(V.getNode()->getOpcode()) {
24249 if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
24250 || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
24251 ExtType = LoadNode->getExtensionType();
24257 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
24258 if ((TypeNode->getVT() == MVT::i8 && width == 8)
24259 || (TypeNode->getVT() == MVT::i16 && width == 16)) {
24266 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
24267 if ((TypeNode->getVT() == MVT::i8 && width == 8)
24268 || (TypeNode->getVT() == MVT::i16 && width == 16)) {
24276 return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
24277 1LL << (width - 1);
24288 //  [ASCII DAG diagram: only its box and arrow outlines matched in this listing; labels omitted]
24319 // extension (8,15), 8 patterns unique to sign extensions (-8,-1), and 8
24349 // symbolic values and well known constants (0, 1, -1, MaxUInt) we can
24359 AddConstant -= (1 << (width-1));
24365 (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
24380 (AddConstant <= 0 && CompConstant >= -1 &&
24420   // (X & C) >u Mask --> (X & (C & ~Mask)) != 0
24421   // (X & C) <u Pow2 --> (X & (C & ~(Pow2 - 1))) == 0
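// Illustrative sketch, not part of AArch64ISelLowering.cpp: exhaustive 8-bit
// check of the two rewrites above, with Mask a contiguous low-bit mask
// (Mask == Pow2 - 1), which is the shape for which the identity holds.
#include <cassert>
int main() {
  for (unsigned Pow2 = 1; Pow2 <= 128; Pow2 <<= 1) {
    unsigned Mask = Pow2 - 1;
    for (unsigned X = 0; X < 256; ++X)
      for (unsigned C = 0; C < 256; ++C) {
        assert(((X & C) > Mask) == ((X & (C & ~Mask)) != 0)); // >u form
        assert(((X & C) < Pow2) == ((X & (C & ~Mask)) == 0)); // <u form
      }
  }
}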
24426 ConstantSDNode *SubsC = dyn_cast<ConstantSDNode>(SubsNode->getOperand(1));
24430 APInt SubsAP = SubsC->getAPIntValue();
24440 ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(AndNode->getOperand(1));
24444 APInt MaskAP = CC == AArch64CC::HI ? SubsAP : (SubsAP - 1);
24447 APInt AndSMask = (~MaskAP) & AndC->getAPIntValue();
24449 AArch64ISD::ANDS, DL, SubsNode->getVTList(), AndNode->getOperand(0),
24450 DAG.getConstant(AndSMask, DL, SubsC->getValueType(0)));
24453 N->getOperand(CCIndex)->getValueType(0));
24463 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), AArch64_CC,
24465 return DAG.getNode(N->getOpcode(), N, N->getVTList(), Ops);
24473 unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
24474 SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
24475 unsigned CondOpcode = SubsNode->getOpcode();
24477 if (CondOpcode != AArch64ISD::SUBS || SubsNode->hasAnyUseOfValue(0) ||
24478 !SubsNode->hasOneUse())
24484 SDNode *AndNode = SubsNode->getOperand(0).getNode();
24487 if (AndNode->getOpcode() != ISD::AND)
24494 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
24495 uint32_t CNV = CN->getZExtValue();
24505 SDValue AddValue = AndNode->getOperand(0);
24512 SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
24513 SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
24514 SDValue SubsInputValue = SubsNode->getOperand(1);
24531 cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
24532 cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
24537 SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
24538 SubsNode->getValueType(1));
24539 SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
24560 SDValue Chain = N->getOperand(0);
24561 SDValue Dest = N->getOperand(1);
24562 SDValue CCVal = N->getOperand(2);
24563 SDValue Cmp = N->getOperand(3);
24566 unsigned CC = CCVal->getAsZExtVal();
24576 if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
24611 unsigned CC = N->getConstantOperandVal(2);
24612 SDValue SUBS = N->getOperand(3);
24616 Zero = N->getOperand(0);
24617 CTTZ = N->getOperand(1);
24619 Zero = N->getOperand(1);
24620 CTTZ = N->getOperand(0);
24646 DAG.getConstant(BitWidth - 1, SDLoc(N), CTTZ.getValueType());
24659 SDValue L = Op->getOperand(0);
24660 SDValue R = Op->getOperand(1);
24662 static_cast<AArch64CC::CondCode>(Op->getConstantOperandVal(2));
24664 SDValue OpCmp = Op->getOperand(3);
24676 SDValue X = CmpLHS->getOperand(0);
24677 SDValue Y = CmpLHS->getOperand(1);
24687 if (CX->getAPIntValue() == CY->getAPIntValue())
24691 static_cast<AArch64CC::CondCode>(CmpLHS->getConstantOperandVal(2));
24692 SDValue Cond = CmpLHS->getOperand(3);
24705 EVT VT = Op->getValueType(0);
24713 // (CSEL (ADD (ADD x y) -c) f LO (SUBS x c)) to
24717 SDValue SubsNode = N->getOperand(3);
24723 EVT VT = N->getValueType(0);
24732 DAG.getConstant(-CmpOpConst->getAPIntValue(), SDLoc(CmpOpConst),
24733 CmpOpConst->getValueType(0));
24734 SubsOp = DAG.getConstant(CmpOpConst->getAPIntValue(), SDLoc(CmpOpConst),
24735 CmpOpConst->getValueType(0));
24763 SDValue TReassocOp = GetReassociationOp(N->getOperand(0), ExpectedOp);
24764 SDValue FReassocOp = GetReassociationOp(N->getOperand(1), ExpectedOp);
24773 return N->getOperand(OpNum);
24774 SDValue Res = DAG.getNode(ISD::ADD, SDLoc(N->getOperand(OpNum)), VT,
24776 DAG.ReplaceAllUsesWith(N->getOperand(OpNum), Res);
24783 DAG.getConstant(NewCC, SDLoc(N->getOperand(2)), MVT_CC),
24787 auto CC = static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
24796 // swapped. Due to canonicalization, this only helps for non-constant
24804 if ((CC == AArch64CC::EQ || CC == AArch64CC::NE) && !CmpOpConst->isZero())
24815 auto ExpectedOp = DAG.getConstant(-NewCmpConst, SDLoc(CmpOpConst),
24816 CmpOpConst->getValueType(0));
24818 CmpOpConst->getValueType(0));
24824 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxValue(),
24825 CmpOpConst->getAPIntValue() + 1, AArch64CC::LO);
24828 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxValue(),
24829 CmpOpConst->getAPIntValue() + 1, AArch64CC::HS);
24831 return CheckedFold(!CmpOpConst->getAPIntValue().isZero(),
24832 CmpOpConst->getAPIntValue() - 1, AArch64CC::LS);
24834 return CheckedFold(!CmpOpConst->getAPIntValue().isZero(),
24835 CmpOpConst->getAPIntValue() - 1, AArch64CC::HI);
24837 return CheckedFold(!CmpOpConst->getAPIntValue().isMinSignedValue(),
24838 CmpOpConst->getAPIntValue() - 1, AArch64CC::LE);
24840 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxSignedValue(),
24841 CmpOpConst->getAPIntValue() + 1, AArch64CC::LT);
24843 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxSignedValue(),
24844 CmpOpConst->getAPIntValue() + 1, AArch64CC::GE);
24846 return CheckedFold(!CmpOpConst->getAPIntValue().isMinSignedValue(),
24847 CmpOpConst->getAPIntValue() - 1, AArch64CC::GT);
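// Illustrative sketch, not part of AArch64ISelLowering.cpp: the +1/-1 constant
// adjustments above are the usual adjacent-constant comparison identities; the
// isZero/isMaxValue/isMinSignedValue/isMaxSignedValue guards rule out the
// wrapping cases. Exhaustive 8-bit check:
#include <cassert>
#include <cstdint>
int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C = 0; C < 256; ++C) {
      if (C != 255) {
        assert((X <= C) == (X < C + 1));
        assert((X > C) == (X >= C + 1));
      }
      if (C != 0) {
        assert((X < C) == (X <= C - 1));
        assert((X >= C) == (X > C - 1));
      }
      int8_t SX = static_cast<int8_t>(X), SC = static_cast<int8_t>(C);
      if (SC != INT8_MAX)
        assert((SX > SC) == (SX >= SC + 1));
      if (SC != INT8_MIN)
        assert((SX >= SC) == (SX > SC - 1));
    }
}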
24857 // CSEL x, x, cc -> x
24858 if (N->getOperand(0) == N->getOperand(1))
24859 return N->getOperand(0);
24869 // CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
24870 // CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
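// Illustrative sketch, not part of AArch64ISelLowering.cpp: the CSEL can be
// dropped because the AArch64 cttz lowering (rbit + clz) yields the bit width
// for a zero input, and AND-ing with bitwidth-1 maps that back to 0.
// std::countr_zero (C++20 <bit>) has the same zero-input behaviour.
#include <bit>
#include <cassert>
#include <cstdint>
int main() {
  for (uint32_t X : {0u, 1u, 8u, 0x80000000u, 0xdeadbeefu}) {
    unsigned CSel = (X == 0) ? 0u : unsigned(std::countr_zero(X)); // CSEL 0, cttz(X), eq(X, 0)
    assert(CSel == (unsigned(std::countr_zero(X)) & 31u));         // AND cttz(X), 31
  }
}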
24874 // CSEL a, b, cc, SUBS(x, y) -> CSEL a, b, swapped(cc), SUBS(y, x)
24876 SDValue Cond = N->getOperand(3);
24878 Cond.hasOneUse() && Cond->hasNUsesOfValue(0, 0) &&
24879 DAG.doesNodeExist(ISD::SUB, N->getVTList(),
24881 !DAG.doesNodeExist(ISD::SUB, N->getVTList(),
24885 static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
24889 SDValue Sub = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(),
24891 return DAG.getNode(AArch64ISD::CSEL, DL, N->getVTList(), N->getOperand(0),
24892 N->getOperand(1),
24901 // Try to re-use an already extended operand of a vector SetCC feeding a
24905 EVT Op0MVT = Op->getOperand(0).getValueType();
24906 if (!Op0MVT.isVector() || Op->use_empty())
24911 SDNode *FirstUse = *Op->user_begin();
24912 if (FirstUse->getOpcode() != ISD::VSELECT)
24914 EVT UseMVT = FirstUse->getValueType(0);
24917 if (any_of(Op->users(), [&UseMVT](const SDNode *N) {
24918 return N->getOpcode() != ISD::VSELECT || N->getValueType(0) != UseMVT;
24923 if (!ISD::isConstantSplatVector(Op->getOperand(1).getNode(), V))
24929 ISD::CondCode CC = cast<CondCodeSDNode>(Op->getOperand(2))->get();
24931 // split the SET_CC and re-use the extended version of the operand.
24933 Op->getOperand(0));
24935 Op->getOperand(0));
24938 Op1ExtV = DAG.getNode(ISD::SIGN_EXTEND, DL, UseMVT, Op->getOperand(1));
24941 Op1ExtV = DAG.getNode(ISD::ZERO_EXTEND, DL, UseMVT, Op->getOperand(1));
24946 Op0ExtV, Op1ExtV, Op->getOperand(2));
24952 SDValue Vec = N->getOperand(0);
24958 return getVectorBitwiseReduce(N->getOpcode(), Vec, N->getValueType(0), DL,
24968 assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
24969 SDValue LHS = N->getOperand(0);
24970 SDValue RHS = N->getOperand(1);
24971 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
24973 EVT VT = N->getValueType(0);
24980 LHS->getOpcode() == AArch64ISD::CSEL &&
24981 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
24982 LHS->hasOneUse()) {
24996 // setcc (srl x, imm), 0, ne ==> setcc (and x, (-1 << imm)), 0, ne
24998 LHS->getOpcode() == ISD::SRL && isa<ConstantSDNode>(LHS->getOperand(1)) &&
24999 LHS->getConstantOperandVal(1) < VT.getScalarSizeInBits() &&
25000 LHS->hasOneUse()) {
25001 EVT TstVT = LHS->getValueType(0);
25004 uint64_t TstImm = -1ULL << LHS->getConstantOperandVal(1);
25005 SDValue TST = DAG.getNode(ISD::AND, DL, TstVT, LHS->getOperand(0),
25007 return DAG.getNode(ISD::SETCC, DL, VT, TST, RHS, N->getOperand(2));
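// Illustrative sketch, not part of AArch64ISelLowering.cpp: the srl-to-and
// rewrite above is the identity ((x >> imm) != 0) == ((x & (-1 << imm)) != 0),
// i.e. "is any bit at or above position imm set". The model uses ~0u for the
// mask so every shift stays well-defined.
#include <cassert>
#include <cstdint>
int main() {
  for (unsigned Imm = 1; Imm < 32; ++Imm)
    for (uint32_t X : {0u, 1u, 0x80u, 0xffffu, 0x80000000u, 0xdeadbeefu})
      assert(((X >> Imm) != 0) == ((X & (~0u << Imm)) != 0));
}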
25013 // setcc (iN (bitcast (vNi1 X))), -1, (eq|ne)
25014 // ==> setcc (iN (sext (i1 (vecreduce_and (vNi1 X))))), -1, (eq|ne)
25018 LHS->getOpcode() == ISD::BITCAST) {
25019 EVT ToVT = LHS->getValueType(0);
25020 EVT FromVT = LHS->getOperand(0).getValueType();
25025 DL, MVT::i1, LHS->getOperand(0));
25039 // Replace a flag-setting operator (eg ANDS) with the generic version
25045 SDValue LHS = N->getOperand(0);
25046 SDValue RHS = N->getOperand(1);
25047 EVT VT = N->getValueType(0);
25050 if (!N->hasAnyUseOfValue(1)) {
25051 SDValue Res = DCI.DAG.getNode(GenericOpcode, DL, VT, N->ops());
25056 // Combine identical generic nodes into this node, re-using the result.
25068 SDValue Pred = N->getOperand(0);
25069 SDValue LHS = N->getOperand(1);
25070 SDValue RHS = N->getOperand(2);
25071 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
25074 LHS->getOpcode() != ISD::SIGN_EXTEND)
25077 SDValue Extract = LHS->getOperand(0);
25078 if (Extract->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
25079 Extract->getValueType(0) != N->getValueType(0) ||
25080 Extract->getConstantOperandVal(1) != 0)
25083 SDValue InnerSetCC = Extract->getOperand(0);
25084 if (InnerSetCC->getOpcode() != AArch64ISD::SETCC_MERGE_ZERO)
25095 Pred->getConstantOperandVal(0) >= AArch64SVEPredPattern::vl1 &&
25096 Pred->getConstantOperandVal(0) <= AArch64SVEPredPattern::vl256)
25104 assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
25108 SDValue Pred = N->getOperand(0);
25109 SDValue LHS = N->getOperand(1);
25110 SDValue RHS = N->getOperand(2);
25111 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
25117 LHS->getOpcode() == ISD::SIGN_EXTEND &&
25118 LHS->getOperand(0)->getValueType(0) == N->getValueType(0)) {
25122 if (LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
25123 LHS->getOperand(0)->getOperand(0) == Pred)
25124 return LHS->getOperand(0);
25128 // -> nxvNi1 ...
25130 return LHS->getOperand(0);
25134 // -> nxvNi1 and(pred, ...)
25138 return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0),
25139 LHS->getOperand(0), Pred);
25152 if (!Op->hasOneUse())
25155 // We don't handle undef/constant-fold cases below, as they should have
25159 // (tbz (trunc x), b) -> (tbz x, b)
25161 if (Op->getOpcode() == ISD::TRUNCATE &&
25162 Bit < Op->getValueType(0).getSizeInBits()) {
25163 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
25166 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
25167 if (Op->getOpcode() == ISD::ANY_EXTEND &&
25168 Bit < Op->getOperand(0).getValueSizeInBits()) {
25169 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
25172 if (Op->getNumOperands() != 2)
25175 auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
25179 switch (Op->getOpcode()) {
25183 // (tbz (and x, m), b) -> (tbz x, b)
25185 if ((C->getZExtValue() >> Bit) & 1)
25186 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
25189 // (tbz (shl x, c), b) -> (tbz x, b-c)
25191 if (C->getZExtValue() <= Bit &&
25192 (Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
25193 Bit = Bit - C->getZExtValue();
25194 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
25198 // (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
25200 Bit = Bit + C->getZExtValue();
25201 if (Bit >= Op->getValueType(0).getSizeInBits())
25202 Bit = Op->getValueType(0).getSizeInBits() - 1;
25203 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
25205 // (tbz (srl x, c), b) -> (tbz x, b+c)
25207 if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
25208 Bit = Bit + C->getZExtValue();
25209 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
25213 // (tbz (xor x, -1), b) -> (tbnz x, b)
25215 if ((C->getZExtValue() >> Bit) & 1)
25217 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
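// Illustrative sketch, not part of AArch64ISelLowering.cpp: the shift folds in
// getTestBitOperand only move the tested bit index. Bit b of (x << c) is bit
// b-c of x (for c <= b), and bit b of a logical right shift (x >> c) is bit
// b+c of x (for b+c < width).
#include <cassert>
#include <cstdint>
static bool bit(uint64_t V, unsigned B) { return (V >> B) & 1; }
int main() {
  for (uint64_t X : {0ull, 1ull, 0x8000000000000000ull, 0xdeadbeefcafef00dull})
    for (unsigned C = 0; C < 8; ++C)
      for (unsigned B = C; B < 56; ++B) {
        assert(bit(X << C, B) == bit(X, B - C)); // (tbz (shl x, c), b) -> (tbz x, b-c)
        assert(bit(X >> C, B) == bit(X, B + C)); // (tbz (srl x, c), b) -> (tbz x, b+c)
      }
}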
25221 // Optimize a single-bit zero/non-zero test and branch.
25225 unsigned Bit = N->getConstantOperandVal(2);
25227 SDValue TestSrc = N->getOperand(1);
25233 unsigned NewOpc = N->getOpcode();
25244 return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
25245 DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
25254 auto SelectA = N->getOperand(1);
25255 auto SelectB = N->getOperand(2);
25256 auto NTy = N->getValueType(0);
25260 SDValue SetCC = N->getOperand(0);
25275 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
25286 // vselect (v1i1 setcc) ->
25295 SDValue N0 = N->getOperand(0);
25299 return N->getOperand(1);
25302 return N->getOperand(2);
25304 // Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
25305   // into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
25307 SDValue SetCC = N->getOperand(0);
25313 SDNode *SplatLHS = N->getOperand(1).getNode();
25314 SDNode *SplatRHS = N->getOperand(2).getNode();
25316 if (CmpLHS.getValueType() == N->getOperand(1).getValueType() &&
25326 NumElts, DAG.getConstant(VT.getScalarSizeInBits() - 1, SDLoc(N),
25331 auto Or = DAG.getNode(ISD::OR, SDLoc(N), VT, Shift, N->getOperand(1));
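// Illustrative sketch, not part of AArch64ISelLowering.cpp: the sign-select
// pattern above is the identity (x > -1 ? 1 : -1) == ((x >> (N-1)) | 1), using
// an arithmetic right shift (guaranteed for signed types since C++20).
#include <cassert>
#include <cstdint>
int main() {
  for (int32_t X : {INT32_MIN, -42, -1, 0, 1, 42, INT32_MAX})
    assert((X > -1 ? 1 : -1) == ((X >> 31) | 1));
}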
25343 EVT ResVT = N->getValueType(0);
25349 SDValue IfTrue = N->getOperand(1);
25350 SDValue IfFalse = N->getOperand(2);
25353 cast<CondCodeSDNode>(N0.getOperand(2))->get());
25359 /// the compare-mask instructions rather than going via NZCV, even if LHS and
25365 SDValue N0 = N->getOperand(0);
25366 EVT ResVT = N->getValueType(0);
25378 "Scalar-SETCC feeding SELECT has unexpected result type!");
25381   // largest real NEON comparison is 64 bits per lane, which means the result is
25382   // at most 32 bits and an illegal vector. Just bail out for now.
25424 return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
25429 EVT VT = N->getValueType(0);
25435 SmallVector<SDValue> Ops(N->ops());
25436 if (SDNode *LN = DCI.DAG.getNodeIfExists(N->getOpcode(),
25443 if (N->getOpcode() == AArch64ISD::DUP) {
25452 SDValue EXTRACT_VEC_ELT = N->getOperand(0);
25470 if (N->getValueType(0) == N->getOperand(0).getValueType())
25471 return N->getOperand(0);
25472 if (N->getOperand(0).getOpcode() == AArch64ISD::NVCAST)
25473 return DAG.getNode(AArch64ISD::NVCAST, SDLoc(N), N->getValueType(0),
25474 N->getOperand(0).getOperand(0));
25481 // globaladdr as (globaladdr + constant) - constant.
25486 if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
25490 uint64_t MinOffset = -1ull;
25491 for (SDNode *N : GN->users()) {
25492 if (N->getOpcode() != ISD::ADD)
25494 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
25496 C = dyn_cast<ConstantSDNode>(N->getOperand(1));
25499 MinOffset = std::min(MinOffset, C->getZExtValue());
25501 uint64_t Offset = MinOffset + GN->getOffset();
25505 // (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
25506 if (Offset <= uint64_t(GN->getOffset()))
25521 const GlobalValue *GV = GN->getGlobal();
25522 Type *T = GV->getValueType();
25523 if (!T->isSized() ||
25524 Offset > GV->getDataLayout().getTypeAllocSize(T))
25535 SDValue BR = N->getOperand(0);
25536 if (!Subtarget->hasCSSC() || BR.getOpcode() != ISD::BITREVERSE ||
25588 OffsetConst->getZExtValue(), ScalarSizeInBytes);
25594 const SDValue Src = N->getOperand(2);
25595 const EVT SrcVT = Src->getValueType(0);
25617 SDValue Base = N->getOperand(4);
25620 SDValue Offset = N->getOperand(5);
25623 // applies to non-temporal scatters because there's no instruction that takes
25635 // In the case of non-temporal gather loads there's only one SVE instruction
25636 // per data-size: "scalar + vector", i.e.
25648 // immediates outside that range and non-immediate scalar offsets use SST1 or
25679 // Keep the original type of the input data to store - this is needed to be
25694 SDValue Ops[] = {N->getOperand(0), // Chain
25696 N->getOperand(3), // Pg
25707 const EVT RetVT = N->getValueType(0);
25719 SDValue Base = N->getOperand(3);
25722 SDValue Offset = N->getOperand(4);
25725 // offsets. This applies to non-temporal and quadword gathers, which do not
25737 // In the case of non-temporal gather loads and quadword gather loads there's
25751 // immediates outside that range and non-immediate scalar offsets use
25784 // Keep the original output value type around - this is needed to be able to
25792 SDValue Ops[] = {N->getOperand(0), // Chain
25793 N->getOperand(2), // Pg
25814 SDValue Src = N->getOperand(0);
25815 unsigned Opc = Src->getOpcode();
25817 // Sign extend of an unsigned unpack -> signed unpack
25823 // Push the sign extend to the operand of the unpack
25827 // ->
25829 // ->
25831 SDValue ExtOp = Src->getOperand(0);
25832 auto VT = cast<VTSDNode>(N->getOperand(1))->getVT();
25844 return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
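// Illustrative sketch (not part of this file; helper names are invented): a
// per-lane model of the unpack rewrite above for an i8 lane widened to i16,
// assuming two's-complement narrowing conversions.
#include <cassert>
#include <cstdint>
static int16_t sextInRegOfUUnpk(uint8_t Lane) {
  uint16_t Widened = Lane;                    // uunpklo: zero-extend the lane
  return static_cast<int8_t>(Widened & 0xFF); // sext_inreg from i8
}
static int16_t sUnpk(uint8_t Lane) {
  return static_cast<int8_t>(Lane);           // sunpklo: sign-extend the lane
}
static void checkUnpackRewrite() {
  for (unsigned V = 0; V != 256; ++V)
    assert(sextInRegOfUUnpk(uint8_t(V)) == sUnpk(uint8_t(V)));
}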
25919 EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
25920 EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();
25925 EVT DstVT = N->getValueType(0);
25929 for (unsigned I = 0; I < Src->getNumOperands(); ++I)
25930 Ops.push_back(Src->getOperand(I));
25941 /// offset vector is an unpacked 32-bit scalable vector. The other cases (Offset
25945 SDValue Offset = N->getOperand(OffsetPos);
25951 // Extend the unpacked offset vector to 64-bit lanes.
25954 SmallVector<SDValue, 5> Ops(N->ops());
25955 // Replace the offset operand with the 64-bit one.
25958 return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
25970 if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes))
25974 SmallVector<SDValue, 5> Ops(N->ops());
25982 return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
26011 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!");
26012 SDValue InsertVec = N->getOperand(0);
26013 SDValue InsertElt = N->getOperand(1);
26014 SDValue InsertIdx = N->getOperand(2);
26033 // If we get here we are effectively trying to zero lanes 1-N of a vector.
26036 if (N->getValueType(0) != ExtractVec.getValueType())
26057 SDValue N0 = N->getOperand(0);
26058 EVT VT = N->getValueType(0);
26061 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
26069 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
26073 N0.hasOneUse() && Subtarget->useSVEForFixedLengthVectors() &&
26075 VT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits()) {
26078 LN0->getChain(), LN0->getBasePtr(),
26079 N0.getValueType(), LN0->getMemOperand());
26094 EVT VT = N->getValueType(0);
26097 if (!VT.isScalableVector() || Subtarget->hasSVE2() || Subtarget->hasSME())
26102 SDValue Mask = N->getOperand(0);
26103 SDValue In1 = N->getOperand(1);
26104 SDValue In2 = N->getOperand(2);
26113 EVT VT = N->getValueType(0);
26115 SDValue Insert = N->getOperand(0);
26123 uint64_t IdxDupLane = N->getConstantOperandVal(1);
26141 DAG.getUNDEF(NewSubvecVT), Subvec, Insert->getOperand(2));
26143 NewInsert, N->getOperand(1));
26154 SDValue LHS = N->getOperand(0);
26155 SDValue RHS = N->getOperand(1);
26183 // You can see the regressions on test/CodeGen/AArch64/aarch64-smull.ll
26206 if (ExtractHighSrcVec->use_size() != 2)
26210 for (SDNode *User : ExtractHighSrcVec.getNode()->users()) {
26214 if (User->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
26215 !isNullConstant(User->getOperand(1))) {
26223 if (!ExtractLow || !ExtractLow->hasOneUse())
26228 SDNode *ExtractLowUser = *ExtractLow.getNode()->user_begin();
26229 if (ExtractLowUser->getOpcode() != N->getOpcode()) {
26232 if (ExtractLowUser->getOperand(0) == ExtractLow) {
26233 if (ExtractLowUser->getOperand(1).getOpcode() == ISD::TRUNCATE)
26234 TruncLow = ExtractLowUser->getOperand(1);
26238 if (ExtractLowUser->getOperand(0).getOpcode() == ISD::TRUNCATE)
26239 TruncLow = ExtractLowUser->getOperand(0);
26248 // You can see the regressions on test/CodeGen/AArch64/aarch64-smull.ll
26311 EVT VT = N->getValueType(0);
26315 SDValue ZEXT = N->getOperand(0);
26347 /// (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2))
26356 SDValue Op0 = N->getOperand(0);
26360 SDValue C1 = Op0->getOperand(1);
26361 SDValue C2 = N->getOperand(1);
26366 if (N->hasOneUse()) {
26367 unsigned UseOpc = N->user_begin()->getOpcode();
26374 EVT VT = N->getValueType(0);
26377 // DAGCombiner will simplify (and (op x...), (op y...)) -> (op (and x, y))
26383 SDValue X = Op0->getOperand(0);
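// Illustrative sketch (not part of this file; helper names are invented): the
// bitwise identity behind the (shl (and X, C1), C2) rewrite above, shown on
// 32-bit unsigned values with C2 < 32.
#include <cassert>
#include <cstdint>
static uint32_t shlOfAnd(uint32_t X, uint32_t C1, unsigned C2) {
  return (X & C1) << C2;
}
static uint32_t andOfShl(uint32_t X, uint32_t C1, unsigned C2) {
  return (X << C2) & (C1 << C2);
}
static void checkShlOfAnd() {
  assert(shlOfAnd(0xDEADBEEFu, 0x00FF00FFu, 8) ==
         andOfShl(0xDEADBEEFu, 0x00FF00FFu, 8));
}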
26391 switch (N->getOpcode()) {
26424 APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits());
26426 APInt::getAllOnes(N->getValueType(0).getVectorNumElements());
26549 switch (N->getConstantOperandVal(1)) {
26700 unsigned IntrinsicID = N->getConstantOperandVal(1);
26707 N->getOperand(0), DAG.getConstant(Register, DL, MVT::i32));
26717 DAG.getVTList(MVT::Other), N->getOperand(0),
26718 N->getOperand(2), N->getOperand(3));
26721 DAG.getVTList(MVT::Other), N->getOperand(0),
26722 N->getOperand(2), N->getOperand(3));
26740 // we can't perform a tail-call. In particular, we need to check for
26745 if (N->getNumValues() != 1)
26747 if (!N->hasNUsesOfValue(1, 0))
26751 SDNode *Copy = *N->user_begin();
26752 if (Copy->getOpcode() == ISD::CopyToReg) {
26755 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
26758 TCChain = Copy->getOperand(0);
26759 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
26763 for (SDNode *Node : Copy->users()) {
26764 if (Node->getOpcode() != AArch64ISD::RET_GLUE)
26780 bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
26781 return CI->isTailCall();
26788 if (!CstOffset || CstOffset->isZero())
26794 return isInt<9>(CstOffset->getSExtValue());
26801 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
26804 // Non-null if there is exactly one user of the loaded value (ignoring chain).
26806 for (SDUse &U : N->uses()) {
26812 ValOnlyUser = nullptr; // Multiple non-chain uses, bail out.
26823 if (ValOnlyUser && ValOnlyUser->getValueType(0).isScalableVector() &&
26824 (ValOnlyUser->getOpcode() == ISD::SPLAT_VECTOR ||
26825 (ValOnlyUser->getOpcode() == AArch64ISD::DUP_MERGE_PASSTHRU &&
26826 IsUndefOrZero(ValOnlyUser->getOperand(2)))))
26829 Base = Op->getOperand(0);
26832 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
26833 int64_t RHSC = RHS->getSExtValue();
26834 if (Op->getOpcode() == ISD::SUB)
26835 RHSC = -(uint64_t)RHSC;
26838 // Always emit pre-inc/post-inc addressing mode. Use negated constant offset
26840 Offset = DAG.getConstant(RHSC, SDLoc(N), RHS->getValueType(0));
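// Illustrative sketch (not part of this file; the helper name is invented):
// one reason negations like the one above go through uint64_t is that negating
// INT64_MIN as a signed value is undefined behaviour, while unsigned negation
// wraps; converting back matches two's complement on AArch64 in practice.
#include <cstdint>
static int64_t negateOffset(int64_t V) {
  return static_cast<int64_t>(-static_cast<uint64_t>(V));
}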
26853 VT = LD->getMemoryVT();
26854 Ptr = LD->getBasePtr();
26856 VT = ST->getMemoryVT();
26857 Ptr = ST->getBasePtr();
26873 VT = LD->getMemoryVT();
26874 Ptr = LD->getBasePtr();
26876 VT = ST->getMemoryVT();
26877 Ptr = ST->getBasePtr();
26883 // Post-indexing updates the base, so it's not a valid transform
26895 SDValue Op = N->getOperand(0);
26896 EVT VT = N->getValueType(0);
26923 SDValue Op = N->getOperand(0);
26924 EVT VT = N->getValueType(0);
26937 SDValue Op = N->getOperand(0);
26938 EVT VT = N->getValueType(0);
26958 "Expected fp->int bitcast!");
26989 EVT VT = N->getValueType(0);
26992 !N->getFlags().hasAllowReassociation()) ||
26993 (VT.getScalarType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
26997 SDValue X = N->getOperand(0);
26998 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(1));
27000 Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
27001 X = N->getOperand(1);
27006 if (Shuf->getOperand(0) != X || !Shuf->getOperand(1)->isUndef())
27010 ArrayRef<int> Mask = Shuf->getMask();
27012 if (Mask[I] != (I % 2 == 0 ? I + 1 : I - 1))
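// Illustrative sketch (not part of this file; the helper name is invented):
// the mask test above accepts exactly the "swap adjacent lanes" permutation,
// e.g. {1,0,3,2,5,4,7,6}: lane I maps to I+1 for even I and to I-1 for odd I.
#include <cstddef>
#include <vector>
static bool isAdjacentLaneSwapMask(const std::vector<int> &Mask) {
  for (size_t I = 0, E = Mask.size(); I != E; ++I)
    if (Mask[I] != static_cast<int>(I % 2 == 0 ? I + 1 : I - 1))
      return false;
  return true;
}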
27041 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
27050 SDValue In = N->getOperand(0);
27058 EVT VT = N->getValueType(0);
27067 auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
27071 unsigned Index = CIndex->getZExtValue();
27078 SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
27101 assert(N->getValueType(0) == MVT::i128 &&
27104 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
27105 if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
27106 // LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
27109 createGPRPairNode(DAG, N->getOperand(2)), // Compare value
27110 createGPRPairNode(DAG, N->getOperand(3)), // Store value
27111 N->getOperand(1), // Ptr
27112 N->getOperand(0), // Chain in
27116 switch (MemOp->getMergedOrdering()) {
27152 switch (MemOp->getMergedOrdering()) {
27171 auto Desired = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
27172 auto New = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
27173 SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
27174 New.first, New.second, N->getOperand(0)};
27188 // LowerATOMIC_LOAD_AND). We can't take that approach with 128-bit, because
27189 // the type is not legal. Therefore we shouldn't expect to see a 128-bit
27264 // LSE128 has 128-bit RMW ops, but i128 is not a legal type, so lower it
27272 assert(N->getValueType(0) == MVT::i128 &&
27275 if (!Subtarget->hasLSE128())
27278 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
27279 const SDValue &Chain = N->getOperand(0);
27280 const SDValue &Ptr = N->getOperand(1);
27281 const SDValue &Val128 = N->getOperand(2);
27285 const unsigned ISDOpcode = N->getOpcode();
27287 getAtomicLoad128Opcode(ISDOpcode, MemOp->getMergedOrdering());
27293 DAG.getConstant(-1ULL, dl, MVT::i64), Val2x64.first);
27296 DAG.getConstant(-1ULL, dl, MVT::i64), Val2x64.second);
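// Illustrative sketch (not part of this file; helper names are invented): the
// XOR with all-ones above exists because LDCLR/LDCLRP performs a bit-clear,
// i.e. memory &= ~operand, so an atomic AND with mask M is issued as a clear
// of ~M. A per-64-bit-half model of that equivalence:
#include <cassert>
#include <cstdint>
static uint64_t ldclrModel(uint64_t Mem, uint64_t ClearArg) {
  return Mem & ~ClearArg;
}
static void checkAndViaClear() {
  uint64_t Mem = 0xF0F0F0F0F0F0F0F0ULL, Mask = 0x00FF00FF00FF00FFULL;
  assert(ldclrModel(Mem, ~Mask) == (Mem & Mask));
}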
27319 switch (N->getOpcode()) {
27378 assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
27385 assert(N->getValueType(0) != MVT::i128 &&
27386 "128-bit ATOMIC_LOAD_AND should be lowered directly to LDCLRP");
27391 assert(cast<AtomicSDNode>(N)->getVal().getValueType() == MVT::i128 &&
27392 "Expected 128-bit atomicrmw.");
27400 EVT MemVT = LoadNode->getMemoryVT();
27401 // Handle lowering 256 bit non-temporal loads into LDNP for little-endian
27403 if (LoadNode->isNonTemporal() && Subtarget->isLittleEndian() &&
27415 {LoadNode->getChain(), LoadNode->getBasePtr()},
27416 LoadNode->getMemoryVT(), LoadNode->getMemOperand());
27424 if ((!LoadNode->isVolatile() && !LoadNode->isAtomic()) ||
27425 LoadNode->getMemoryVT() != MVT::i128) {
27426 // Non-volatile or atomic loads are optimized later in AArch64's load/store
27434 AN && AN->getSuccessOrdering() == AtomicOrdering::Acquire;
27438 assert(Subtarget->hasFeature(AArch64::FeatureRCPC3));
27442 {LoadNode->getChain(), LoadNode->getBasePtr()},
27443 LoadNode->getMemoryVT(), LoadNode->getMemOperand());
27449 Result.getValue(FirstRes), Result.getValue(1 - FirstRes));
27460 // CONCAT_VECTORS -- but delegate to common code for result type
27464 EVT VT = N->getValueType(0);
27467 static_cast<Intrinsic::ID>(N->getConstantOperandVal(0));
27475 auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
27477 N->getOperand(1), Op2, N->getOperand(3));
27485 auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
27487 N->getOperand(1), Op2, N->getOperand(3));
27496 N->getOperand(1), N->getOperand(2));
27505 N->getOperand(1), N->getOperand(2));
27513 getRuntimePStateSM(DAG, Chain, DL, N->getValueType(0));
27529 auto V = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, NewVT, N->ops());
27537 assert(N->getValueType(0) == MVT::i128 &&
27538 "READ_REGISTER custom lowering is only for 128-bit sysregs");
27539 SDValue Chain = N->getOperand(0);
27540 SDValue SysRegName = N->getOperand(1);
27547 // of the 128-bit System Register value.
27558 if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
27580 // In v8.4a, ldp and stp instructions are guaranteed to be single-copy atomic
27581 // provided the address is 16-byte aligned.
27583 if (!Subtarget->hasLSE2())
27587 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
27588 LI->getAlign() >= Align(16);
27591 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
27592 SI->getAlign() >= Align(16);
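// Illustrative sketch (not part of this file; the type and helper are
// invented): the kind of source-level access this predicate is about - a
// 16-byte-aligned, 128-bit atomic object. With +lse2 such an access may be
// kept as a single LDP/STP; actual codegen depends on the subtarget features
// and is not guaranteed by this sketch.
#include <atomic>
struct alignas(16) Pair128 {
  long long Lo;
  long long Hi;
};
static Pair128 loadPair(const std::atomic<Pair128> &P) {
  return P.load(std::memory_order_relaxed);
}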
27598 if (!Subtarget->hasLSE128())
27604 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
27605 SI->getAlign() >= Align(16) &&
27606 (SI->getOrdering() == AtomicOrdering::Release ||
27607 SI->getOrdering() == AtomicOrdering::SequentiallyConsistent);
27610 return RMW->getValOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
27611 RMW->getAlign() >= Align(16) &&
27612 (RMW->getOperation() == AtomicRMWInst::Xchg ||
27613 RMW->getOperation() == AtomicRMWInst::And ||
27614 RMW->getOperation() == AtomicRMWInst::Or);
27620 if (!Subtarget->hasLSE2() || !Subtarget->hasRCPC3())
27624 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
27625 LI->getAlign() >= Align(16) &&
27626 LI->getOrdering() == AtomicOrdering::Acquire;
27629 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
27630 SI->getAlign() >= Align(16) &&
27631 SI->getOrdering() == AtomicOrdering::Release;
27649 // Store-Release instructions only provide seq_cst guarantees when paired with
27650 // Load-Acquire instructions. MSVC CRT does not use these instructions to
27653 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
27656 switch (I->getOpcode()) {
27660 return cast<AtomicCmpXchgInst>(I)->getSuccessOrdering() ==
27663 return cast<AtomicRMWInst>(I)->getOrdering() ==
27666 return cast<StoreInst>(I)->getOrdering() ==
27671 // Loads and stores less than 128 bits are already atomic; ones above that
27676 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
27688 // Loads and stores less than 128 bits are already atomic; ones above that
27693 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
27703 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
27707 // succeed. So at -O0 lower this operation to a CAS loop.
27713 return Subtarget->hasLSE() ? AtomicExpansionKind::CmpXChg
27721 if (!RMW->isFloatingPointOperation())
27723 switch (RMW->getType()->getScalarType()->getTypeID()) {
27739 // However, with the LSE instructions (or outline-atomics mode, which provides
27740 // library routines in place of the LSE instructions), we can directly emit many
27744 Type *Ty = AI->getType();
27745 unsigned Size = Ty->getPrimitiveSizeInBits();
27748 bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
27749 (AI->getOperation() == AtomicRMWInst::Xchg ||
27750 AI->getOperation() == AtomicRMWInst::Or ||
27751 AI->getOperation() == AtomicRMWInst::And);
27757 if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128 &&
27758 !AI->isFloatingPointOperation()) {
27759 if (Subtarget->hasLSE())
27761 if (Subtarget->outlineAtomics()) {
27765 // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
27766 // (2) low level libgcc and compiler-rt support implemented by:
27768 if (AI->getOperation() != AtomicRMWInst::Min &&
27769 AI->getOperation() != AtomicRMWInst::Max &&
27770 AI->getOperation() != AtomicRMWInst::UMin &&
27771 AI->getOperation() != AtomicRMWInst::UMax) {
27777 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
27781 // succeed. So at -O0 lower this operation to a CAS loop. Also worthwhile if
27784 Subtarget->hasLSE() || rmwOpMayLowerToLibcall(*Subtarget, AI))
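// Illustrative sketch (not part of this file; the helper name is invented): a
// source-level RMW of the kind this hook decides how to expand. With LSE
// available it can be selected to a single LDADD-style instruction; without
// LSE it is expanded to an LL/SC or CAS loop as the comments above describe.
// Exact codegen varies by subtarget and optimisation level.
#include <atomic>
static int fetchThenIncrement(std::atomic<int> &Counter) {
  return Counter.fetch_add(1, std::memory_order_seq_cst);
}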
27794 if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
27796 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
27800 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
27804 // 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand
27806 unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
27816 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
27819 // Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
27822 if (ValueTy->getPrimitiveSizeInBits() == 128) {
27841 Type *Tys[] = { Addr->getType() };
27845 const DataLayout &DL = M->getDataLayout();
27847 CallInst *CI = Builder.CreateIntrinsic(Int, Tys, Addr);
27848 CI->addParamAttr(0, Attribute::get(Builder.getContext(),
27850 Value *Trunc = Builder.CreateTrunc(CI, IntEltTy);
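// Illustrative sketch (not part of this file; the helper name is invented):
// how the two 64-bit halves returned by ldxp/ldaxp are recombined into one
// 128-bit value, modelled with the Clang/GCC unsigned __int128 extension
// instead of the IRBuilder calls used in the surrounding code.
#include <cstdint>
static unsigned __int128 combineLdxpHalves(uint64_t Lo, uint64_t Hi) {
  return (static_cast<unsigned __int128>(Hi) << 64) | Lo;
}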
27863 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
27869 if (Val->getType()->getPrimitiveSizeInBits() == 128) {
27873 Type *Int64Ty = Type::getInt64Ty(M->getContext());
27874 Type *Int128Ty = Type::getInt128Ty(M->getContext());
27886 Type *Tys[] = { Addr->getType() };
27889 const DataLayout &DL = M->getDataLayout();
27890 IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
27893 CallInst *CI = Builder.CreateCall(
27895 Val, Stxr->getFunctionType()->getParamType(0)),
27897 CI->addParamAttr(1, Attribute::get(Builder.getContext(),
27898 Attribute::ElementType, Val->getType()));
27899 return CI;
27905 if (!Ty->isArrayTy()) {
27906 const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
27922 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
27935 if (Subtarget->isTargetAndroid())
27940 if (Subtarget->isTargetFuchsia())
27941 return UseTlsOffset(IRB, -0x10);
27948 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
27955 M.getOrInsertFunction(Subtarget->getSecurityCheckCookieName(),
27959 F->setCallingConv(CallingConv::Win64);
27960 F->addParamAttr(0, Attribute::AttrKind::InReg);
27969 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
27976 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
27977 return M.getFunction(Subtarget->getSecurityCheckCookieName());
27986 if (Subtarget->isTargetAndroid())
27991 if (Subtarget->isTargetFuchsia())
27992 return UseTlsOffset(IRB, -0x8);
28023 return Mask->getValue().isPowerOf2();
28043 !Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
28051 AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
28052 AFI->setIsSplitCSR(true);
28058 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
28059 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
28063 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
28064 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
28065 MachineBasicBlock::iterator MBBI = Entry->begin();
28075 Register NewVR = MRI->createVirtualRegister(RC);
28077 // FIXME: this currently does not emit CFI pseudo-instructions, it works
28078 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
28080 // CFI pseudo-instructions.
28081 assert(Entry->getParent()->getFunction().hasFnAttribute(
28084 Entry->addLiveIn(*I);
28085 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
28088 // Insert the copy-back instructions right before the terminator.
28090 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
28091 TII->get(TargetOpcode::COPY), *I)
28101 // integer division, leaving the division as-is is a loss even in terms of
28110 // Avoid merging stores into fixed-length vectors when Neon is unavailable.
28114 if (MemVT.isFixedLengthVector() && !Subtarget->isNeonAvailable())
28124 // We want inc-of-add for scalars and sub-of-not for vectors.
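// Illustrative sketch (not part of this file; helper names are invented),
// assuming "inc-of-add" and "sub-of-not" refer to the X + (Y + 1) and X - ~Y
// shapes the generic combiner toggles between: both compute X + Y + 1 on
// two's-complement integers, since ~Y == -Y - 1, so the hook only picks which
// form to prefer per type.
#include <cassert>
#include <cstdint>
static uint32_t incOfAdd(uint32_t X, uint32_t Y) { return X + (Y + 1); }
static uint32_t subOfNot(uint32_t X, uint32_t Y) { return X - ~Y; }
static void checkIncOfAddVsSubOfNot() {
  assert(incOfAdd(7u, 5u) == subOfNot(7u, 5u));
  assert(incOfAdd(41u, 0xFFFFFFFFu) == subOfNot(41u, 0xFFFFFFFFu));
}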
28132 if (FPVT == MVT::v8f16 && !Subtarget->hasFullFP16())
28149 assert(MBBI->isCall() && MBBI->getCFIType() &&
28152 switch (MBBI->getOpcode()) {
28164 MachineOperand &Target = MBBI->getOperand(0);
28168 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(AArch64::KCFI_CHECK))
28170 .addImm(MBBI->getCFIType())
28175 return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
28180 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
28238 if (GV.isThreadLocal() && Subtarget->isTargetMachO())
28244 const ConstantInt *CI;
28248 CI = MI.getOperand(1).getCImm();
28255 auto APF = MI.getOperand(1).getFPImm()->getValueAPF();
28261 CI =
28266 APInt Imm = CI->getValue();
28267 InstructionCost Cost = TTI->getIntImmCost(
28268 Imm, CI->getType(), TargetTransformInfo::TCK_CodeSize);
28277 --MaxUses;
28298 if (Inst.getType()->isScalableTy()) {
28303 if (Inst.getOperand(i)->getType()->isScalableTy())
28307 if (AI->getAllocatedType()->isScalableTy())
28332 llvm_unreachable("unexpected element type for SVE container");
28444 EVT MemVT = Load->getMemoryVT();
28454 LoadVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), Pg,
28455 DAG.getUNDEF(LoadVT), MemVT, Load->getMemOperand(),
28456 Load->getAddressingMode(), Load->getExtensionType());
28459 if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) {
28461 Load->getMemoryVT().getVectorElementType());
28502 SDValue Mask = Load->getMask();
28506 assert(Load->getExtensionType() != ISD::NON_EXTLOAD &&
28515 if (Load->getPassThru()->isUndef()) {
28523 if (isZerosVector(Load->getPassThru().getNode()))
28528 ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
28529 Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(),
28530 Load->getAddressingMode(), Load->getExtensionType());
28535 convertToScalableVector(DAG, ContainerVT, Load->getPassThru());
28550 EVT VT = Store->getValue().getValueType();
28552 EVT MemVT = Store->getMemoryVT();
28555 auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
28557 if (VT.isFloatingPoint() && Store->isTruncatingStore()) {
28559 Store->getMemoryVT().getVectorElementType());
28572 return DAG.getMaskedStore(Store->getChain(), DL, NewValue,
28573 Store->getBasePtr(), Store->getOffset(), Pg, MemVT,
28574 Store->getMemOperand(), Store->getAddressingMode(),
28575 Store->isTruncatingStore());
28583 EVT VT = Store->getValue().getValueType();
28586 auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
28587 SDValue Mask = convertFixedMaskToScalableVector(Store->getMask(), DAG);
28590 Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
28591 Mask, Store->getMemoryVT(), Store->getMemOperand(),
28592 Store->getAddressingMode(), Store->isTruncatingStore());
28677 llvm_unreachable("unimplemented container type");
28710 llvm_unreachable("unimplemented container type");
28741 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
28754 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
28773 assert(isTypeLegal(VT) && "Expected only legal fixed-width types");
28778 for (const SDValue &V : Op->op_values()) {
28785 EVT VTArg = VTNode->getVT().getVectorElementType();
28792 "Expected only legal fixed-width types");
28806 for (const SDValue &V : Op->op_values()) {
28816 return DAG.getNode(NewOp, DL, VT, Operands, Op->getFlags());
28831 for (const SDValue &V : Op->op_values()) {
28834 // Pass through non-vector operands.
28931 /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
28936 // Lower VECREDUCE_ADD of nxv2i1-nxv16i1 to CNTP rather than UADDV.
28977 SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
28978 SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
29036 unsigned NumOperands = Op->getNumOperands();
29051 Op->getOperand(I), Op->getOperand(I + 1)));
29193 SDValue Chain = HG->getChain();
29194 SDValue Inc = HG->getInc();
29195 SDValue Mask = HG->getMask();
29196 SDValue Ptr = HG->getBasePtr();
29197 SDValue Index = HG->getIndex();
29198 SDValue Scale = HG->getScale();
29199 SDValue IntID = HG->getIntID();
29204 assert(CID->getZExtValue() == Intrinsic::experimental_vector_histogram_add &&
29210 EVT MemVT = EVT::getVectorVT(Ctx, HG->getMemoryVT(), EC);
29222 MachineMemOperand *MMO = HG->getMemOperand();
29225 MMO->getPointerInfo(), MachineMemOperand::MOLoad, MMO->getSize(),
29226 MMO->getAlign(), MMO->getAAInfo());
29227 ISD::MemIndexType IndexType = HG->getIndexType();
29244 MMO->getPointerInfo(), MachineMemOperand::MOStore, MMO->getSize(),
29245 MMO->getAlign(), MMO->getAAInfo());
29330 // Bail out for 8-bit element types, because with 2048-bit SVE register
29341 // is not known at compile-time, we need to maintain a mask with 'VL' values
29345 Index += IndexLen - ElementsPerVectorReg;
29347 Index = Index - ElementsPerVectorReg;
29352 // For 8-bit elements and 1024-bit SVE registers and MaxOffset equals
29360 // Choosing an out-of-range index leads to the lane being zeroed vs zero
29362 // index elements. For i8 elements an out-of-range index could still be valid
29363 // for a 2048-bit vector register size.
29364 for (unsigned i = 0; i < IndexLen - ElementsPerVectorReg; ++i) {
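// Illustrative sketch (not part of this file; the helper name is invented): a
// per-element model of the TBL semantics relied on above - an index past the
// end of the table yields 0, which is what lets deliberately out-of-range
// indices act as "zero this lane" entries.
#include <cstddef>
#include <cstdint>
#include <vector>
static uint8_t tblLookup(const std::vector<uint8_t> &Table, uint8_t Index) {
  return static_cast<size_t>(Index) < Table.size() ? Table[Index] : 0;
}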
29414 auto ShuffleMask = SVN->getMask();
29424 auto MinLegalExtractEltScalarTy = [](EVT ScalarTy) -> EVT {
29430 if (SVN->isSplat()) {
29431 unsigned Lane = std::max(0, SVN->getSplatIndex());
29442 Imm == VT.getVectorNumElements() - 1) {
29448 DAG.getConstant(VT.getVectorNumElements() - 1, DL, MVT::i64));
29473 if (Subtarget->hasSVE2p1() && EltSize == 64 &&
29515 // are actually sub-vectors of a larger SVE register. When mapping
29523 // when converting from fixed-length to scalable vector types (i.e. the start
29525 unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
29526 unsigned MaxSVESize = Subtarget->getMaxSVEVectorSizeInBits();
29563 // 128-bits.
29564 if (MinSVESize || !Subtarget->isNeonAvailable())
29604 if (Subtarget->isLittleEndian() ||
29647 SDValue ShiftR = Op->getOperand(0);
29648 if (ShiftR->getOpcode() != AArch64ISD::VLSHR)
29654 unsigned ShiftLBits = ShiftL->getConstantOperandVal(1);
29655 unsigned ShiftRBits = ShiftR->getConstantOperandVal(1);
29672 // used - simplify to just Val.
29673 return TLO.CombineTo(Op, ShiftR->getOperand(0));
29682 (Op->getConstantOperandAPInt(1) << Op->getConstantOperandAPInt(2))
29693 unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();
29705 Known.Zero.setHighBits(BitWidth - RequiredBits);
29724 return Subtarget->hasSVE() || Subtarget->hasSVE2() ||
29725 Subtarget->hasComplxNum();
29736 if (!VTy->isScalableTy() && !Subtarget->hasComplxNum())
29739 auto *ScalarTy = VTy->getScalarType();
29740 unsigned NumElements = VTy->getElementCount().getKnownMinValue();
29744 // power-of-2 size, as we later split them into the smallest supported size
29746 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
29747 if ((VTyWidth < 128 && (VTy->isScalableTy() || VTyWidth != 64)) ||
29751 if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2() && VTy->isScalableTy()) {
29752 unsigned ScalarWidth = ScalarTy->getScalarSizeInBits();
29763 return (ScalarTy->isHalfTy() && Subtarget->hasFullFP16()) ||
29764 ScalarTy->isFloatTy() || ScalarTy->isDoubleTy();
29771 VectorType *Ty = cast<VectorType>(InputA->getType());
29774 bool IsScalable = Ty->isScalableTy();
29775 bool IsInt = Ty->getElementType()->isIntegerTy();
29778 Ty->getScalarSizeInBits() * Ty->getElementCount().getKnownMinValue();
29784 int Stride = Ty->getElementCount().getKnownMinValue() / 2;
29785 int AccStride = cast<VectorType>(Accumulator->getType())
29786 ->getElementCount()
29799 FullTy = Accumulator->getType();
29801 cast<VectorType>(Accumulator->getType()));
29824 auto *Mask = B.getAllOnesMask(Ty->getElementCount());
29849 auto *Mask = B.getAllOnesMask(Ty->getElementCount());
29872 Intrinsic::aarch64_sve_cdot, Accumulator->getType(),
29880 unsigned Opc = N->getOpcode();
29882 if (any_of(N->users(),
29883 [&](SDNode *Use) { return Use->getOpcode() == ISD::MUL; }))
29890 return Subtarget->getMinimumJumpTableEntries();
29898 if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors())
29913 if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors())
29932 assert(Subtarget->useSVEForFixedLengthVectors() && "Unexpected mode!");
29991 return !Subtarget->isTargetWindows() &&
29992 MF.getInfo<AArch64FunctionInfo>()->hasStackProbing();
30009 switch (N->getOpcode()) {
30016 assert(N->getNumValues() == 1 && "Expected one result!");
30017 assert(N->getNumOperands() == 2 && "Expected two operands!");
30018 EVT VT = N->getValueType(0);
30019 EVT Op0VT = N->getOperand(0).getValueType();
30020 EVT Op1VT = N->getOperand(1).getValueType();
30037 assert(N->getNumValues() == 1 && "Expected one result!");
30038 assert(N->getNumOperands() == 1 && "Expected one operand!");
30039 EVT VT = N->getValueType(0);
30040 EVT OpVT = N->getOperand(0).getValueType();
30055 assert(N->getNumValues() == 1 && "Expected one result!");
30056 assert(N->getNumOperands() == 2 && "Expected two operands!");
30057 EVT VT = N->getValueType(0);
30058 EVT Op0VT = N->getOperand(0).getValueType();
30059 EVT Op1VT = N->getOperand(1).getValueType();
30066 assert(N->getNumValues() == 1 && "Expected one result!");
30067 assert(N->getNumOperands() == 2 && "Expected two operands!");
30068 EVT VT = N->getValueType(0);
30069 EVT Op0VT = N->getOperand(0).getValueType();
30070 EVT Op1VT = N->getOperand(1).getValueType();
30079 assert(Op1VT == MVT::i32 && isa<ConstantSDNode>(N->getOperand(1)) &&