1 //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
109 #define DEBUG_TYPE "aarch64-lower"
119 "aarch64-elf-ldtls-generation", cl::Hidden,
124 EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
134 EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
139 static cl::opt<bool> EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden,
146 static cl::opt<unsigned> MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden,
154 "aarch64-enable-gisel-sve", cl::Hidden,
349 // Otherwise, it's either a constant discriminator, or a non-blended
351 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
352 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
353 AddrDisc = Disc->getOperand(1);
354 ConstDisc = Disc->getOperand(2);
360 // discriminator value) isn't a 16-bit constant, bail out, and let the
363 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
364 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
369 AddrDisc = DAG->getRegister(AArch64::NoRegister, MVT::i64);
372 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
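For orientation, the blend being decomposed here packs a 16-bit integer discriminator into the top bits of an address discriminator. A minimal value-level sketch of that semantics (the helper name is made up for illustration; this is not the lowering itself):

    #include <cstdint>

    // Sketch: the constant discriminator replaces bits [63:48] of the address
    // discriminator, matching the MOVK ..., LSL #48 the backend emits for a blend.
    uint64_t blendDiscriminator(uint64_t AddrDisc, uint16_t ConstDisc) {
      return (AddrDisc & 0x0000FFFFFFFFFFFFULL) |
             (uint64_t(ConstDisc) << 48);
    }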
383 // vector to all-one or all-zero.
390 if (Subtarget->hasLS64()) {
396 if (Subtarget->hasFPARMv8()) {
404 if (Subtarget->hasNEON()) {
427 if (Subtarget->isSVEorStreamingSVEAvailable()) {
452 if (Subtarget->useSVEForFixedLengthVectors()) {
463 if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) {
473 computeRegisterProperties(Subtarget->getRegisterInfo());
562 // Lowering for many of the conversions is actually specified by the non-f128
588 if (Subtarget->hasFPARMv8()) {
594 if (Subtarget->hasFPARMv8()) {
612 // Variable-sized objects.
630 // AArch64 lacks both left-rotate and popcount instructions.
648 if (Subtarget->hasCSSC()) {
727 if (Subtarget->hasFullFP16()) {
800 // Round-to-integer operations need custom lowering for fp16, as Promote doesn't work
855 if (!Subtarget->hasFullFP16()) {
862 // AArch64 has implementations of a lot of rounding-like FP operations.
876 if (Subtarget->hasFullFP16())
885 if (Subtarget->hasFullFP16())
902 if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
914 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
963 if (Subtarget->hasLSE128()) {
971 // 128-bit loads and stores can be done without expanding
975 // Aligned 128-bit loads and stores are single-copy atomic according to the
976 // v8.4a spec. LRCPC3 introduces 128-bit STILP/LDIAPP but still requires LSE2.
977 if (Subtarget->hasLSE2()) {
982 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
983 // custom lowering, as there are no un-paired non-temporal stores and
994 // 256 bit non-temporal loads can be lowered to LDNP. This is done using
995 // custom lowering, as there are no un-paired non-temporal loads; legalization
1019 // Make floating-point constants legal for the large code model, so they don't
1021 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
1026 // AArch64 does not have floating-point extending loads, i1 sign-extending
1027 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
1046 if (Subtarget->hasFPARMv8()) {
1087 // Vector add and sub nodes may conceal a high-half opportunity.
1134 Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32;
1139 Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16;
1146 Subtarget->requiresStrictAlign() ? MaxLoadsPerMemcmpOptSize : 8;
1161 if (!Subtarget->isTargetWindows())
1178 if (Subtarget->isNeonAvailable()) {
1181 // clang-format off
1203 // clang-format on
1211 // AArch64 doesn't have direct vector ->f32 conversion instructions for
1216 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
1217 // Or, direct i32 -> f16 vector conversion. Set it to custom, so the
1218 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
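The two-step shape described above can be illustrated with ACLE intrinsics (a sketch only: it assumes an AArch64 target with NEON and half-precision storage support, and is not the SelectionDAG lowering itself):

    #include <arm_neon.h>

    // v4i32 -> v4f32 -> v4f16: widen to single precision, then narrow to half.
    float16x4_t convertI32ToF16(int32x4_t v) {
      float32x4_t f = vcvtq_f32_s32(v);
      return vcvt_f16_f32(f);
    }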
1224 if (Subtarget->hasFullFP16()) {
1264 // Custom handling for some quad-vector types to detect MULL.
1294 if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
1347 // AArch64 has implementations of a lot of rounding-like FP operations.
1355 if (Subtarget->hasFullFP16())
1364 if (Subtarget->hasFullFP16())
1402 Subtarget->isLittleEndian() ? Legal : Expand);
1413 if (Subtarget->hasSME()) {
1419 if (Subtarget->isSVEorStreamingSVEAvailable()) {
1429 if (Subtarget->isSVEorStreamingSVEAvailable()) {
1492 if (!Subtarget->isLittleEndian())
1495 if (Subtarget->hasSVE2() ||
1496 (Subtarget->hasSME() && Subtarget->isStreaming()))
1574 // SVE supports truncating stores of 64 and 128-bit vectors
1655 if (!Subtarget->isLittleEndian())
1666 if (!Subtarget->isLittleEndian())
1680 // NEON doesn't support 64-bit vector integer muls, but SVE does.
1686 if (Subtarget->useSVEForFixedLengthVectors()) {
1689 VT, /*OverrideNEON=*/!Subtarget->isNeonAvailable()))
1694 VT, /*OverrideNEON=*/!Subtarget->isNeonAvailable()))
1759 // Handle operations that are only available in non-streaming SVE mode.
1760 if (Subtarget->isSVEAvailable()) {
1778 if (Subtarget->hasSVE2())
1784 if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
1791 if (Subtarget->hasSVE()) {
1798 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
1803 // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
1805 if (Subtarget->isTargetWindows()) {
1817 if (Subtarget->isWindowsArm64EC()) {
1858 // But we do support custom-lowering for FCOPYSIGN.
1862 Subtarget->hasFullFP16()))
1911 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
1934 // * The lowering of the non-strict versions involves target-specific ISD
1940 if (Subtarget->isLittleEndian()) {
1948 if (Subtarget->hasD128()) {
1956 // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
1957 if (!Subtarget->hasSVE())
1961 // whilelo instruction for generating fixed-width predicates too.
1975 if (!Subtarget->isSVEorStreamingSVEAvailable())
1979 // also support fixed-width predicates.
2020 // Mark floating-point truncating stores/extending loads as having custom
2033 bool PreferSVE = !PreferNEON && Subtarget->isSVEAvailable();
2129 if (Subtarget->isNeonAvailable())
2135 if (Subtarget->isNeonAvailable())
2148 // isIntImmediate - This method tests to see if the node is a constant
2152 Imm = C->getZExtValue();
2158 // isOpcWithIntImmediate - This method tests to see if the node is a specific
2163 return N->getOpcode() == Opc &&
2164 isIntImmediate(N->getOperand(1).getNode(), Imm);
2172 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
2187 // The goal here is to set the non-demanded bits in a way that minimizes
2189 // we set the non-demanded bits to the value of the preceding demanded bits.
2191 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
2197 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
2200 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
2205 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
2210 // We cannot shrink the element size any further if it is 2-bits.
2245 // If the new constant immediate is all-zeros or all-ones, let the target
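The idea sketched in the comments above can be written as a plain bit loop (the real code uses a branch-free carry trick and also handles wrap-around within the element; the helper name here is hypothetical):

    #include <cstdint>

    // Overwrite each non-demanded bit with the nearest lower demanded bit,
    // which tends to yield the repeated runs a logical immediate can encode.
    uint64_t fillNonDemandedBits(uint64_t Imm, uint64_t DemandedBits) {
      uint64_t Result = 0, Prev = 0;
      for (int i = 0; i < 64; ++i) {
        uint64_t Bit = (Imm >> i) & 1;
        if ((DemandedBits >> i) & 1)
          Prev = Bit;                 // demanded: keep the original value
        Result |= Prev << i;          // non-demanded: copy the propagated bit
      }
      return Result;
    }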
2301 uint64_t Imm = C->getZExtValue();
2305 /// computeKnownBitsForTargetNode - Determine which of the bits specified in
2325 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2326 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2333 ~(Op->getConstantOperandVal(1) << Op->getConstantOperandVal(2));
2334 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2340 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2341 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2347 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2348 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2354 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2355 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2361 APInt(Known.getBitWidth(), Op->getConstantOperandVal(0)));
2366 if (!Subtarget->isTargetILP32())
2368 // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
2373 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2379 static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
2385 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
2387 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
2405 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - Bound);
2414 // bits larger than the element datatype. 32-bit or larger doesn't need
2420 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
2424 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
2459 // Compares return either 0 or all-ones
2474 if (Subtarget->requiresStrictAlign())
2478 // Some CPUs are fine with unaligned stores except for 128-bit ones.
2479 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
2489 // them regresses performance on micro-benchmarks and olden/bh.
2499 if (Subtarget->requiresStrictAlign())
2503 // Some CPUs are fine with unaligned stores except for 128-bit ones.
2504 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
2515 // them regresses performance on micro-benchmarks and olden/bh.
2865 // We materialise the F128CSEL pseudo-instruction as some control flow and a
2877 MachineFunction *MF = MBB->getParent();
2878 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2879 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2881 MachineFunction::iterator It = ++MBB->getIterator();
2889 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
2890 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
2891 MF->insert(It, TrueBB);
2892 MF->insert(It, EndBB);
2894 // Transfer rest of current basic-block to EndBB
2895 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
2896 MBB->end());
2897 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
2899 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
2900 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
2901 MBB->addSuccessor(TrueBB);
2902 MBB->addSuccessor(EndBB);
2905 TrueBB->addSuccessor(EndBB);
2908 TrueBB->addLiveIn(AArch64::NZCV);
2909 EndBB->addLiveIn(AArch64::NZCV);
2912 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
2925 BB->getParent()->getFunction().getPersonalityFn())) &&
2933 MachineFunction &MF = *MBB->getParent();
2935 DebugLoc DL = MBB->findDebugLoc(MBBI);
2943 return NextInst->getParent();
2950 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2951 MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
2966 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2968 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::LDR_ZA));
2984 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2987 MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opcode))
3000 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3001 MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
3032 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3034 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::ZERO_M));
3050 MachineFunction *MF = BB->getParent();
3051 MachineFrameInfo &MFI = MF->getFrameInfo();
3052 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
3053 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
3055 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3057 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRXui))
3061 // Set the reserved bytes (10-15) to zero
3062 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRHHui))
3066 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRWui))
3073 BB->remove_instr(&MI);
3080 MachineFunction *MF = BB->getParent();
3081 MachineFrameInfo &MFI = MF->getFrameInfo();
3082 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
3087 assert(!MF->getSubtarget<AArch64Subtarget>().isTargetWindows() &&
3090 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
3093 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3094 MachineRegisterInfo &MRI = MF->getRegInfo();
3099 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), SP)
3102 // Allocate a lazy-save buffer object of the size given, normally SVL * SVL
3105 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::MSUBXrrr), Dest)
3109 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
3117 BB->remove_instr(&MI);
3125 if (SMEOrigInstr != -1) {
3126 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3128 TII->get(MI.getOpcode()).TSFlags & AArch64::SMEMatrixTypeMask;
3160 // has implicit def. This def is early-clobber as it will be set at
3216 //===----------------------------------------------------------------------===//
3218 //===----------------------------------------------------------------------===//
3220 //===----------------------------------------------------------------------===//
3222 //===----------------------------------------------------------------------===//
3234 /// isZerosVector - Check whether SDNode N is a zero-filled vector.
3237 while (N->getOpcode() == ISD::BITCAST)
3238 N = N->getOperand(0).getNode();
3243 if (N->getOpcode() != AArch64ISD::DUP)
3246 auto Opnd0 = N->getOperand(0);
3250 /// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
3279 /// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
3369 /// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
3395 // All of the compare-mask comparisons are ordered, but we can switch
3418 // the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
3424 // So, finally, the only LLVM-native comparisons that don't mention C or V
3523 /// - We can implement (NEG SETCC) i.e. negating a single comparison by
3525 /// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
3528 /// - Note that we can only ever negate all previously processed results.
3530 /// of two sub-trees (because the negation affects all sub-trees emitted so
3531 /// far, so the 2nd sub-tree we emit would also affect the first).
3533 /// - (OR (SETCC A) (SETCC B)) can be implemented via:
3535 /// - After transforming OR to NEG/AND combinations we may be able to use NEG
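The OR-to-NEG/AND rewrite in the list above is De Morgan's law, applied so the whole expression can be emitted as a single CMP/CCMP chain; a trivial stand-alone check of the identity:

    #include <cassert>

    int main() {
      for (int a = 0; a <= 1; ++a)
        for (int b = 0; b <= 1; ++b)
          // (A or B) == not(and(not A, not B)), i.e. (OR (SETCC A) (SETCC B))
          // becomes (NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))).
          assert((a || b) == !(!a && !b));
      return 0;
    }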
3578 APInt Imm = Const->getAPIntValue();
3579 if (Imm.isNegative() && Imm.sgt(-32)) {
3581 RHS = DAG.getConstant(Imm.abs(), DL, Const->getValueType(0));
3605 /// \param CanNegate Set to true if we can negate the whole sub-tree just by
3608 /// Negate==true on this sub-tree)
3622 unsigned Opcode = Val->getOpcode();
3624 if (Val->getOperand(0).getValueType() == MVT::f128)
3635 SDValue O0 = Val->getOperand(0);
3636 SDValue O1 = Val->getOperand(1);
3655 // the leaves, then this sub-tree as a whole negates naturally.
3657 // If we cannot naturally negate the whole sub-tree, then this must be
3677 /// \p Negate is true if we want this sub-tree being negated just by changing
3683 unsigned Opcode = Val->getOpcode();
3685 SDValue LHS = Val->getOperand(0);
3686 SDValue RHS = Val->getOperand(1);
3687 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
3720 assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
3724 SDValue LHS = Val->getOperand(0);
3731 SDValue RHS = Val->getOperand(1);
3738 // Swap sub-tree that must come first to the right side.
3751 // Swap the sub-tree that we can negate naturally to the left.
3760 // Negate the left sub-tree if possible, otherwise negate the result.
3776 // Emit sub-trees.
3812 uint64_t Mask = MaskCst->getZExtValue();
3828 uint64_t Shift = ShiftCst->getZExtValue();
3844 uint64_t C = RHSC->getZExtValue();
3853 isLegalArithImmed((uint32_t)(C - 1))) ||
3855 isLegalArithImmed(C - 1ULL))) {
3857 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
3864 isLegalArithImmed((uint32_t)(C - 1))) ||
3865 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
3867 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
3908 !isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) {
3931 // -1 constant. For example,
3942 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
3943 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
3944 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
3945 LHS.getNode()->hasNUsesOfValue(1, 0)) {
3946 int16_t ValueofRHS = RHS->getAsZExtVal();
3947 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
3958 if (!Cmp && (RHSC->isZero() || RHSC->isOne())) {
3960 if ((CC == ISD::SETNE) ^ RHSC->isZero())
4008 // Extend to 64-bits, then perform a 64-bit multiply.
4015 // Check that the result fits into a 32-bit integer.
4055 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
4066 !Subtarget->isNeonAvailable()))
4076 // -->
4082 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
4101 // (xor x, (select_cc a, b, cc, 0, -1) )
4102 // -->
4103 // (csel x, (xor x, -1), cc ...)
4107 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
4113 // FIXME: This could be generalized to non-integer comparisons.
4126 if (CTVal->isAllOnes() && CFVal->isZero()) {
4133 if (CTVal->isZero() && CFVal->isAllOnes()) {
4139 DAG.getConstant(-1ULL, dl, Other.getValueType()));
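Restated in plain C++, the combine above relies on a simple identity (sketch only; the values stand in for the DAG nodes): when the select produces 0 or all-ones, XOR with it is a select between x and ~x, which is exactly a CSEL.

    #include <cstdint>

    // x ^ (cond ? 0 : ~0) == (cond ? x : ~x), i.e. CSEL x, (XOR x, -1), cond.
    uint64_t xorWithSelect(uint64_t x, bool cond) {
      uint64_t mask = cond ? 0 : ~0ULL;
      return x ^ mask;
    }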
4252 // The front-end should have filtered out the out-of-range values
4253 assert(Locality <= 3 && "Prefetch locality out-of-range");
4257 Locality = 3 - Locality;
4276 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
4289 bool IsStrict = Op->isStrictFPOpcode();
4294 if (useSVEForFixedLengthVectorVT(SrcVT, !Subtarget->isNeonAvailable()))
4300 !((Subtarget->hasNEON() || Subtarget->hasSME()) &&
4301 Subtarget->hasBF16())) {
4371 bool IsStrict = Op->isStrictFPOpcode();
4382 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()) ||
4383 useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable()))
4389 if ((InVT.getVectorElementType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
4436 // Use a scalar operation for conversions between single-element vectors of
4456 bool IsStrict = Op->isStrictFPOpcode();
4463 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
4489 // AArch64 FP-to-int conversions saturate to the destination element size, so
4494 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
4512 (!Subtarget->hasFullFP16() || DstElementWidth > 16)) ||
4567 // AArch64 FP-to-int conversions saturate to the destination register size, so
4576 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
4582 if ((SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) || SrcVT == MVT::bf16) {
4592 (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
4633 // Round the floating-point value into a floating-point register with the
4647 bool IsStrict = Op->isStrictFPOpcode();
4669 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()) ||
4670 useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable()))
4715 // Use a scalar operation for conversions between single-element vectors of
4736 bool IsStrict = Op->isStrictFPOpcode();
4739 bool IsSigned = Op->getOpcode() == ISD::STRICT_SINT_TO_FP ||
4740 Op->getOpcode() == ISD::SINT_TO_FP;
4770 // We need to be careful about i64 -> bf16.
4793 // double-precision value or it is too big. If it is sufficiently small,
4794 // we should just go u64 -> double -> bf16 in a naive way. Otherwise, we
4795 // ensure that u64 -> double has no rounding error by only using the 52
4855 if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
4863 // Other conversions are legal, unless it's to the completely software-based
4925 "Expected int->fp bitcast!");
4971 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
4972 // 64-bits we need to insert a new extension so that it will be 64-bits.
4983 // Returns lane if Op extracts from a two-element vector and lane is constant
4988 if (OpNode->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
4991 EVT VT = OpNode->getOperand(0).getValueType();
4992 ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpNode->getOperand(1));
4996 return C->getZExtValue();
5006 for (const SDValue &Elt : N->op_values()) {
5011 if (!isIntN(HalfSize, C->getSExtValue()))
5014 if (!isUIntN(HalfSize, C->getZExtValue()))
5072 return N0->hasOneUse() && N1->hasOneUse() &&
5083 return N0->hasOneUse() && N1->hasOneUse() &&
5092 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
5115 SDValue Chain = Op->getOperand(0);
5116 SDValue RMValue = Op->getOperand(1);
5120 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
5121 // (((arg - 1) & 3) << 22).
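A quick stand-alone check of the two mappings quoted in these comments (pure arithmetic; FPCR.RMode sits in bits [23:22], hence the shift by 22):

    #include <cassert>

    int main() {
      for (unsigned rm = 0; rm < 4; ++rm) {
        unsigned fltRounds = (rm + 1) & 3;    // hardware -> FLT_ROUNDS: 0->1, 1->2, 2->3, 3->0
        assert(((fltRounds - 1) & 3) == rm);  // FLT_ROUNDS -> hardware: 0->3, 1->0, 2->1, 3->2
      }
      return 0;
    }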
5159 SDValue Chain = Op->getOperand(0);
5178 SDValue Chain = Op->getOperand(0);
5179 SDValue Mode = Op->getOperand(1);
5193 SDValue Chain = Op->getOperand(0);
5281 bool OverrideNEON = !Subtarget->isNeonAvailable();
5285 // Multiplications are only custom-lowered for 128-bit and 64-bit vectors so
5288 "unexpected type for custom-lowering ISD::MUL");
5303 if (Subtarget->hasSVE())
5320 if (Subtarget->hasSVE())
5342 // isel lowering to take advantage of no-stall back to back s/umul + s/umla.
5343 // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57
5409 "Expected a predicate-to-predicate bitcast");
5416 // e.g. <n x 16 x i1> -> <n x 16 x i1>
5424 // case (e.g. when casting from <vscale x 16 x i1> -> <vscale x 2 x i1>) then
5461 // ldr(%tileslice, %ptr, 11) -> ldr [%tileslice, 11], [%ptr, 11]
5465 // ->
5477 // ->
5483 // Case 4: If the vecnum is an add of an immediate, then the non-immediate
5487 // ->
5498 SDValue TileSlice = N->getOperand(2);
5499 SDValue Base = N->getOperand(3);
5500 SDValue VecNum = N->getOperand(4);
5507 ConstAddend = cast<ConstantSDNode>(VecNum.getOperand(1))->getSExtValue();
5510 ConstAddend = ImmNode->getSExtValue();
5515 if (int32_t C = (ConstAddend - ImmAddend)) {
5572 Op->getOperand(0), // Chain
5578 Op->getOperand(0), // Chain
5593 SDValue Chain = Node->getChain();
5598 auto Alignment = Node->getMemOperand()->getAlign();
5599 bool IsVol = Node->isVolatile();
5600 auto DstPtrInfo = Node->getPointerInfo();
5659 SelectionDAG &DAG) -> SDValue {
5662 // re-use the dag-combiner function with aarch64_neon_{pmull,smull,umull}.
5687 // the non-high version of PMULL instruction. Use v1i64 to represent i64.
5958 const auto *RegInfo = Subtarget->getRegisterInfo();
5959 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
5971 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
6039 // then extracting a fixed-width subvector from the scalable vector.
6072 // an SVE predicate register mask from the fixed-width vector.
6097 // SVE only supports implicit extension of 32-bit indices.
6098 if (!Subtarget->hasSVE() || IndexVT.getVectorElementType() != MVT::i32)
6105 // Scalable vectors with "vscale * 2" or fewer elements sit within a 64-bit
6112 if (!ExtVT.isScalableVector() && !Subtarget->useSVEForFixedLengthVectors())
6120 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal->getOperand(0))) {
6121 if (!isLoadExtLegalOrCustom(ISD::ZEXTLOAD, ExtVT, Ld->getValueType(0))) {
6122 // Disable extending masked loads for fixed-width for now, since the code
6128 for (auto *U : Ld->getMask()->uses())
6160 return AddrModes.find(Key)->second;
6190 SDValue Chain = MGT->getChain();
6191 SDValue PassThru = MGT->getPassThru();
6192 SDValue Mask = MGT->getMask();
6193 SDValue BasePtr = MGT->getBasePtr();
6194 SDValue Index = MGT->getIndex();
6195 SDValue Scale = MGT->getScale();
6197 EVT MemVT = MGT->getMemoryVT();
6198 ISD::LoadExtType ExtType = MGT->getExtensionType();
6199 ISD::MemIndexType IndexType = MGT->getIndexType();
6203 if (!PassThru->isUndef() && !isZerosVector(PassThru.getNode())) {
6206 DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
6207 MGT->getMemOperand(), IndexType, ExtType);
6212 bool IsScaled = MGT->isIndexScaled();
6213 bool IsSigned = MGT->isIndexSigned();
6217 uint64_t ScaleVal = Scale->getAsZExtVal();
6219 assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
6226 return DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
6227 MGT->getMemOperand(), IndexType, ExtType);
6232 assert(Subtarget->useSVEForFixedLengthVectors() &&
6235 // NOTE: Handle floating-point as if integer then bitcast the result.
6262 PassThru = PassThru->isUndef() ? DAG.getUNDEF(ContainerVT)
6269 Ops, MGT->getMemOperand(), IndexType, ExtType);
6289 SDValue Chain = MSC->getChain();
6290 SDValue StoreVal = MSC->getValue();
6291 SDValue Mask = MSC->getMask();
6292 SDValue BasePtr = MSC->getBasePtr();
6293 SDValue Index = MSC->getIndex();
6294 SDValue Scale = MSC->getScale();
6296 EVT MemVT = MSC->getMemoryVT();
6297 ISD::MemIndexType IndexType = MSC->getIndexType();
6298 bool Truncating = MSC->isTruncatingStore();
6300 bool IsScaled = MSC->isIndexScaled();
6301 bool IsSigned = MSC->isIndexSigned();
6305 uint64_t ScaleVal = Scale->getAsZExtVal();
6307 assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
6314 return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
6315 MSC->getMemOperand(), IndexType, Truncating);
6320 assert(Subtarget->useSVEForFixedLengthVectors() &&
6323 // Once bitcast we treat floating-point scatters as if integer.
6357 return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
6358 MSC->getMemOperand(), IndexType, Truncating);
6369 EVT VT = Op->getValueType(0);
6374 SDValue PassThru = LoadNode->getPassThru();
6375 SDValue Mask = LoadNode->getMask();
6377 if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
6381 VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(),
6382 LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(),
6383 LoadNode->getMemOperand(), LoadNode->getAddressingMode(),
6384 LoadNode->getExtensionType());
6398 SDValue Value = ST->getValue();
6419 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
6420 ST->getBasePtr(), ST->getMemOperand());
6432 SDValue Value = StoreNode->getValue();
6435 EVT MemVT = StoreNode->getMemoryVT();
6440 /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
6443 unsigned AS = StoreNode->getAddressSpace();
6444 Align Alignment = StoreNode->getAlign();
6447 StoreNode->getMemOperand()->getFlags(),
6452 if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
6456 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
6457 // the custom lowering, as there are no un-paired non-temporal stores and
6460 if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
6469 StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
6473 StoreNode->getValue(),
6477 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
6478 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
6481 } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
6484 SDValue Value = StoreNode->getValue();
6485 assert(Value->getValueType(0) == MVT::i64x8);
6486 SDValue Chain = StoreNode->getChain();
6487 SDValue Base = StoreNode->getBasePtr();
6494 Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
6495 StoreNode->getOriginalAlign());
6503 /// Lower atomic or volatile 128-bit stores to a single STP instruction.
6507 assert(StoreNode->getMemoryVT() == MVT::i128);
6508 assert(StoreNode->isVolatile() || StoreNode->isAtomic());
6511 StoreNode->getMergedOrdering() == AtomicOrdering::Release;
6512 if (StoreNode->isAtomic())
6513 assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
6514 Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
6515 StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
6516 StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic);
6518 SDValue Value = (StoreNode->getOpcode() == ISD::STORE ||
6519 StoreNode->getOpcode() == ISD::ATOMIC_STORE)
6520 ? StoreNode->getOperand(1)
6521 : StoreNode->getOperand(2);
6529 {StoreNode->getChain(), StoreValue.first, StoreValue.second,
6530 StoreNode->getBasePtr()},
6531 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
6541 if (LoadNode->getMemoryVT() == MVT::i64x8) {
6543 SDValue Base = LoadNode->getBasePtr();
6544 SDValue Chain = LoadNode->getChain();
6550 LoadNode->getPointerInfo(),
6551 LoadNode->getOriginalAlign());
6560 EVT VT = Op->getValueType(0);
6563 if (LoadNode->getMemoryVT() != MVT::v4i8)
6567 if (Subtarget->requiresStrictAlign() && LoadNode->getAlign() < Align(4))
6571 if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
6573 else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
6574 LoadNode->getExtensionType() == ISD::EXTLOAD)
6579 SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
6580 LoadNode->getBasePtr(), MachinePointerInfo());
6639 VT.getFixedSizeInBits() - ShiftNo->getZExtValue();
6698 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
6709 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
6937 if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
6938 assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
6978 EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
6993 !Subtarget->isNeonAvailable()))
7067 "WRITE_REGISTER custom lowering is only for 128-bit sysregs");
7092 return !Subtarget->useSVEForFixedLengthVectors();
7117 // NEON-sized vectors can be emulated using SVE instructions.
7119 return Subtarget->isSVEorStreamingSVEAvailable();
7126 if (!Subtarget->useSVEForFixedLengthVectors())
7130 if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
7141 //===----------------------------------------------------------------------===//
7143 //===----------------------------------------------------------------------===//
7146 unsigned Opcode = N->getOpcode();
7151 unsigned IID = N->getConstantOperandVal(0);
7187 // The non-vararg case is handled in the CC function itself.
7200 if (Subtarget->isTargetWindows()) {
7202 if (Subtarget->isWindowsArm64EC())
7208 if (!Subtarget->isTargetDarwin())
7212 return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
7216 if (Subtarget->isWindowsArm64EC())
7222 if (Subtarget->isWindowsArm64EC())
7245 if (Subtarget->isWindowsArm64EC())
7264 Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg());
7266 (isVarArg && Subtarget->isWindowsArm64EC());
7273 FuncInfo->setIsSVECC(true);
7292 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
7296 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
7324 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
7328 // non-compliant manner for larger structs.
7333 // FIXME: This works on big-endian for composite byvals, which are the common
7344 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
7366 FuncInfo->setIsSVECC(true);
7369 FuncInfo->setIsSVECC(true);
7372 FuncInfo->setIsSVECC(true);
7387 // tn: res,ch,glue = CopyFromReg t(n-1), ..
7403 // If this is an 8, 16 or 32-bit value, it is really passed promoted
7413 (VA.getValVT().isScalableVT() || Subtarget->isWindowsArm64EC()) &&
7437 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
7439 BEAlign = 8 - ArgSize;
7477 Subtarget->isWindowsArm64EC()) &&
7498 Subtarget->isWindowsArm64EC()) &&
7504 while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
7516 NumParts--;
7537 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
7541 // i1 arguments are zero-extended to i8 by the caller. Emit a
7545 if (OrigArg->getType()->isIntegerTy(1)) {
7566 FuncInfo->setPStateSMReg(Reg);
7587 if (!Subtarget->isTargetDarwin() || IsWin64) {
7588 // The AAPCS variadic function ABI is identical to the non-variadic
7598 // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
7599 VarArgsOffset = alignTo(VarArgsOffset, Subtarget->isTargetILP32() ? 4 : 8);
7600 FuncInfo->setVarArgsStackOffset(VarArgsOffset);
7601 FuncInfo->setVarArgsStackIndex(
7610 FuncInfo->getForwardedMustTailRegParms();
7630 assert(!FuncInfo->getSRetReturnReg());
7635 FuncInfo->setSRetReturnReg(Reg);
7647 // This is a non-standard ABI so by fiat I say we're allowed to make full
7654 FuncInfo->setArgumentStackToRestore(StackArgSize);
7662 FuncInfo->setBytesInStackArgArea(StackArgSize);
7664 if (Subtarget->hasCustomCallingConv())
7665 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
7670 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
7676 if (!Subtarget->isTargetWindows() && !hasInlineStackProbe(MF)) {
7696 DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
7718 Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg());
7724 if (Subtarget->isWindowsArm64EC()) {
7725 // In the ARM64EC ABI, only x0-x3 are used to pass arguments to varargs
7731 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
7735 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
7738 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
7743 if (Subtarget->isWindowsArm64EC()) {
7745 // compute its address relative to x4. For a normal AArch64->AArch64
7762 MF, GPRIdx, (i - FirstVariadicGPR) * 8)
7769 FuncInfo->setVarArgsGPRIndex(GPRIdx);
7770 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
7772 if (Subtarget->hasFPARMv8() && !IsWin64) {
7777 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
7795 FuncInfo->setVarArgsFPRIndex(FPRIdx);
7796 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
7804 /// LowerCallResult - Lower the result values of a call into the
7912 bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC, IsVarArg);
7982 // The check for matching callee-saved regs will determine whether it is
7985 MF.getInfo<AArch64FunctionInfo>()->isSVECC())
7990 // When using the Windows calling convention on a non-windows OS, we want
7993 if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
8003 if (i->hasByValAttr())
8006 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
8012 if (i->hasInRegAttr())
8019 // Externally-defined functions with weak linkage should not be
8020 // tail-called on AArch64 when the OS does not support dynamic
8021 // pre-emption of symbols, as the AAELF spec requires normal calls
8024 // situation (as used for tail calls) is implementation-defined, so we
8027 const GlobalValue *GV = G->getGlobal();
8029 if (GV->hasExternalWeakLinkage() &&
8050 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
8051 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8053 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8054 if (Subtarget->hasCustomCallingConv()) {
8055 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
8056 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
8058 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8071 if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
8093 Subtarget->isWindowsArm64EC()) &&
8101 if (CCInfo.getStackSize() > FuncInfo->getBytesInStackArgArea())
8117 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
8125 for (SDNode *U : DAG.getEntryNode().getNode()->uses())
8127 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
8128 if (FI->getIndex() < 0) {
8129 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
8131 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
8148 // Check if the value is zero-extended from i1 to i8
8162 // Live-in physreg copies that are glued to SMSTART are applied as
8163 // implicit-def's in the InstrEmitter. Here we remove them, allowing the
8165 // copies to avoid these fake clobbers of actually-preserved GPRs.
8168 for (unsigned I = MI.getNumOperands() - 1; I > 0; --I)
8187 // frame-address. If they contain a frame-index to a scalable vector, this
8191 if (MF.getInfo<AArch64FunctionInfo>()->hasStreamingModeChanges() &&
8209 FuncInfo->setHasStreamingModeChanges(true);
8211 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
8212 SDValue RegMask = DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask());
8243 /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
8265 bool IsCFICall = CLI.CB && CLI.CB->isIndirectCall() && CLI.CFIType;
8269 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
8270 !Subtarget->noBTIAtReturnTwice()) {
8271 GuardWithBTI = FuncInfo->branchTargetEnforcement();
8324 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
8341 // arguments to begin at SP+0. Completely unused for non-tail calls.
8345 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
8348 // popped size 16-byte aligned.
8354 FPDiff = NumReusableBytes - NumBytes;
8358 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
8359 FuncInfo->setTailCallReservedStack(-FPDiff);
8361 // The stack pointer must be 16-byte aligned at all times it's used for a
8364 // a 16-byte aligned SP and the delta applied for the tail call should
8374 CalleeAttrs = SMEAttrs(ES->getSymbol());
8377 [&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & {
8380 R << ore::NV("Callee", ES->getSymbol());
8381 else if (CLI.CB && CLI.CB->getCalledFunction())
8382 R << ore::NV("Callee", CLI.CB->getCalledFunction()->getName());
8391 const TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
8455 // PSTATE.ZA before the call if there is no lazy-save active.
8458 "Lazy-save should have PSTATE.SM=1 on entry to the function");
8479 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
8480 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
8490 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
8508 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
8511 // already be zero-extended.
8513 // We cannot just emit a (zext i8 (trunc (assert-zext i8)))
8517 // (ext (zext x)) -> (zext x)
8529 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
8545 assert((isScalable || Subtarget->isWindowsArm64EC()) &&
8552 while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
8575 NumParts--;
8613 // parts of an [N x i32] into an X-register. The extension type will
8621 ->second;
8649 // FIXME: This works on big-endian for composite byvals, which are the
8660 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
8663 BEAlign = 8 - OpSize;
8712 if (IsVarArg && Subtarget->isWindowsArm64EC()) {
8735 if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
8748 // Build a sequence of copy-to-reg nodes chained together with token chain
8760 auto GV = G->getGlobal();
8762 Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
8767 const GlobalValue *GV = G->getGlobal();
8772 Subtarget->isTargetMachO()) ||
8773 MF.getFunction().getParent()->getRtLibUseGOT();
8774 const char *Sym = S->getSymbol();
8783 // We don't usually want to end the call-sequence here because we would tidy
8784 // the frame up *after* the call, however in the ABI-changing tail-call case
8825 const uint64_t Key = CLI.PAI->Key;
8832 extractPtrauthBlendDiscriminators(CLI.PAI->Discriminator, &DAG);
8849 // Add a register mask operand representing the call-preserved registers.
8851 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
8853 // For 'this' returns, use the X0-preserving mask if applicable
8854 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
8857 Mask = TRI->getCallPreservedMask(MF, CallConv);
8860 Mask = TRI->getCallPreservedMask(MF, CallConv);
8862 if (Subtarget->hasCustomCallingConv())
8863 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
8865 if (TRI->isAnyArgRegReserved(MF))
8866 TRI->emitReservedArgRegCallError(MF);
8882 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
8892 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
8911 InGlue = Result.getValue(Result->getNumValues() - 1);
8919 if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
8941 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
8943 TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
8976 // a vreg -> vreg copy.
8990 DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
9040 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
9055 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
9066 })->second;
9074 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
9080 Register Reg = FuncInfo->getPStateSMReg();
9108 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
9122 const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&MF);
9158 //===----------------------------------------------------------------------===//
9160 //===----------------------------------------------------------------------===//
9165 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
9166 N->getOffset(), Flag);
9172 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
9178 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
9179 N->getOffset(), Flag);
9185 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
9191 return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag);
9251 const GlobalValue *GV = GN->getGlobal();
9252 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
9255 assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
9312 assert(Subtarget->isTargetDarwin() &&
9318 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
9343 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
9344 const uint32_t *Mask = TRI->getTLSCallPreservedMask();
9345 if (Subtarget->hasCustomCallingConv())
9346 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
9358 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
9359 if (DAG.getMachineFunction().getFunction().hasFnAttribute("ptrauth-calls")) {
9373 /// Convert a thread-local variable reference into a sequence of instructions to
9464 /// When accessing thread-local variables under either the general-dynamic or
9465 /// local-dynamic system, we make a "TLS-descriptor" call. The variable will
9466 /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
9479 /// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
9500 assert(Subtarget->isTargetELF() && "This function expects an ELF target");
9504 TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
9525 const GlobalValue *GV = GA->getGlobal();
9535 // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
9543 MFI->incNumLocalDynamicTLSAccesses();
9552 // thread-local area.
9556 // in its thread-storage area.
9587 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
9627 const GlobalValue *GV = GA->getGlobal();
9649 if (Subtarget->isTargetDarwin())
9651 if (Subtarget->isTargetELF())
9653 if (Subtarget->isTargetWindows())
9659 //===----------------------------------------------------------------------===//
9663 // - MOVaddrPAC: similar to MOVaddr, with added PAC.
9667 // - LOADgotPAC: similar to LOADgot, with added PAC.
9670 // section is assumed to be read-only (for example, via relro mechanism). See
9673 // - LOADauthptrstatic: similar to LOADgot, but use a
9681 // provide integrity guarantees on the to-be-signed intermediate values.
9684 // with often similarly-signed pointers, making it a good harvesting target.
9692 assert(TGN->getGlobal()->hasExternalWeakLinkage());
9698 if (TGN->getOffset() != 0)
9700 "unsupported non-zero offset in weak ptrauth global reference");
9703 report_fatal_error("unsupported weak addr-div ptrauth global");
9725 // Blend only works if the integer discriminator is 16-bit wide.
9730 // Choosing between 3 lowering alternatives is target-specific.
9731 if (!Subtarget->isTargetELF() && !Subtarget->isTargetMachO())
9740 const GlobalValue *PtrGV = PtrN->getGlobal();
9744 Subtarget->ClassifyGlobalReference(PtrGV, getTargetMachine());
9747 "unsupported non-GOT op flags on ptrauth global reference");
9750 PtrOffsetC += PtrN->getOffset();
9753 assert(PtrN->getTargetFlags() == 0 &&
9762 // No GOT load needed -> MOVaddrPAC
9764 assert(!PtrGV->hasExternalWeakLinkage() && "extern_weak should use GOT");
9771 // GOT load -> LOADgotPAC
9773 if (!PtrGV->hasExternalWeakLinkage())
9779 // extern_weak ref -> LOADauthptrstatic
9791 cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
9796 Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1};
9798 return {Val, Val.getValueSizeInBits() - 1};
9803 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
9835 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
9858 if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
9862 // out of bounds, a late MI-layer pass rewrites branches.
9878 // out of bounds, a late MI-layer pass rewrites branches.
9901 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
9940 if (!Subtarget->isNeonAvailable() &&
9941 !Subtarget->useSVEForFixedLengthVectors())
9960 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) {
9979 auto SetVecVal = [&](int Idx = -1) {
10011 // 64-bit elements. Instead, materialize all bits set and then negate that.
10039 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
10042 if (!Subtarget->isNeonAvailable())
10058 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
10059 // CNT V0.8B, V0.8B // 8xbyte pop-counts
10060 // ADDV B0, V0.8B // sum 8xbyte pop-counts
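The three-instruction sequence in the comment corresponds to this intrinsics-level sketch (assumes an AArch64 NEON target; the backend emits the pattern directly, this is only an illustration):

    #include <arm_neon.h>
    #include <cstdint>

    unsigned popcount64(uint64_t x) {
      uint8x8_t bytes = vcreate_u8(x);   // FMOV D0, X0
      uint8x8_t cnt   = vcnt_u8(bytes);  // CNT  V0.8B, V0.8B
      return vaddv_u8(cnt);              // ADDV B0, V0.8B
    }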
10104 if (Subtarget->hasDotProd() && VT.getScalarSizeInBits() != 16 &&
10141 VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()));
10174 VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
10201 VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
10252 // Skip the one-use zext
10253 if (N->getOpcode() == ISD::ZERO_EXTEND && N->hasOneUse())
10254 N = N->getOperand(0);
10257 if (N->getOpcode() == ISD::XOR) {
10258 WorkList.push_back(std::make_pair(N->getOperand(0), N->getOperand(1)));
10263 // All the non-leaf nodes must be OR.
10264 if (N->getOpcode() != ISD::OR || !N->hasOneUse())
10267 if (isOrXorChain(N->getOperand(0), Num, WorkList) &&
10268 isOrXorChain(N->getOperand(1), Num, WorkList))
10275 SDValue LHS = N->getOperand(0);
10276 SDValue RHS = N->getOperand(1);
10278 EVT VT = N->getValueType(0);
10282 if (N->getOpcode() != ISD::SETCC)
10285 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
10290 LHS->getOpcode() == ISD::OR && LHS->hasOneUse() &&
10314 bool IsStrict = Op->isStrictFPOpcode();
10322 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
10417 ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(3))->get();
10445 if ((LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
10459 // Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform
10460 // into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
10462 if (CC == ISD::SETGT && RHSC && RHSC->isAllOnes() && CTVal && CFVal &&
10463 CTVal->isOne() && CFVal->isAllOnes() &&
10468 DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
10473 // (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1))
10474 // (SELECT_CC setlt, lhs, 0, lhs, 0) -> (AND lhs, (SRA lhs, typesize-1))
10477 RHSC && RHSC->isZero() && CFVal && CFVal->isZero() &&
10482 DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
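The three rewrites above are the usual sign-splat identities; a 32-bit restatement in plain C++ (sketch only, assuming arithmetic right shift as on AArch64):

    #include <cstdint>

    int32_t signOrOne(int32_t x)   { return (x >> 31) | 1; }   // x > -1 ? 1 : -1
    int32_t clampNonNeg(int32_t x) { return x & ~(x >> 31); }  // x > 0 ? x : 0  (BIC form)
    int32_t clampNonPos(int32_t x) { return x & (x >> 31); }   // x < 0 ? x : 0  (AND form)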
10494 if (CTVal && CFVal && CTVal->isAllOnes() && CFVal->isZero()) {
10498 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isZero()) {
10519 const int64_t TrueVal = CTVal->getSExtValue();
10520 const int64_t FalseVal = CFVal->getSExtValue();
10529 TrueVal == -FalseVal) {
10532 // If our operands are only 32-bit wide, make sure we use 32-bit
10536 // 64-bit arithmetic).
10537 const uint32_t TrueVal32 = CTVal->getZExtValue();
10538 const uint32_t FalseVal32 = CFVal->getZExtValue();
10548 // 64-bit check whether we can use CSINC.
10581 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
10582 !RHSVal->isZero() && !RHSVal->isAllOnes()) {
10590 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
10592 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
10624 if (RHSVal && RHSVal->isZero()) {
10629 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
10632 CFVal && CFVal->isZero() &&
10664 // -1 -> vl1, -2 -> vl2, etc. The predicate will then be reversed to get the
10674 // Create a predicate where all but the last -IdxVal elements are false.
10694 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
10705 SDValue CCVal = Op->getOperand(0);
10706 SDValue TVal = Op->getOperand(1);
10707 SDValue FVal = Op->getOperand(2);
10725 if (useSVEForFixedLengthVectorVT(Ty, !Subtarget->isNeonAvailable())) {
10740 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
10758 CC = cast<CondCodeSDNode>(CCVal.getOperand(2))->get();
10767 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
10776 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
10791 !Subtarget->isTargetMachO())
10805 int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
10808 AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
10810 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
10813 "aarch64-jump-table-hardening")) {
10815 if (Subtarget->isTargetMachO()) {
10817 report_fatal_error("Unsupported code-model for hardened jump-table");
10820 assert(Subtarget->isTargetELF() &&
10823 report_fatal_error("Unsupported code-model for hardened jump-table");
10846 // Skip over the jump-table BRINDs, where the destination is JumpTableDest32.
10847 if (Dest->isMachineOpcode() &&
10848 Dest->getMachineOpcode() == AArch64::JumpTableDest32)
10853 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(MF.getFunction());
10874 if (Subtarget->isTargetMachO()) {
10888 const BlockAddress *BA = BAN->getBlockAddress();
10891 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(
10892 *BA->getFunction())) {
10896 SDValue TargetBA = DAG.getTargetBlockAddress(BA, BAN->getValueType(0));
10911 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) {
10926 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
10929 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
10941 if (Subtarget->isWindowsArm64EC()) {
10943 // relative to x4. For a normal AArch64->AArch64 call, x4 == sp on entry,
10948 if (FuncInfo->getVarArgsGPRSize() > 0)
10949 StackOffset = -(uint64_t)FuncInfo->getVarArgsGPRSize();
10951 StackOffset = FuncInfo->getVarArgsStackOffset();
10955 FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
10956 ? FuncInfo->getVarArgsGPRIndex()
10957 : FuncInfo->getVarArgsStackIndex(),
10960 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
10971 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
10978 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
10983 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
10990 int GPRSize = FuncInfo->getVarArgsGPRSize();
10997 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
11009 int FPRSize = FuncInfo->getVarArgsFPRSize();
11015 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
11030 DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32),
11038 DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32),
11049 if (Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg()))
11051 else if (Subtarget->isTargetDarwin())
11062 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
11064 (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
11066 : Subtarget->isTargetILP32() ? 20 : 32;
11067 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
11068 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
11078 assert(Subtarget->isTargetDarwin() &&
11081 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
11087 unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
11101 DAG.getConstant(Align->value() - 1, DL, PtrVT));
11103 DAG.getConstant(-(int64_t)Align->value(), DL, PtrVT));
11157 while (Depth--)
11161 if (Subtarget->isTargetILP32())
11187 const AArch64RegisterInfo *MRI = Subtarget->getRegisterInfo();
11188 unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
11189 if (!Subtarget->isXRegisterReserved(DwarfRegNum) &&
11190 !MRI->isReservedReg(MF, Reg))
11231 // live-in.
11236 // The XPACLRI instruction assembles to a hint-space instruction before
11237 // Armv8.3-A, therefore this instruction can be safely used for any pre-
11238 // Armv8.3-A architecture. On Armv8.3-A and onwards, XPACI is available, so use
11241 if (Subtarget->hasPAuth()) {
11252 /// LowerShiftParts - Lower SHL_PARTS/SRA_PARTS/SRL_PARTS, which returns two
11271 // We can materialize #0.0 as fmov $Rd, XZR for the 64-bit and 32-bit cases, and
11272 // for the 16-bit case when the target has full fp16 support.
11281 IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
11283 IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
11286 (Subtarget->hasFullFP16() && AArch64_AM::getFP16Imm(ImmInt) != -1) ||
11302 unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
11311 //===----------------------------------------------------------------------===//
11313 //===----------------------------------------------------------------------===//
11319 if ((ST->hasNEON() &&
11323 (ST->hasSVE() &&
11328 // the initial estimate is 2^-8. Thus the number of extra steps to refine
11336 : Log2_64_Ceil(DesiredBits) - Log2_64_Ceil(AccurateBits);
11367 (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
11376 // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
11377 // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
11378 for (int i = ExtraSteps; i > 0; --i) {
11406 // Newton reciprocal iteration: E * (2 - X * E)
11407 // AArch64 reciprocal iteration instruction: (2 - M * N)
11408 for (int i = ExtraSteps; i > 0; --i) {
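In scalar form, the refinement steps named in these comments are as follows (math-only sketch; the backend builds the equivalent DAG around FRSQRTE/FRSQRTS and FRECPE/FRECPS):

    // Refine an estimate e of 1/sqrt(x): e' = e * 0.5 * (3 - x * e * e).
    float refineRsqrt(float x, float e, int steps) {
      for (int i = 0; i < steps; ++i)
        e = e * 0.5f * (3.0f - x * e * e);
      return e;
    }

    // Refine an estimate e of 1/x: e' = e * (2 - x * e).
    float refineRecip(float x, float e, int steps) {
      for (int i = 0; i < steps; ++i)
        e = e * (2.0f - x * e);
      return e;
    }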
11421 //===----------------------------------------------------------------------===//
11423 //===----------------------------------------------------------------------===//
11429 // r - A general register
11430 // w - An FP/SIMD register of some size in the range v0-v31
11431 // x - An FP/SIMD register of some size in the range v0-v15
11432 // I - Constant that can be used with an ADD instruction
11433 // J - Constant that can be used with a SUB instruction
11434 // K - Constant that can be used with a 32-bit logical instruction
11435 // L - Constant that can be used with a 64-bit logical instruction
11436 // M - Constant that can be used as a 32-bit MOV immediate
11437 // N - Constant that can be used as a 64-bit MOV immediate
11438 // Q - A memory reference with base register and no offset
11439 // S - A symbolic address
11440 // Y - Floating point constant zero
11441 // Z - Integer constant zero
11443 // Note that general register operands will be output using their 64-bit x
11445 // is prefixed by the %w modifier. Floating-point and SIMD register operands
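As a usage reminder (not part of this file), this is how the 'r' constraint and the %w modifier documented above appear in GNU-style inline assembly; immediate operands would use 'I', 'J', 'K', 'L', and so on as listed:

    #include <cstdint>

    uint32_t add32(uint32_t a, uint32_t b) {
      uint32_t out;
      // 'r' picks a general register; %wN prints its 32-bit (w) name.
      asm("add %w0, %w1, %w2" : "=r"(out) : "r"(a), "r"(b));
      return out;
    }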
11456 if (!Subtarget->hasFPARMv8())
11528 // https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Flag-Output-Operands
11593 /// getConstraintType - Given a constraint letter, return the type of
11643 Type *type = CallOperandVal->getType();
11652 if (type->isFloatingPointTy() || type->isVectorTy())
11675 if (Subtarget->hasLS64() && VT.getSizeInBits() == 512)
11681 if (!Subtarget->hasFPARMv8())
11702 // only take 128-bit registers so just use that regclass.
11704 if (!Subtarget->hasFPARMv8())
11712 if (!Subtarget->hasFPARMv8())
11748 tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
11750 bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
11752 // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
11753 // By default we'll emit v0-v31 for this unless there's a modifier where
11766 if (Res.second && !Subtarget->hasFPARMv8() &&
11777 if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
11783 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
11830 uint64_t CVal = C->getZExtValue();
11836 // instruction [or vice versa], in other words -1 to -4095 with optional
11843 uint64_t NVal = -C->getSExtValue();
11845 CVal = C->getSExtValue();
11853 // distinguish between bit patterns that are valid 32-bit or 64-bit
11867 // also match 32 or 64-bit immediates that can be loaded either using a
11868 // *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
11869 // (M) or 64-bit 0x1234000000000000 (N) etc.
11913 // All assembler immediates are 64-bit integers.
11926 //===----------------------------------------------------------------------===//
11928 //===----------------------------------------------------------------------===//
11930 /// WidenVector - Given a value in the V64 register class, produce the
11943 /// getExtFactor - Determine the adjustment factor for the position when
11993 MaskSource = MaskSource->getOperand(0);
12009 !cast<ConstantSDNode>(MaskIdx)->getConstantIntValue()->equalsInt(i))
12015 MaskSourceVec = MaskSource->getOperand(0);
12018 } else if (MaskSourceVec != MaskSource->getOperand(0)) {
12026 // of elements in the source, or we would have an out-of-bounds access.
12089 "various elements of other fixed-width vectors, provided "
12102 Source->MinElt = std::min(Source->MinElt, EltNo);
12103 Source->MaxElt = std::max(Source->MaxElt, EltNo);
12117 Mask.push_back(-1);
12222 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
12233 Src.WindowBase = -NumSrcElts;
12259 Src.WindowBase = -Src.MinElt;
12288 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
12296 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
12310 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
12311 ExtractBase += NumElts * (Src - Sources.begin());
12447 SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
12464 // NumEltsPerBlock with some values possibly replaced by undef-s.
12466 // Find first non-undef element
12469 "Shuffle with all-undefs must have been caught by previous cases, "
12477 size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
12482 size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
12501 // Look for the first non-undef element.
12511 return Elt != ExpectedElt++ && Elt != -1;
12519 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
12520 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
12526 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
12527 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
12533 Imm -= NumElts;
12538 /// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
12557 /// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
12576 /// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
12598 int LastLHSMismatch = -1, LastRHSMismatch = -1;
12601 if (M[i] == -1) {
12618 if (NumLHSMatch == NumInputElements - 1) {
12622 } else if (NumRHSMatch == NumInputElements - 1) {
12656 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
12679 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
12680 /// the specified operations to build the shuffle. ID is the perfect-shuffle
12689 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
12690 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
12720 auto getPFIDLane = [](unsigned ID, int Elt) -> int {
12722 Elt = 3 - Elt;
12725 Elt--;
12727 return (ID % 9 == 8) ? -1 : ID % 9;
12744 if (MaskElt == -1)
12745 MaskElt = (getPFIDLane(ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
12747 ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
12761 ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
12793 // vrev <4 x i16> -> REV32
12798 // vrev <4 x i8> -> REV16
12820 SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
12826 unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
12862 // out of range values with 0s. We do need to make sure that any out-of-range
12863 // values are really out-of-range for a v16i8 vector.
12877 Offset = Offset < IndexLen ? Offset + IndexLen : Offset - IndexLen;
12951 // Can't handle cases where vector size is not 128-bit
12959 // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
12961 // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
12962 // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
12975 // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
12980 // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
12982 Lane -= Idx * VT.getVectorNumElements() / 2;
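// [Editor's sketch, not part of the original file] The lane arithmetic behind
// the two examples above: duplicating lane L of half Idx (HalfElts elements
// wide) equals duplicating lane Idx * HalfElts + L of the wide vector, and
// the concat case inverts that mapping.
static_assert(1 * 2 + 1 == 3, "dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3");
static_assert(3 - 1 * (4 / 2) == 1, "dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1");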
12985 // Widen the operand to 128-bit register with undef.
13006 if (M0 == -1 && M1 == -1) {
13007 NewMask.push_back(-1);
13011 if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {
13016 if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {
13046 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
13048 // If combining adjacent elements, like two i16's -> i32, two i32's -> i64 ...
13075 SDValue Tbl1 = Op->getOperand(0);
13076 SDValue Tbl2 = Op->getOperand(1);
13082 if (Tbl1->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13083 Tbl1->getOperand(0) != Tbl2ID ||
13084 Tbl2->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13085 Tbl2->getOperand(0) != Tbl2ID)
13088 if (Tbl1->getValueType(0) != MVT::v16i8 ||
13089 Tbl2->getValueType(0) != MVT::v16i8)
13092 SDValue Mask1 = Tbl1->getOperand(3);
13093 SDValue Mask2 = Tbl2->getOperand(3);
13097 TBLMaskParts[I] = Mask1->getOperand(ShuffleMask[I]);
13100 dyn_cast<ConstantSDNode>(Mask2->getOperand(ShuffleMask[I] - 16));
13103 TBLMaskParts[I] = DAG.getConstant(C->getSExtValue() + 32, dl, MVT::i32);
13112 {ID, Tbl1->getOperand(1), Tbl1->getOperand(2),
13113 Tbl2->getOperand(1), Tbl2->getOperand(2), TBLMask});
13116 // Baseline legalization for ZERO_EXTEND_VECTOR_INREG will blend-in zeros,
13118 // so custom-lower it as ZIP1-with-zeros.
13129 // FIXME: support multi-step zipping?
13144 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
13147 // Convert shuffles that are directly supported on NEON to target-specific
13151 ArrayRef<int> ShuffleMask = SVN->getMask();
13163 if (SVN->isSplat()) {
13164 int Lane = SVN->getSplatIndex();
13166 if (Lane == -1)
13172 // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
13226 } else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
13273 SrcLane -= NumElts;
13292 // the PerfectShuffle-generated table to synthesize it from other shuffles.
13317 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
13352 // Current lowering only supports the SVE-ACLE types.
13361 if (CIdx && (CIdx->getZExtValue() <= 3)) {
13362 SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
13392 EVT VT = BVN->getValueType(0);
13396 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
13412 // Try 64-bit splatted SIMD immediate.
13433 // Try 32-bit splatted SIMD immediate.
13486 // Try 16-bit splatted SIMD immediate.
13531 // Try 32-bit splatted SIMD immediate with shifted ones.
13562 // Try 8-bit splatted SIMD immediate.
13622 ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
13625 EVT VT = Bvec->getValueType(0);
13628 if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
13630 ConstVal = FirstElt->getZExtValue();
13664 // If we're compiling for a specific vector-length, we can check if the
13684 // - for the SLI case: C1 == ~(Ones(ElemSizeInBits) << C2)
13685 // - for the SRI case: C1 == ~(Ones(ElemSizeInBits) >> C2)
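// [Editor's sketch, evaluated within an 8-bit element with C2 = 3] The AND
// mask must keep exactly the destination bits that the shifted-in value does
// not overwrite: SLI keeps the low end, SRI keeps the high end.
static_assert((~(0xffu << 3) & 0xffu) == 0x07u, "SLI #3: mask keeps the low 3 bits");
static_assert((~(0xffu >> 3) & 0xffu) == 0xe0u, "SRI #3: mask keeps the high 3 bits");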
13688 EVT VT = N->getValueType(0);
13698 SDValue FirstOp = N->getOperand(0);
13700 SDValue SecondOp = N->getOperand(1);
13741 C2 = C2node->getZExtValue();
13756 C1AsAPInt = ~(C1nodeImm->getAPIntValue() << C1nodeShift->getAPIntValue());
13779 LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
13780 LLVM_DEBUG(N->dump(&DAG));
13782 LLVM_DEBUG(ResultSLI->dump(&DAG));
13791 !Subtarget->isNeonAvailable()))
13831 // We can always fall back to a non-immediate OR.
13848 for (SDValue Lane : Op->ops()) {
13850 // operands already. Otherwise, if Op is a floating-point splat
13855 CstLane->getZExtValue());
13857 } else if (Lane.getNode()->isUndef()) {
13909 // FNegate each sub-element of the constant
13934 (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
13945 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) {
13946 if (auto SeqInfo = cast<BuildVectorSDNode>(Op)->isConstantSequence()) {
13949 SDValue Start = DAG.getConstant(SeqInfo->first, DL, ContainerVT);
13950 SDValue Steps = DAG.getStepVector(DL, ContainerVT, SeqInfo->second);
13961 // Though this might return a non-BUILD_VECTOR (e.g. CONCAT_VECTORS); if so,
13971 if (BVN->isConstant()) {
13972 if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
13975 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
13979 if (ConstantFPSDNode *Const = BVN->getConstantFPSplatNode())
13980 if (Const->isZero() && !Const->isNegative())
13991 // 3) if only one constant value is used (w/ some non-constant lanes),
13993 // in the non-constant lanes.
13995 // select the values we'll be overwriting for the non-constant
14053 // ------------------------------------------------------------------
14068 // SCALAR_TO_VECTOR, except for when we have a single-element constant vector
14085 if (!isa<ConstantSDNode>(N->getOperand(1))) {
14090 SDValue N0 = N->getOperand(0);
14108 uint64_t Val = N->getConstantOperandVal(1);
14113 if (Val - 1 == 2 * i) {
14138 // Use DUP for non-constant splats. For f32 constant splats, reduce to
14145 dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
14155 dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
14168 EltTy == MVT::f64) && "Unsupported floating-point vector type");
14185 // If we need to insert a small number of different non-constant elements and
14191 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
14195 // start by splatting that value, then replace the non-constant lanes. This
14204 ConstantValueAPInt = C->getAPIntValue().zextOrTrunc(BitSize);
14213 // Now insert the non-constant lanes.
14340 // worse. For a vector with one or two non-undef values, that's
14360 // vector element types. After type-legalization the inserted value is
14389 !Subtarget->isNeonAvailable()))
14397 unsigned NumOperands = Op->getNumOperands();
14405 SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
14428 !Subtarget->isNeonAvailable()))
14449 // Check for non-constant or out of range lane.
14451 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
14476 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
14479 // Check for non-constant or out of range lane.
14481 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
14529 // If this is extracting the upper 64-bits of a 128-bit vector, we match
14531 if (Idx * InVT.getScalarSizeInBits() == 64 && Subtarget->isNeonAvailable())
14536 useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable())) {
14595 DAG.getVectorIdxConstant(Idx - (NumElts / 2), DL));
14663 !isa<ConstantSDNode>(Op->getOperand(0)))
14666 SplatVal = Op->getConstantOperandVal(0);
14675 if (isPowerOf2_64(-SplatVal)) {
14676 SplatVal = -SplatVal;
14700 DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, VT, Pg, Op->getOperand(0),
14711 // SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
14734 if (!Subtarget->isNeonAvailable())
14741 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
14778 /// getVShiftImm - Check if this is a valid build_vector for the immediate
14789 if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
14797 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
14806 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
14809 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
14838 !Subtarget->isNeonAvailable()))
14851 if (Shift->getOpcode() != ISD::SRL)
14858 dyn_cast_or_null<ConstantSDNode>(DAG.getSplatValue(Shift->getOperand(1)));
14862 ShiftValue = ShiftOp1->getZExtValue();
14866 SDValue Add = Shift->getOperand(0);
14867 if (Add->getOpcode() != ISD::ADD || !Add->hasOneUse())
14873 uint64_t ExtraBits = VT.getScalarSizeInBits() - ResVT.getScalarSizeInBits();
14874 if (ShiftValue > ExtraBits && !Add->getFlags().hasNoUnsignedWrap())
14878 dyn_cast_or_null<ConstantSDNode>(DAG.getSplatValue(Add->getOperand(1)));
14881 uint64_t AddValue = AddOp1->getZExtValue();
14882 if (AddValue != 1ULL << (ShiftValue - 1))
14885 RShOperand = Add->getOperand(0);
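// [Editor's sketch, standalone] The add-then-shift matched above is a
// round-to-nearest shift, i.e. srl(add(x, 1 << (s - 1)), s), which is the
// rounding behaviour of the URSHR/RSHRN-style node formed from it.
constexpr unsigned long long roundingShiftSketch(unsigned long long X, unsigned S) {
  return (X + (1ULL << (S - 1))) >> S;
}
static_assert(roundingShiftSketch(10, 2) == 3, "10/4 = 2.5 rounds up");
static_assert(roundingShiftSketch(9, 2) == 2, "9/4 = 2.25 rounds down");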
14902 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
14915 (Subtarget->hasSVE2() ||
14916 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
14926 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) {
14937 DAG.getConstant(Cnt, DL, MVT::i32), Op->getFlags());
14971 bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef,
15076 !Subtarget->isNeonAvailable()))
15079 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
15093 // Lower isnan(x) | isnan(never-nan) to x != x.
15094 // Lower !isnan(x) & !isnan(never-nan) to x == x.
15137 bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs();
15188 "Expected power-of-2 length vector");
15274 bool OverrideNEON = !Subtarget->isNeonAvailable() ||
15283 SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) {
15350 // LSE has an atomic load-clear instruction, but not a load-and.
15356 RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS);
15357 return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(),
15359 AN->getMemOperand());
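// [Editor's sketch, illustrative model] Why the rewrite above is sound:
// LDCLR computes Mem &= ~Rs, so an atomic AND with RHS is the same as an
// atomic CLR with ~RHS.
constexpr unsigned long long clrModelSketch(unsigned long long Mem, unsigned long long Rs) {
  return Mem & ~Rs;
}
static_assert(clrModelSketch(0xf0f0, ~0x00ffULL) == (0xf0f0 & 0x00ff), "AND m == CLR ~m");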
15372 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
15373 EVT VT = Node->getValueType(0);
15376 "no-stack-arg-probe")) {
15382 DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
15391 SDValue Callee = DAG.getTargetExternalSymbol(Subtarget->getChkStkName(),
15394 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
15395 const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
15396 if (Subtarget->hasCustomCallingConv())
15397 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
15408 // from X15 here doesn't work at -O0, since it thinks that X15 is undefined
15419 DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
15437 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
15439 EVT VT = Node->getValueType(0);
15447 DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
15460 if (Subtarget->isTargetWindows())
15470 if (Subtarget->hasSVE2())
15495 const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
15500 assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
15504 Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
15506 Info.ptrVal = CI.getArgOperand(CI.arg_size() - 1);
15513 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
15536 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
15537 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
15554 unsigned NumElts = StructTy->getNumElements();
15555 Type *VecTy = StructTy->getElementType(0);
15557 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), EleVT, NumElts);
15558 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
15574 Type *ArgTy = Arg->getType();
15575 if (!ArgTy->isVectorTy())
15579 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
15580 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
15593 Type *VecTy = I.getArgOperand(0)->getType();
15597 Type *ArgTy = Arg->getType();
15598 if (!ArgTy->isVectorTy())
15603 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), EleVT, NumElts);
15604 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
15652 Type *ElTy = cast<VectorType>(I.getType())->getElementType();
15663 cast<VectorType>(I.getArgOperand(0)->getType())->getElementType();
15665 Info.memVT = MVT::getVT(I.getOperand(0)->getType());
15676 Info.memVT = MVT::getVT(Val->getType());
15707 const SDValue &Base = Mem->getBasePtr();
15712 // It's unknown whether a scalable vector has a power-of-2 bitwidth.
15713 if (Mem->getMemoryVT().isScalableVector())
15718 uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8;
15729 if ((VT == MVT::i64 || VT == MVT::i32) && Extend->use_size()) {
15742 // Truncations from 64-bit GPR to 32-bit GPR is free.
15744 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
15746 uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedValue();
15747 uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedValue();
15762 if (I->getOpcode() != Instruction::FMul)
15765 if (!I->hasOneUse())
15768 Instruction *User = I->user_back();
15770 if (!(User->getOpcode() == Instruction::FSub ||
15771 User->getOpcode() == Instruction::FAdd))
15775 const Function *F = I->getFunction();
15776 const DataLayout &DL = F->getDataLayout();
15777 Type *Ty = User->getOperand(0)->getType();
15785 // All 32-bit GPR operations implicitly zero the high-half of the corresponding
15786 // 64-bit GPR.
15788 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
15790 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
15791 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
15811 // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
15822 if (Ext->getType()->isVectorTy())
15825 for (const Use &U : Ext->uses()) {
15833 switch (Instr->getOpcode()) {
15835 if (!isa<ConstantInt>(Instr->getOperand(1)))
15840 auto &DL = Ext->getDataLayout();
15841 std::advance(GTI, U.getOperandNo()-1);
15844 // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
15846 // log2(sizeof(IdxTy)) - log2(8).
15847 if (IdxTy->isScalableTy())
15850 llvm::countr_zero(DL.getTypeStoreSizeInBits(IdxTy).getFixedValue()) -
15861 if (Instr->getType() == Ext->getOperand(0)->getType())
15876 return all_equal(Shuf->getShuffleMask());
15885 auto *FullTy = FullV->getType();
15886 auto *HalfTy = HalfV->getType();
15887 return FullTy->getPrimitiveSizeInBits().getFixedValue() ==
15888 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
15892 auto *FullVT = cast<FixedVectorType>(FullV->getType());
15893 auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
15894 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
15920 int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
15940 return Ext->getType()->getScalarSizeInBits() ==
15941 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
15959 ElementIndex->getValue() == 1 &&
15960 isa<FixedVectorType>(VectorOperand->getType()) &&
15961 cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2;
15972 if (!GEP || GEP->getNumOperands() != 2)
15975 Value *Base = GEP->getOperand(0);
15976 Value *Offsets = GEP->getOperand(1);
15979 if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
15982 // Sink extends that would allow us to use 32-bit offset vectors.
15985 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
15986 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
15987 Ops.push_back(&GEP->getOperandUse(1));
16002 Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0));
16007 Value *ZExtOp = cast<Instruction>(Op)->getOperand(0);
16008 Ops.push_back(&cast<Instruction>(ZExtOp)->getOperandUse(0));
16009 Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0));
16021 switch (II->getIntrinsicID()) {
16024 if (areExtractShuffleVectors(II->getOperand(0), II->getOperand(1),
16026 Ops.push_back(&II->getOperandUse(0));
16027 Ops.push_back(&II->getOperandUse(1));
16033 if (isa<VectorType>(I->getType()) &&
16034 cast<VectorType>(I->getType())->getElementType()->isHalfTy() &&
16035 !Subtarget->hasFullFP16())
16042 if (isSplatShuffle(II->getOperand(0)))
16043 Ops.push_back(&II->getOperandUse(0));
16044 if (isSplatShuffle(II->getOperand(1)))
16045 Ops.push_back(&II->getOperandUse(1));
16052 if (isSplatShuffle(II->getOperand(1)))
16053 Ops.push_back(&II->getOperandUse(1));
16054 if (isSplatShuffle(II->getOperand(2)))
16055 Ops.push_back(&II->getOperandUse(2));
16059 if (auto *IIOp = dyn_cast<IntrinsicInst>(II->getOperand(0)))
16060 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
16061 Ops.push_back(&II->getOperandUse(0));
16067 auto *Idx = dyn_cast<Instruction>(II->getOperand(1));
16068 if (!Idx || Idx->getOpcode() != Instruction::Add)
16070 Ops.push_back(&II->getOperandUse(1));
16097 auto *Idx = dyn_cast<Instruction>(II->getOperand(3));
16098 if (!Idx || Idx->getOpcode() != Instruction::Add)
16100 Ops.push_back(&II->getOperandUse(3));
16104 if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
16106 Ops.push_back(&II->getOperandUse(0));
16107 Ops.push_back(&II->getOperandUse(1));
16110 if (!areOperandsOfVmullHighP64(II->getArgOperand(0),
16111 II->getArgOperand(1)))
16113 Ops.push_back(&II->getArgOperandUse(0));
16114 Ops.push_back(&II->getArgOperandUse(1));
16117 if (!shouldSinkVectorOfPtrs(II->getArgOperand(0), Ops))
16119 Ops.push_back(&II->getArgOperandUse(0));
16122 if (!shouldSinkVectorOfPtrs(II->getArgOperand(1), Ops))
16124 Ops.push_back(&II->getArgOperandUse(1));
16132 switch (I->getOpcode()) {
16136 for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
16137 if (shouldSinkVScale(I->getOperand(Op), Ops)) {
16138 Ops.push_back(&I->getOperandUse(Op));
16147 if (!I->getType()->isVectorTy())
16150 switch (I->getOpcode()) {
16153 if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
16158 auto Ext1 = cast<Instruction>(I->getOperand(0));
16159 auto Ext2 = cast<Instruction>(I->getOperand(1));
16160 if (areExtractShuffleVectors(Ext1->getOperand(0), Ext2->getOperand(0))) {
16161 Ops.push_back(&Ext1->getOperandUse(0));
16162 Ops.push_back(&Ext2->getOperandUse(0));
16165 Ops.push_back(&I->getOperandUse(0));
16166 Ops.push_back(&I->getOperandUse(1));
16171 // Pattern: Or(And(MaskValue, A), And(Not(MaskValue), B)) ->
16172 // bitselect(MaskValue, A, B) where Not(MaskValue) = Xor(MaskValue, -1)
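// [Editor's sketch, not part of the original file] The scalar identity behind
// the BSL/bitselect pattern above: (M & A) | (~M & B) selects A's bits where
// the mask is set and B's bits elsewhere.
constexpr unsigned bitselectModelSketch(unsigned M, unsigned A, unsigned B) {
  return (M & A) | (~M & B);
}
static_assert(bitselectModelSketch(0x0f, 0xaa, 0x55) == 0x5a,
              "low nibble taken from A, high nibble taken from B");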
16173 if (Subtarget->hasNEON()) {
16182 Instruction *MainAnd = I->getOperand(0) == OtherAnd
16183 ? cast<Instruction>(I->getOperand(1))
16184 : cast<Instruction>(I->getOperand(0));
16187 if (I->getParent() != MainAnd->getParent() ||
16188 I->getParent() != OtherAnd->getParent())
16191 // Non-mask operands of both Ands should also be in same basic block
16192 if (I->getParent() != IA->getParent() ||
16193 I->getParent() != IB->getParent())
16196 Ops.push_back(&MainAnd->getOperandUse(MainAnd->getOperand(0) == IA ? 1 : 0));
16197 Ops.push_back(&I->getOperandUse(0));
16198 Ops.push_back(&I->getOperandUse(1));
16209 for (auto &Op : I->operands()) {
16211 if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
16228 match(Shuffle->getOperand(0), m_ZExtOrSExt(m_Value()))) {
16229 Ops.push_back(&Shuffle->getOperandUse(0));
16231 if (match(Shuffle->getOperand(0), m_SExt(m_Value())))
16241 Value *ShuffleOperand = Shuffle->getOperand(0);
16246 Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
16251 dyn_cast<ConstantInt>(Insert->getOperand(2));
16253 if (!ElementConstant || !ElementConstant->isZero())
16256 unsigned Opcode = OperandInstr->getOpcode();
16264 unsigned Bitwidth = I->getType()->getScalarSizeInBits();
16266 const DataLayout &DL = I->getDataLayout();
16272 Ops.push_back(&Shuffle->getOperandUse(0));
16302 for (unsigned I = IsLittleEndian ? 0 : Factor - 1; I < MaskLen; I += Factor)
16312 auto *SrcTy = cast<FixedVectorType>(Op->getType());
16313 unsigned NumElts = SrcTy->getNumElements();
16314 auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
16315 auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
16333 auto *SrcTy = cast<FixedVectorType>(Op->getType());
16334 auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
16335 auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
16338 if (!createTblShuffleMask(SrcWidth, DstWidth, SrcTy->getNumElements(),
16351 int NumElements = cast<FixedVectorType>(TI->getType())->getNumElements();
16352 auto *SrcTy = cast<FixedVectorType>(TI->getOperand(0)->getType());
16353 auto *DstTy = cast<FixedVectorType>(TI->getType());
16354 assert(SrcTy->getElementType()->isIntegerTy() &&
16355 "Non-integer type source vector element is not supported");
16356 assert(DstTy->getElementType()->isIntegerTy(8) &&
16359 cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
16361 cast<IntegerType>(DstTy->getElementType())->getBitWidth();
16373 // 0,8,16,..Y*8th bytes for the little-endian format
16379 : Itr * TruncFactor + (TruncFactor - 1)));
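// [Editor's sketch, illustrative helper] Which source byte the mask entry
// above selects for element Itr when truncating TruncFactor-byte elements
// down to one byte.
constexpr unsigned truncTblByteSketch(unsigned Itr, unsigned TruncFactor, bool IsLittleEndian) {
  return IsLittleEndian ? Itr * TruncFactor : Itr * TruncFactor + (TruncFactor - 1);
}
static_assert(truncTblByteSketch(2, 8, true) == 16, "little-endian: bytes 0, 8, 16, ...");
static_assert(truncTblByteSketch(0, 4, false) == 3, "big-endian: low byte sits at the high address");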
16402 Builder.CreateShuffleVector(TI->getOperand(0), ShuffleLanes), VecTy));
16405 auto *F = Intrinsic::getDeclaration(TI->getModule(),
16435 auto *F = Intrinsic::getDeclaration(TI->getModule(), TblID, VecTy);
16463 TI->replaceAllUsesWith(FinalResult);
16464 TI->eraseFromParent();
16471 if (!EnableExtToTBL || Subtarget->useSVEForFixedLengthVectors())
16478 Function *F = I->getParent()->getParent();
16479 if (!L || L->getHeader() != I->getParent() || F->hasMinSize() ||
16480 F->hasOptSize())
16483 auto *SrcTy = dyn_cast<FixedVectorType>(I->getOperand(0)->getType());
16484 auto *DstTy = dyn_cast<FixedVectorType>(I->getType());
16492 if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) {
16493 auto DstWidth = DstTy->getElementType()->getScalarSizeInBits();
16499 // If the ZExt can be lowered to a single ZExt to the next power-of-2 and
16501 auto SrcWidth = SrcTy->getElementType()->getScalarSizeInBits();
16502 if (TTI.getCastInstrCost(I->getOpcode(), DstTy, TruncDstType,
16505 if (SrcWidth * 2 >= TruncDstType->getElementType()->getScalarSizeInBits())
16512 Builder, ZExt->getOperand(0), cast<FixedVectorType>(ZExt->getType()),
16513 DstTy, Subtarget->isLittleEndian());
16516 ZExt->replaceAllUsesWith(Result);
16517 ZExt->eraseFromParent();
16522 if (UIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
16523 DstTy->getElementType()->isFloatTy()) {
16526 Builder, I->getOperand(0), FixedVectorType::getInteger(DstTy),
16527 FixedVectorType::getInteger(DstTy), Subtarget->isLittleEndian());
16530 I->replaceAllUsesWith(UI);
16531 I->eraseFromParent();
16536 if (SIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
16537 DstTy->getElementType()->isFloatTy()) {
16539 auto *Shuffle = createTblShuffleForSExt(Builder, I->getOperand(0),
16541 Subtarget->isLittleEndian());
16546 I->replaceAllUsesWith(SI);
16547 I->eraseFromParent();
16555 (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
16556 SrcTy->getElementType()->isFloatTy() &&
16557 DstTy->getElementType()->isIntegerTy(8)) {
16559 auto *WideConv = Builder.CreateFPToUI(FPToUI->getOperand(0),
16562 I->replaceAllUsesWith(TruncI);
16563 I->eraseFromParent();
16564 createTblForTrunc(cast<TruncInst>(TruncI), Subtarget->isLittleEndian());
16570 // per lane of the input that is represented using 1,2,3 or 4 128-bit table
16573 if (TI && DstTy->getElementType()->isIntegerTy(8) &&
16574 ((SrcTy->getElementType()->isIntegerTy(32) ||
16575 SrcTy->getElementType()->isIntegerTy(64)) &&
16576 (SrcTy->getNumElements() == 16 || SrcTy->getNumElements() == 8))) {
16577 createTblForTrunc(TI, Subtarget->isLittleEndian());
16600 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
16601 unsigned MinElts = VecTy->getElementCount().getKnownMinValue();
16603 VecSize = std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
16609 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
16617 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
16618 auto EC = VecTy->getElementCount();
16623 if (isa<FixedVectorType>(VecTy) && !Subtarget->isNeonAvailable() &&
16624 (!Subtarget->useSVEForFixedLengthVectors() ||
16629 !Subtarget->isSVEorStreamingSVEAvailable())
16646 if (Subtarget->useSVEForFixedLengthVectors()) {
16648 std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
16651 (!Subtarget->isNeonAvailable() || VecSize > 128))) {
16659 return Subtarget->isNeonAvailable() && (VecSize == 64 || VecSize % 128 == 0);
16663 if (VTy->getElementType() == Type::getDoubleTy(VTy->getContext()))
16664 return ScalableVectorType::get(VTy->getElementType(), 2);
16666 if (VTy->getElementType() == Type::getFloatTy(VTy->getContext()))
16667 return ScalableVectorType::get(VTy->getElementType(), 4);
16669 if (VTy->getElementType() == Type::getBFloatTy(VTy->getContext()))
16670 return ScalableVectorType::get(VTy->getElementType(), 8);
16672 if (VTy->getElementType() == Type::getHalfTy(VTy->getContext()))
16673 return ScalableVectorType::get(VTy->getElementType(), 8);
16675 if (VTy->getElementType() == Type::getInt64Ty(VTy->getContext()))
16676 return ScalableVectorType::get(VTy->getElementType(), 2);
16678 if (VTy->getElementType() == Type::getInt32Ty(VTy->getContext()))
16679 return ScalableVectorType::get(VTy->getElementType(), 4);
16681 if (VTy->getElementType() == Type::getInt16Ty(VTy->getContext()))
16682 return ScalableVectorType::get(VTy->getElementType(), 8);
16684 if (VTy->getElementType() == Type::getInt8Ty(VTy->getContext()))
16685 return ScalableVectorType::get(VTy->getElementType(), 16);
16701 return Intrinsic::getDeclaration(M, SVELoads[Factor - 2], {LDVTy});
16703 return Intrinsic::getDeclaration(M, NEONLoads[Factor - 2], {LDVTy, PtrTy});
16717 return Intrinsic::getDeclaration(M, SVEStores[Factor - 2], {STVTy});
16719 return Intrinsic::getDeclaration(M, NEONStores[Factor - 2], {STVTy, PtrTy});
16742 const DataLayout &DL = LI->getDataLayout();
16744 VectorType *VTy = Shuffles[0]->getType();
16759 Type *EltTy = FVTy->getElementType();
16760 if (EltTy->isPointerTy())
16762 FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
16764 // If we're going to generate more than one load, reset the sub-vector type
16766 FVTy = FixedVectorType::get(FVTy->getElementType(),
16767 FVTy->getNumElements() / NumLoads);
16775 Value *BaseAddr = LI->getPointerOperand();
16777 Type *PtrTy = LI->getPointerOperandType();
16778 Type *PredTy = VectorType::get(Type::getInt1Ty(LDVTy->getContext()),
16779 LDVTy->getElementCount());
16781 Function *LdNFunc = getStructuredLoadFunction(LI->getModule(), Factor,
16784 // Holds sub-vectors extracted from the load intrinsic return values. The
16785 // sub-vectors are associated with the shufflevector instructions they will
16792 getSVEPredPatternFromNumElements(FVTy->getNumElements());
16793 if (Subtarget->getMinSVEVectorSizeInBits() ==
16794 Subtarget->getMaxSVEVectorSizeInBits() &&
16795 Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy))
16799 ConstantInt::get(Type::getInt32Ty(LDVTy->getContext()), *PgPattern);
16809 BaseAddr = Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr,
16810 FVTy->getNumElements() * Factor);
16818 // Extract and store the sub-vectors returned by the load intrinsic.
16828 ConstantInt::get(Type::getInt64Ty(VTy->getContext()), 0));
16831 if (EltTy->isPointerTy())
16833 SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
16834 FVTy->getNumElements()));
16840 // Replace uses of the shufflevector instructions with the sub-vectors
16842 // associated with more than one sub-vector, those sub-vectors will be
16848 SVI->replaceAllUsesWith(WideVec);
16860 Ptr->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
16863 if (It->isDebugOrPseudoInst())
16865 if (MaxLookupDist-- == 0)
16869 SI->getPointerOperand()->stripAndAccumulateInBoundsConstantOffsets(
16872 (OffsetA.sextOrTrunc(IdxWidth) - OffsetB.sextOrTrunc(IdxWidth))
16914 auto *VecTy = cast<FixedVectorType>(SVI->getType());
16915 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
16917 unsigned LaneLen = VecTy->getNumElements() / Factor;
16918 Type *EltTy = VecTy->getElementType();
16921 const DataLayout &DL = SI->getDataLayout();
16932 Value *Op0 = SVI->getOperand(0);
16933 Value *Op1 = SVI->getOperand(1);
16938 if (EltTy->isPointerTy()) {
16941 cast<FixedVectorType>(Op0->getType())->getNumElements();
16952 // and sub-vector type to something legal.
16954 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
16960 Value *BaseAddr = SI->getPointerOperand();
16962 auto Mask = SVI->getShuffleMask();
16965 // If mask is `poison`, `Mask` may be a vector of -1s.
16972 // that points to BaseAddr+16 or BaseAddr-16 then it can be better left as a
16974 if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 &&
16976 hasNearbyPairedStore(SI->getIterator(), SI->getParent()->end(), BaseAddr,
16978 hasNearbyPairedStore(SI->getReverseIterator(), SI->getParent()->rend(),
16982 Type *PtrTy = SI->getPointerOperandType();
16983 Type *PredTy = VectorType::get(Type::getInt1Ty(STVTy->getContext()),
16984 STVTy->getElementCount());
16986 Function *StNFunc = getStructuredStoreFunction(SI->getModule(), Factor,
16992 getSVEPredPatternFromNumElements(SubVecTy->getNumElements());
16993 if (Subtarget->getMinSVEVectorSizeInBits() ==
16994 Subtarget->getMaxSVEVectorSizeInBits() &&
16995 Subtarget->getMinSVEVectorSizeInBits() ==
17000 ConstantInt::get(Type::getInt32Ty(STVTy->getContext()), *PgPattern);
17021 StartMask = Mask[IdxJ] - j;
17037 ConstantInt::get(Type::getInt64Ty(STVTy->getContext()), 0));
17048 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
17060 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
17066 VectorType *VTy = cast<VectorType>(DI->getType()->getContainedType(0));
17067 const DataLayout &DL = DI->getDataLayout();
17074 if (UseScalable && !VTy->isScalableTy())
17080 VectorType::get(VTy->getElementType(),
17081 VTy->getElementCount().divideCoefficientBy(NumLoads));
17083 Type *PtrTy = LI->getPointerOperandType();
17084 Function *LdNFunc = getStructuredLoadFunction(DI->getModule(), Factor,
17092 Builder.CreateVectorSplat(LdTy->getElementCount(), Builder.getTrue());
17094 Value *BaseAddr = LI->getPointerOperand();
17111 Builder.getInt64(I * LdTy->getElementCount().getKnownMinValue());
17118 Result = PoisonValue::get(DI->getType());
17128 DI->replaceAllUsesWith(Result);
17135 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
17141 VectorType *VTy = cast<VectorType>(II->getOperand(0)->getType());
17142 const DataLayout &DL = II->getDataLayout();
17149 if (UseScalable && !VTy->isScalableTy())
17155 VectorType::get(VTy->getElementType(),
17156 VTy->getElementCount().divideCoefficientBy(NumStores));
17158 Type *PtrTy = SI->getPointerOperandType();
17159 Function *StNFunc = getStructuredStoreFunction(SI->getModule(), Factor,
17164 Value *BaseAddr = SI->getPointerOperand();
17169 Builder.CreateVectorSplat(StTy->getElementCount(), Builder.getTrue());
17171 Value *L = II->getOperand(0);
17172 Value *R = II->getOperand(1);
17181 Builder.getInt64(I * StTy->getElementCount().getKnownMinValue());
17182 L = Builder.CreateExtractVector(StTy, II->getOperand(0), Idx);
17183 R = Builder.CreateExtractVector(StTy, II->getOperand(1), Idx);
17198 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
17199 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
17200 // Only use AdvSIMD to implement memset of 32 bytes and above. It would have
17228 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
17229 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
17230 // Only use AdvSIMD to implement memset of 32 bytes and above. It would have
17255 // 12-bit optionally shifted immediates are legal for adds.
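// [Editor's sketch, illustrative helper] "12-bit optionally shifted" means an
// imm12 in [0, 4095], or that value shifted left by 12.
constexpr bool isLegalAddImmSketch(unsigned long long C) {
  return (C >> 12) == 0 || ((C & 0xfffULL) == 0 && (C >> 12) <= 0xfffULL);
}
static_assert(isLegalAddImmSketch(4095) && isLegalAddImmSketch(4096) && isLegalAddImmSketch(0xfff000),
              "unshifted and shifted forms");
static_assert(!isLegalAddImmSketch(4097), "needs two instructions or a register");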
17273 if (!Subtarget->hasSVE2())
17304 // (mul (add x, c1), c2) -> (add (mul x, c2), c2*c1) in DAGCombine,
17317 const int64_t C1 = C1Node->getSExtValue();
17318 const APInt C1C2 = C1Node->getAPIntValue() * C2Node->getAPIntValue();
17338 /// isLegalAddressingMode - Return true if the addressing mode represented
17345 // reg + 9-bit signed offset
17346 // reg + SIZE_IN_BYTES * 12-bit unsigned offset
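// [Editor's sketch, illustrative helpers] The two immediate forms listed
// above, for an access of Size bytes: the unscaled 9-bit signed (LDUR-style)
// window and the scaled unsigned 12-bit (LDR-style) field.
constexpr bool isSigned9Sketch(long long Off) { return Off >= -256 && Off <= 255; }
constexpr bool isScaledU12Sketch(long long Off, unsigned Size) {
  return Off >= 0 && Off % Size == 0 && Off / Size <= 4095;
}
static_assert(isSigned9Sketch(-256) && !isSigned9Sketch(256), "unscaled signed 9-bit window");
static_assert(isScaledU12Sketch(32760, 8) && !isScaledU12Sketch(4, 8),
              "8-byte accesses scale the 12-bit offset by 8");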
17377 if (Ty->isScalableTy()) {
17379 // See if we have a foldable vscale-based offset, for vector types which
17390 DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
17398 // No scalable offsets allowed for non-scalable types.
17405 if (Ty->isSized()) {
17412 return Subtarget->getInstrInfo()->isLegalAddressingMode(NumBytes, AM.BaseOffs,
17444 return Subtarget->hasFullFP16();
17457 switch (Ty->getScalarType()->getTypeID()) {
17474 // LR is a callee-save register, but we must treat it as clobbered by any call
17476 // as implicit-defs for stackmaps and patchpoints.
17491 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
17492 N->getOpcode() == ISD::SRL) &&
17495 SDValue ShiftLHS = N->getOperand(0);
17496 EVT VT = N->getValueType(0);
17508 if (N->getOpcode() == ISD::SHL)
17509 if (auto *SHLC = dyn_cast<ConstantSDNode>(N->getOperand(1)))
17510 return SRLC->getZExtValue() == SHLC->getZExtValue();
17521 assert(N->getOpcode() == ISD::XOR &&
17522 (N->getOperand(0).getOpcode() == ISD::SHL ||
17523 N->getOperand(0).getOpcode() == ISD::SRL) &&
17527 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
17528 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
17531 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
17532 unsigned ShiftAmt = ShiftC->getZExtValue();
17533 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17534 if (N->getOperand(0).getOpcode() == ISD::SHL)
17535 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
17536 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
17545 assert(((N->getOpcode() == ISD::SHL &&
17546 N->getOperand(0).getOpcode() == ISD::SRL) ||
17547 (N->getOpcode() == ISD::SRL &&
17548 N->getOperand(0).getOpcode() == ISD::SHL)) &&
17549 "Expected shift-shift mask");
17551 if (!N->getOperand(0)->hasOneUse())
17555 EVT VT = N->getValueType(0);
17556 if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
17557 auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
17558 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17559 return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
17572 assert(Ty->isIntegerTy());
17574 unsigned BitSize = Ty->getPrimitiveSizeInBits();
17585 Val &= (1LL << 32) - 1;
17601 /// xor (sra X, elt_size(X)-1), -1
17606 EVT VT = N->getValueType(0);
17607 if (!Subtarget->hasNEON() || !VT.isVector())
17612 SDValue Shift = N->getOperand(0);
17613 SDValue Ones = N->getOperand(1);
17621 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
17645 if (N->getValueType(0) != MVT::i32)
17648 SDValue VecReduceOp0 = N->getOperand(0);
17651 if (Opcode != ISD::ABS || VecReduceOp0->getValueType(0) != MVT::v16i32)
17656 if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
17657 ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
17660 SDValue SUB = ABS->getOperand(0);
17661 unsigned Opcode0 = SUB->getOperand(0).getOpcode();
17662 unsigned Opcode1 = SUB->getOperand(1).getOpcode();
17664 if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
17665 SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
17677 SDValue EXT0 = SUB->getOperand(0);
17678 SDValue EXT1 = SUB->getOperand(1);
17680 if (EXT0->getOperand(0)->getValueType(0) != MVT::v16i8 ||
17681 EXT1->getOperand(0)->getValueType(0) != MVT::v16i8)
17689 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
17692 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
17700 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
17703 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
17725 if (!ST->isNeonAvailable())
17728 if (!ST->hasDotProd())
17731 SDValue Op0 = N->getOperand(0);
17732 if (N->getValueType(0) != MVT::i32 || Op0.getValueType().isScalableVT() ||
17757 // For non-mla reductions B can be set to 1. For MLA we take the operand of
17781 return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
17804 DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), ConcatSDot16);
17820 DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
17821 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), VecReduceAdd16,
17876 // We can convert a UADDV(add(zext(64-bit source), zext(64-bit source))) into
17877 // UADDLV(concat), where the concat represents the 64-bit zext sources.
17879 // Look for add(zext(64-bit source), zext(64-bit source)), returning
17893 // Check zext VTs are the same and 64-bit length.
17917 SDValue A = N->getOperand(0);
17920 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), R);
17941 if (isIntDivCheap(N->getValueType(0), Attr))
17944 EVT VT = N->getValueType(0);
17949 (VT.isFixedLengthVector() && Subtarget->useSVEForFixedLengthVectors()))
17957 // If the divisor is 2 or -2, the default expansion is better. It will add
17958 // (N->getValueType(0) >> (BitWidth - 1)) to it before shifting right.
17960 Divisor == APInt(Divisor.getBitWidth(), -2, /*isSigned*/ true))
17971 if (isIntDivCheap(N->getValueType(0), Attr))
17974 EVT VT = N->getValueType(0);
17978 if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
17991 SDValue N0 = N->getOperand(0);
17992 SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
18036 /// Calculates what the pre-extend type is, based on the extension
18040 /// pre-extend type is pulled directly from the operand, while other extend
18058 return TypeNode->getVT();
18066 uint32_t Mask = Constant->getZExtValue();
18093 SDValue Extend = BV->getOperand(0);
18107 // Restrict valid pre-extend data type
18114 for (SDValue Op : drop_begin(BV->ops())) {
18131 for (SDValue Op : BV->ops())
18142 cast<ShuffleVectorSDNode>(BV)->getMask());
18151 EVT VT = Mul->getValueType(0);
18155 SDValue Op0 = performBuildShuffleExtendCombine(Mul->getOperand(0), DAG);
18156 SDValue Op1 = performBuildShuffleExtendCombine(Mul->getOperand(1), DAG);
18163 return DAG.getNode(Mul->getOpcode(), DL, VT, Op0 ? Op0 : Mul->getOperand(0),
18164 Op1 ? Op1 : Mul->getOperand(1));
18167 // Combine v4i32 Mul(And(Srl(X, 15), 0x10001), 0xffff) -> v8i16 CMLTz
18170 EVT VT = N->getValueType(0);
18174 if (N->getOperand(0).getOpcode() != ISD::AND ||
18175 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
18178 SDValue And = N->getOperand(0);
18182 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
18189 V3 != (HalfSize - 1))
18207 EVT VT = N->getValueType(0);
18209 (N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
18210 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND) ||
18211 (N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
18212 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND) ||
18213 N->getOperand(0).getOperand(0).getValueType() !=
18214 N->getOperand(1).getOperand(0).getValueType())
18217 if (N->getOpcode() == ISD::MUL &&
18218 N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode())
18221 SDValue N0 = N->getOperand(0).getOperand(0);
18222 SDValue N1 = N->getOperand(1).getOperand(0);
18233 SDValue NewN0 = DAG.getNode(N->getOperand(0).getOpcode(), DL, HalfVT, N0);
18234 SDValue NewN1 = DAG.getNode(N->getOperand(1).getOpcode(), DL, HalfVT, N1);
18235 SDValue NewOp = DAG.getNode(N->getOpcode(), DL, HalfVT, NewN0, NewN1);
18236 return DAG.getNode(N->getOpcode() == ISD::MUL ? N->getOperand(0).getOpcode()
18257 // Canonicalize X*(Y+1) -> X*Y+X and (X+1)*Y -> X*Y+Y,
18259 // Similarly, X*(1-Y) -> X - X*Y and (1-Y)*X -> X - Y*X.
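// [Editor's sketch, not part of the original file] Both canonicalisations
// above are plain distributivity, which is what lets the multiply fold into
// MADD/MSUB afterwards.
static_assert(7 * (5 + 1) == 7 * 5 + 7, "X*(Y+1) == X*Y + X");
static_assert(7 * (1 - 5) == 7 - 7 * 5, "X*(1-Y) == X - X*Y");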
18261 EVT VT = N->getValueType(0);
18262 SDValue N0 = N->getOperand(0);
18263 SDValue N1 = N->getOperand(1);
18267 auto IsAddSubWith1 = [&](SDValue V) -> bool {
18268 AddSubOpc = V->getOpcode();
18269 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
18270 SDValue Opnd = V->getOperand(1);
18271 MulOper = V->getOperand(0);
18275 return C->isOne();
18295 const APInt &ConstValue = C->getAPIntValue();
18300 (N0->getOpcode() == ISD::TRUNCATE &&
18301 (IsSVECntIntrinsic(N0->getOperand(0)))))
18308 // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
18309 // 64-bit is 5 cycles, so this is always a win.
18321 if (N0->hasOneUse() && (isSignExtended(N0, DAG) ||
18326 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
18327 N->use_begin()->getOpcode() == ISD::SUB))
18362 // C = 45 is equal to (1+4)*(1+8); we don't decompose it into (1+2)*(16-1), as
18363 // multiplying by the (2^N - 1) factor can't be done in a single instruction.
18370 APInt NVMinus1 = N - 1;
18380 // C = 11 is equal to (1+4)*2+1; we don't decompose it into (1+2)*4-1, as
18381 // multiplying by the (2^N - 1) factor can't be done in a single instruction.
18383 APInt CVMinus1 = C - 1;
18387 APInt SCVMinus1 = CVMinus1.ashr(TrailingZeroes) - 1;
18397 // Can the const C be decomposed into (1 - (1 - 2^M) * 2^N), eg:
18398 // C = 29 is equal to 1 - (1 - 2^3) * 2^2.
18400 APInt CVMinus1 = C - 1;
18416 // (mul x, 2^N - 1) => (sub (shl x, N), x)
18417 // (mul x, (2^(N-M) - 1) * 2^M) => (sub (shl x, N), (shl x, M))
18422 // (mul x, (1 - (1 - 2^M) * 2^N))
18423 // => MV = (sub x, (shl x, M)); Res = (sub x, (shl MV, N))
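// [Editor's sketch, standalone] The positive-constant decompositions above,
// checked as integer identities with x = 3 as an arbitrary value.
static_assert((1 + 4) * (1 + 8) == 45, "45 = (1 + 2^2) * (1 + 2^3): two shifted adds");
static_assert((3 << 4) - 3 == 3 * 15, "(mul x, 2^N - 1) == (sub (shl x, N), x)");
static_assert((3 << 4) - (3 << 2) == 3 * 12, "(mul x, (2^(N-M) - 1) * 2^M) with N = 4, M = 2");
static_assert(1 - (1 - 8) * 4 == 29, "29 = 1 - (1 - 2^3) * 2^2");
static_assert(3 - (3 - 3 * 8) * 4 == 3 * 29,
              "the two-subtract expansion: MV = x - (x<<3), then x - (MV<<2)");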
18424 APInt SCVMinus1 = ShiftedConstValue - 1;
18438 if (Subtarget->hasALULSLFast() &&
18440 APInt CVMMinus1 = CVM - 1;
18441 APInt CVNMinus1 = CVN - 1;
18450 if (Subtarget->hasALULSLFast() &&
18461 if (Subtarget->hasALULSLFast() &&
18472 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
18473 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
18474 // (mul x, -(2^(N-M) - 1) * 2^M) => (sub (shl x, M), (shl x, N))
18475 APInt SCVPlus1 = -ShiftedConstValue + 1;
18476 APInt CVNegPlus1 = -ConstValue + 1;
18477 APInt CVNegMinus1 = -ConstValue - 1;
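// [Editor's sketch, standalone] The negated forms above, again with x = 3.
static_assert(3 - (3 << 4) == 3 * -15, "(mul x, -(2^N - 1)) == (sub x, (shl x, N))");
static_assert(-((3 << 4) + 3) == 3 * -17, "(mul x, -(2^N + 1)) == -(add (shl x, N), x)");
static_assert((3 << 2) - (3 << 4) == 3 * -12,
              "(mul x, -(2^(N-M) - 1) * 2^M) with N = 4, M = 2");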
18495 // Take advantage of vector comparisons producing 0 or -1 in each lane to
18499 // UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
18506 EVT VT = N->getValueType(0);
18507 if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
18508 N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
18509 VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
18513 // make the transformation for non-constant splats as well, but it's unclear
18517 dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
18519 if (!BV->isConstant())
18524 EVT IntVT = BV->getValueType(0);
18527 SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
18531 N->getOperand(0)->getOperand(0), MaskConst);
18546 EVT VT = N->getValueType(0);
18551 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
18554 // If the result of an integer load is only used by an integer-to-float
18556 // This eliminates an "integer-to-vector-move" UOP and improves throughput.
18557 SDValue N0 = N->getOperand(0);
18558 if (Subtarget->isNeonAvailable() && ISD::isNormalLoad(N0.getNode()) &&
18561 !cast<LoadSDNode>(N0)->isVolatile()) {
18563 SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
18564 LN0->getPointerInfo(), LN0->getAlign(),
18565 LN0->getMemOperand()->getFlags());
18572 (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
18579 /// Fold a floating-point multiply by power of two into floating-point to
18580 /// fixed-point conversion.
18584 if (!Subtarget->isNeonAvailable())
18587 if (!N->getValueType(0).isSimple())
18590 SDValue Op = N->getOperand(0);
18597 SDValue ConstVec = Op->getOperand(1);
18604 (FloatBits != 16 || !Subtarget->hasFullFP16()))
18607 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
18612 // Avoid conversions where iN is larger than the float (e.g., float -> i64).
18619 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
18620 if (C == -1 || C == 0 || C > Bits)
18627 if (N->getOpcode() == ISD::FP_TO_SINT_SAT ||
18628 N->getOpcode() == ISD::FP_TO_UINT_SAT) {
18629 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
18635 bool IsSigned = (N->getOpcode() == ISD::FP_TO_SINT ||
18636 N->getOpcode() == ISD::FP_TO_SINT_SAT);
18642 Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
18645 FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
18652 EVT VT = N->getValueType(0);
18667 SDValue N0 = N->getOperand(0);
18671 SDValue N1 = N->getOperand(1);
18675 // InstCombine does (not (neg a)) => (add a -1).
18676 // Try: (or (and (neg a) b) (and (add a -1) c)) => (bsl (neg a) b c)
18678 for (int i = 1; i >= 0; --i) {
18679 for (int j = 1; j >= 0; --j) {
18680 SDValue O0 = N0->getOperand(i);
18681 SDValue O1 = N1->getOperand(j);
18688 SubSibling = N0->getOperand(1 - i);
18689 AddSibling = N1->getOperand(1 - j);
18693 AddSibling = N0->getOperand(1 - i);
18694 SubSibling = N1->getOperand(1 - j);
18716 uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
18717 for (int i = 1; i >= 0; --i)
18718 for (int j = 1; j >= 0; --j) {
18721 if (ISD::isConstantSplatVector(N0->getOperand(i).getNode(), Val1) &&
18722 ISD::isConstantSplatVector(N1->getOperand(j).getNode(), Val2) &&
18724 return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i),
18725 N0->getOperand(1 - i), N1->getOperand(1 - j));
18727 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
18728 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
18734 ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
18735 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
18737 CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
18743 return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i),
18744 N0->getOperand(1 - i), N1->getOperand(1 - j));
18761 EVT VT = N->getValueType(0);
18762 SDValue CSel0 = N->getOperand(0);
18763 SDValue CSel1 = N->getOperand(1);
18769 if (!CSel0->hasOneUse() || !CSel1->hasOneUse())
18782 if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
18797 if (N->getOpcode() == ISD::AND) {
18810 if (Op1 && Op1->getAPIntValue().isNegative() &&
18811 Op1->getAPIntValue().sgt(-32)) {
18813 // if the Op1 is a constant in the range [-31, -1], we
18816 DAG.getConstant(Op1->getAPIntValue().abs(), DL, Op1->getValueType(0));
18832 EVT VT = N->getValueType(0);
18866 if (N->getOpcode() == AArch64ISD::DUP || N->getOpcode() == ISD::SPLAT_VECTOR)
18867 if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0)))
18868 return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
18875 SDValue Op = N->getOperand(0);
18878 Op = Op->getOperand(0);
18887 SDValue Src = N->getOperand(0);
18888 unsigned Opc = Src->getOpcode();
18892 SDValue UnpkOp = Src->getOperand(0);
18893 SDValue Dup = N->getOperand(1);
18899 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
18903 uint64_t ExtVal = C->getZExtValue();
18905 auto MaskAndTypeMatch = [ExtVal](EVT VT) -> bool {
18913 EVT EltTy = UnpkOp->getValueType(0).getVectorElementType();
18917 // If this is 'and (uunpklo/hi (extload MemTy -> ExtTy)), mask', then check
18918 // to see if the mask is all-ones of size MemTy.
18920 if (MaskedLoadOp && (MaskedLoadOp->getExtensionType() == ISD::ZEXTLOAD ||
18921 MaskedLoadOp->getExtensionType() == ISD::EXTLOAD)) {
18922 EVT EltTy = MaskedLoadOp->getMemoryVT().getVectorElementType();
18928 APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits());
18932 Dup = DAG.getNode(ISD::SPLAT_VECTOR, DL, UnpkOp->getValueType(0),
18936 UnpkOp->getValueType(0), UnpkOp, Dup);
18938 return DAG.getNode(Opc, DL, N->getValueType(0), And);
18946 if (isAllActivePredicate(DAG, N->getOperand(0)))
18947 return N->getOperand(1);
18948 if (isAllActivePredicate(DAG, N->getOperand(1)))
18949 return N->getOperand(0);
18954 SDValue Mask = N->getOperand(1);
18961 // SVE load instructions perform an implicit zero-extend, which makes them
18967 MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
18984 MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
19003 SDValue SetCC = N->getOperand(0);
19004 EVT VT = N->getValueType(0);
19010 for (auto U : N->uses())
19011 if (U->getOpcode() == ISD::SELECT)
19014 // Check if the operand is a SETCC node with floating-point comparison
19039 SDValue LHS = N->getOperand(0);
19040 SDValue RHS = N->getOperand(1);
19041 EVT VT = N->getValueType(0);
19102 SDValue LHS = N->getOperand(0);
19103 SDValue RHS = N->getOperand(1);
19104 EVT VT = N->getValueType(0);
19107 if (!N->getFlags().hasAllowReassociation())
19110 // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
19122 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(1), B, N->getFlags()),
19124 VCMLA->setFlags(A->getFlags());
19174 assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
19176 if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
19179 SDValue N0 = N->getOperand(0);
19183 !isNullConstant(N->getOperand(1)))
19187 // flag-setting operation.
19194 return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::FIRST_ACTIVE);
19197 // Materialize : Idx = (add (mul vscale, NumEls), -1)
19204 assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
19206 if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
19209 SDValue N0 = N->getOperand(0);
19215 // Idx == (add (mul vscale, NumEls), -1)
19216 SDValue Idx = N->getOperand(1);
19228 // Extracts of lane EC-1 for SVE can be expressed as PTEST(Op, LAST) ? 1 : 0
19231 return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::LAST_ACTIVE);
19237 assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
19244 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
19246 EVT VT = N->getValueType(0);
19248 bool IsStrict = N0->isStrictFPOpcode();
19250 // extract(dup x) -> x
19259 // ->
19264 if (isNullConstant(N1) && hasPairwiseAdd(N0->getOpcode(), VT, FullFP16) &&
19267 SDValue N00 = N0->getOperand(IsStrict ? 1 : 0);
19268 SDValue N01 = N0->getOperand(IsStrict ? 2 : 1);
19279 if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
19280 Other == Shuffle->getOperand(0)) {
19286 return DAG.getNode(N0->getOpcode(), DL, VT, Extract1, Extract2);
19292 SDValue Ret = DAG.getNode(N0->getOpcode(), DL,
19294 {N0->getOperand(0), Extract1, Extract2});
19308 EVT VT = N->getValueType(0);
19309 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
19310 unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
19319 // ->
19323 // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
19325 // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
19326 if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
19328 SDValue N00 = N0->getOperand(0);
19329 SDValue N10 = N1->getOperand(0);
19347 if (N->getOperand(0).getValueType() == MVT::v4i8 ||
19348 N->getOperand(0).getValueType() == MVT::v2i16 ||
19349 N->getOperand(0).getValueType() == MVT::v2i8) {
19350 EVT SrcVT = N->getOperand(0).getValueType();
19354 if (N->getNumOperands() % 2 == 0 &&
19355 all_of(N->op_values(), [SrcVT](SDValue V) {
19361 return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() &&
19362 LD->getExtensionType() == ISD::NON_EXTLOAD;
19365 EVT NVT = EVT::getVectorVT(*DAG.getContext(), FVT, N->getNumOperands());
19368 for (unsigned i = 0; i < N->getNumOperands(); i++) {
19369 SDValue V = N->getOperand(i);
19374 SDValue NewLoad = DAG.getLoad(FVT, dl, LD->getChain(),
19375 LD->getBasePtr(), LD->getMemOperand());
19380 return DAG.getBitcast(N->getValueType(0),
19390 // ->
19393 if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
19394 N1Opc == ISD::TRUNCATE && N->isOnlyUserOf(N0.getNode()) &&
19395 N->isOnlyUserOf(N1.getNode())) {
19397 return V->getOpcode() == ISD::XOR &&
19400 SDValue N00 = N0->getOperand(0);
19401 SDValue N10 = N1->getOperand(0);
19402 if (isBitwiseVectorNegate(N00) && N0->isOnlyUserOf(N00.getNode()) &&
19403 isBitwiseVectorNegate(N10) && N1->isOnlyUserOf(N10.getNode())) {
19408 N00->getOperand(0)),
19410 N10->getOperand(0))),
19420 // Optimise concat_vectors of two identical binops with a 128-bit destination
19422 // concat(uhadd(a,b), uhadd(c, d)) -> uhadd(concat(a, c), concat(b, d))
19423 if (N->getNumOperands() == 2 && N0Opc == N1Opc && VT.is128BitVector() &&
19424 DAG.getTargetLoweringInfo().isBinOp(N0Opc) && N0->hasOneUse() &&
19425 N1->hasOneUse()) {
19426 SDValue N00 = N0->getOperand(0);
19427 SDValue N01 = N0->getOperand(1);
19428 SDValue N10 = N1->getOperand(0);
19429 SDValue N11 = N1->getOperand(1);
19459 if (Imm != 1ULL << (ShtAmt - 1))
19464 // concat(rshrn(x), rshrn(y)) -> rshrn(concat(x, y))
19465 if (N->getNumOperands() == 2 && IsRSHRN(N0) &&
19477 DAG.getConstant(1ULL << (N0.getConstantOperandVal(1) - 1), dl, BVT));
19484 if (N->getNumOperands() == 2 && N0Opc == AArch64ISD::ZIP1 &&
19497 if (N->getNumOperands() == 2 && N0 == N1 && VT.getVectorNumElements() == 2) {
19503 // Canonicalise concat_vectors so that the right-hand vector has as few
19504 // bit-casts as possible before its real operation. The primary matching
19506 // which depend on the operation being performed on this right-hand vector.
19512 if (N->getNumOperands() != 2 || N1Opc != ISD::BITCAST)
19514 SDValue RHS = N1->getOperand(0);
19521 dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
19537 EVT VT = N->getValueType(0);
19541 SDValue V = N->getOperand(0);
19544 // blocks this combine because the non-const case requires custom lowering.
19546 // ty1 extract_vector(ty2 splat(const))) -> ty1 splat(const)
19558 SDValue Vec = N->getOperand(0);
19559 SDValue SubVec = N->getOperand(1);
19560 uint64_t IdxVal = N->getConstantOperandVal(2);
19580 // Fold insert_subvector -> concat_vectors
19581 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
19582 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
19609 // register allocator to avoid cross-class register copies that aren't
19613 SDValue Op1 = N->getOperand(1);
19618 SDValue IID = N->getOperand(0);
19619 SDValue Shift = N->getOperand(2);
19622 EVT ResTy = N->getValueType(0);
19644 // AArch64 high-vector "long" operations are formed by performing the non-high
19653 // (dupv64 scalar) --> (extract_high (dup128 scalar))
19657 // It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
19693 N = DAG.getNode(N->getOpcode(), DL, NewVT, N->ops());
19749 SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
19755 // - csel 1, 0, cc
19756 // - csel 0, 1, !cc
19777 if (!TValue->isOne()) {
19783 return TValue->isOne() && FValue->isZero();
19791 isSetCC(Op->getOperand(0), Info));
19796 // -->
19801 assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
19802 SDValue LHS = Op->getOperand(0);
19803 SDValue RHS = Op->getOperand(1);
19822 ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
19823 : InfoAndKind.Info.Generic.Opnd0->getValueType();
19841 EVT VT = Op->getValueType(0);
19846 // ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
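// This is sound because integer addition commutes with the cross-lane sum:
// UADDV(a) + UADDV(b) equals UADDV(a + b) lane-wise, so one reduction plus a
// vector ADD replaces two reductions and a scalar ADD.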
19848 EVT VT = N->getValueType(0);
19850 if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
19853 SDValue LHS = N->getOperand(0);
19854 SDValue RHS = N->getOperand(1);
19859 auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
19860 auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
19861 if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero())
19864 SDValue Op1 = LHS->getOperand(0);
19865 SDValue Op2 = RHS->getOperand(0);
19875 EVT ValVT = Val1->getValueType(0);
19885 /// CSNEG(c, -1, cc) + b => CSINC(b+c, b, cc)
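/// The CSNEG form is valid because its false arm evaluates to -(-1) = 1, so
/// the sum is b+c when cc holds and b+1 otherwise, which is exactly what
/// CSINC(b+c, b, cc) produces.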
19887 EVT VT = N->getValueType(0);
19888 if (!VT.isScalarInteger() || N->getOpcode() != ISD::ADD)
19891 SDValue LHS = N->getOperand(0);
19892 SDValue RHS = N->getOperand(1);
19918 (CTVal->isOne() || CFVal->isOne())) &&
19920 (CTVal->isOne() || CFVal->isAllOnes())))
19924 if (LHS.getOpcode() == AArch64ISD::CSEL && CTVal->isOne() &&
19925 !CFVal->isOne()) {
19931 // Switch CSNEG(1, c, cc) to CSNEG(-c, -1, !cc)
19932 if (LHS.getOpcode() == AArch64ISD::CSNEG && CTVal->isOne() &&
19933 !CFVal->isAllOnes()) {
19934 APInt C = -1 * CFVal->getAPIntValue();
19942 APInt ADDC = CTVal->getAPIntValue();
19947 assert(((LHS.getOpcode() == AArch64ISD::CSEL && CFVal->isOne()) ||
19948 (LHS.getOpcode() == AArch64ISD::CSNEG && CFVal->isAllOnes())) &&
19958 // ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
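// UDOT accumulates into its first operand, so when that accumulator is known
// to be zero the ADD's other operand can take its place, removing the separate
// vector ADD.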
19960 EVT VT = N->getValueType(0);
19961 if (N->getOpcode() != ISD::ADD)
19964 SDValue Dot = N->getOperand(0);
19965 SDValue A = N->getOperand(1);
19994 // (neg (csel X, Y)) -> (csel (neg X), (neg Y))
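// Pushing the negation into both arms gives the negated values a chance to
// fold with the CSEL's inputs (for example ending up as a CSNEG), rather than
// paying for a separate negate of the selected result.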
20003 SDValue CSel = N->getOperand(1);
20004 if (CSel.getOpcode() != AArch64ISD::CSEL || !CSel->hasOneUse())
20025 // which act on the high-half of their inputs. They are normally matched by
20030 // -> uaddl2 vD, vN, vM
20041 MVT VT = N->getSimpleValueType(0);
20043 if (N->getOpcode() == ISD::ADD)
20049 SDValue LHS = N->getOperand(0);
20050 SDValue RHS = N->getOperand(1);
20074 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
20079 !Op.getNode()->hasAnyUseOfValue(0);
20103 SDValue CmpOp = Op->getOperand(2);
20115 SDValue CsetOp = CmpOp->getOperand(IsAdd ? 0 : 1);
20120 return DAG.getNode(Op->getOpcode(), SDLoc(Op), Op->getVTList(),
20121 Op->getOperand(0), Op->getOperand(1),
20127 SDValue LHS = N->getOperand(0);
20128 SDValue RHS = N->getOperand(1);
20129 SDValue Cond = N->getOperand(2);
20134 EVT VT = N->getValueType(0);
20146 EVT VT = N->getValueType(0);
20150 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1),
20151 Elt2 = N->getOperand(2), Elt3 = N->getOperand(3);
20152 if (Elt0->getOpcode() == ISD::FP_ROUND &&
20153 Elt1->getOpcode() == ISD::FP_ROUND &&
20154 isa<ConstantSDNode>(Elt0->getOperand(1)) &&
20155 isa<ConstantSDNode>(Elt1->getOperand(1)) &&
20156 Elt0->getConstantOperandVal(1) == Elt1->getConstantOperandVal(1) &&
20157 Elt0->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20158 Elt1->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20160 isa<ConstantSDNode>(Elt0->getOperand(0)->getOperand(1)) &&
20161 isa<ConstantSDNode>(Elt1->getOperand(0)->getOperand(1)) &&
20162 Elt0->getOperand(0)->getOperand(0) ==
20163 Elt1->getOperand(0)->getOperand(0) &&
20164 Elt0->getOperand(0)->getConstantOperandVal(1) == 0 &&
20165 Elt1->getOperand(0)->getConstantOperandVal(1) == 1) {
20166 SDValue LowLanesSrcVec = Elt0->getOperand(0)->getOperand(0);
20169 if (Elt2->getOpcode() == ISD::UNDEF &&
20170 Elt3->getOpcode() == ISD::UNDEF) {
20172 } else if (Elt2->getOpcode() == ISD::FP_ROUND &&
20173 Elt3->getOpcode() == ISD::FP_ROUND &&
20174 isa<ConstantSDNode>(Elt2->getOperand(1)) &&
20175 isa<ConstantSDNode>(Elt3->getOperand(1)) &&
20176 Elt2->getConstantOperandVal(1) ==
20177 Elt3->getConstantOperandVal(1) &&
20178 Elt2->getOperand(0)->getOpcode() ==
20180 Elt3->getOperand(0)->getOpcode() ==
20183 isa<ConstantSDNode>(Elt2->getOperand(0)->getOperand(1)) &&
20184 isa<ConstantSDNode>(Elt3->getOperand(0)->getOperand(1)) &&
20185 Elt2->getOperand(0)->getOperand(0) ==
20186 Elt3->getOperand(0)->getOperand(0) &&
20187 Elt2->getOperand(0)->getConstantOperandVal(1) == 0 &&
20188 Elt3->getOperand(0)->getConstantOperandVal(1) == 1) {
20189 SDValue HighLanesSrcVec = Elt2->getOperand(0)->getOperand(0);
20199 Elt0->getOperand(1));
20206 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1);
20207 if (Elt0->getOpcode() == ISD::FP_EXTEND &&
20208 Elt1->getOpcode() == ISD::FP_EXTEND &&
20209 Elt0->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20210 Elt1->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20211 Elt0->getOperand(0)->getOperand(0) ==
20212 Elt1->getOperand(0)->getOperand(0) &&
20214 isa<ConstantSDNode>(Elt0->getOperand(0)->getOperand(1)) &&
20215 isa<ConstantSDNode>(Elt1->getOperand(0)->getOperand(1)) &&
20216 Elt0->getOperand(0)->getConstantOperandVal(1) + 1 ==
20217 Elt1->getOperand(0)->getConstantOperandVal(1) &&
20220 Elt0->getOperand(0)->getConstantOperandVal(1) %
20223 SDValue SrcVec = Elt0->getOperand(0)->getOperand(0);
20228 SDValue SubvectorIdx = Elt0->getOperand(0)->getOperand(1);
20238 // extract subvector where the inner vector is any-extended to the
20249 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1);
20250 // Reminder, EXTRACT_VECTOR_ELT has the effect of any-extending to its VT.
20251 if (Elt0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20252 Elt1->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20254 isa<ConstantSDNode>(Elt0->getOperand(1)) &&
20255 isa<ConstantSDNode>(Elt1->getOperand(1)) &&
20257 Elt0->getOperand(0) == Elt1->getOperand(0) &&
20259 Elt0->getConstantOperandVal(1) + 1 == Elt1->getConstantOperandVal(1) &&
20262 Elt0->getConstantOperandVal(1) % VT.getVectorMinNumElements() == 0) {
20263 SDValue VecToExtend = Elt0->getOperand(0);
20268 SDValue SubvectorIdx = DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL);
20280 EVT VT = N->getValueType(0);
20281 SDValue N0 = N->getOperand(0);
20300 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
20309 uint64_t AndMask = CSD->getZExtValue();
20318 // (N - Y) + Z --> (Z - Y) + N
20341 EVT VT = N->getValueType(0);
20352 if (N->getOpcode() != ISD::ADD)
20357 EVT VT = N->getValueType(0);
20362 SDValue LHS = N->getOperand(0);
20363 SDValue RHS = N->getOperand(1);
20377 // with LSL (shift > 4). For other processors, this is a no-op for
20390 if (N->getOpcode() != ISD::SUB)
20393 SDValue Add = N->getOperand(1);
20394 SDValue X = N->getOperand(0);
20412 EVT VT = N->getValueType(0);
20434 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::SUB)
20437 if (!N->getValueType(0).isFixedLengthVector())
20440 auto performOpt = [&DAG, &N](SDValue Op0, SDValue Op1) -> SDValue {
20444 if (!cast<ConstantSDNode>(Op1->getOperand(1))->isZero())
20447 SDValue MulValue = Op1->getOperand(0);
20460 DAG.getNode(N->getOpcode(), SDLoc(N), ScalableVT, {ScaledOp, MulValue});
20461 return convertFromScalableVector(DAG, N->getValueType(0), NewValue);
20464 if (SDValue res = performOpt(N->getOperand(0), N->getOperand(1)))
20466 else if (N->getOpcode() == ISD::ADD)
20467 return performOpt(N->getOperand(1), N->getOperand(0));
20475 EVT VT = N->getValueType(0);
20477 DAG.getTargetLoweringInfo().isOperationExpand(N->getOpcode(), MVT::v1i64))
20479 SDValue Op0 = N->getOperand(0);
20480 SDValue Op1 = N->getOperand(1);
20504 DAG.getNode(N->getOpcode(), DL, MVT::v1i64, Op0, Op1),
20510 if (!BV->hasOneUse())
20513 if (!Ld || !Ld->isSimple())
20521 if (!Ld || !Ld->isSimple() || !BV.getOperand(Op).hasOneUse())
20552 if (SV1->getMaskElt(I) != I ||
20553 SV1->getMaskElt(I + NumSubElts) != I + NumSubElts ||
20554 SV1->getMaskElt(I + NumSubElts * 2) != I + NumSubElts * 2 ||
20555 SV1->getMaskElt(I + NumSubElts * 3) != I + NumElts)
20558 if (SV2->getMaskElt(I) != I ||
20559 SV2->getMaskElt(I + NumSubElts) != I + NumSubElts ||
20560 SV2->getMaskElt(I + NumSubElts * 2) != I + NumElts)
20563 auto *Ld0 = dyn_cast<LoadSDNode>(SV2->getOperand(0).getOperand(0));
20564 auto *Ld1 = dyn_cast<LoadSDNode>(SV2->getOperand(0).getOperand(1));
20565 auto *Ld2 = dyn_cast<LoadSDNode>(SV2->getOperand(1).getOperand(0));
20567 if (!Ld0 || !Ld1 || !Ld2 || !Ld3 || !Ld0->isSimple() || !Ld1->isSimple() ||
20568 !Ld2->isSimple() || !Ld3->isSimple())
20593 unsigned Size = get<0>(L)->getValueType(0).getSizeInBits();
20594 return Size == get<1>(L)->getValueType(0).getSizeInBits() &&
20635 EVT VT = N->getValueType(0);
20641 SDValue Other = N->getOperand(0);
20642 SDValue Shift = N->getOperand(1);
20643 if (Shift.getOpcode() != ISD::SHL && N->getOpcode() != ISD::SUB)
20692 SDValue Load = DAG.getLoad(DLoadVT, SDLoc(L0), L0->getChain(),
20693 L0->getBasePtr(), L0->getPointerInfo(),
20694 L0->getOriginalAlign());
20703 for (const auto &[O0, O1] : zip(Op0->op_values(), Op1->op_values()))
20749 return DAG.getNode(N->getOpcode(), DL, VT, Ext0, NShift);
20780 // Massage DAGs which we can use the high-half "long" operations on into
20783 // (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
20793 SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
20794 SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
20800 // just as well use the non-high version) so look for a corresponding extract
20814 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);
20816 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
20817 N->getOperand(0), LHS, RHS);
20821 MVT ElemTy = N->getSimpleValueType(0).getScalarType();
20825 if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
20829 if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
20835 } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
20836 ShiftAmount = CVN->getSExtValue();
20842 return N->getOperand(1);
20877 ShiftAmount = -ShiftAmount;
20884 EVT VT = N->getValueType(0);
20885 SDValue Op = N->getOperand(1);
20892 if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
20894 DAG.getConstant(-ShiftAmount, dl, MVT::i32));
20895 if (N->getValueType(0) == MVT::i64)
20902 if (N->getValueType(0) == MVT::i64)
20915 SDValue AndN = N->getOperand(2);
20920 if (!CMask || CMask->getZExtValue() != Mask)
20924 N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
20930 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
20932 N->getOperand(1).getSimpleValueType(),
20933 N->getOperand(1)),
20939 SDValue Op1 = N->getOperand(1);
20940 SDValue Op2 = N->getOperand(2);
20946 SDValue StepVector = DAG.getStepVector(DL, N->getValueType(0));
20947 SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2);
20948 SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step);
20949 SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1);
20950 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base);
20955 SDValue Scalar = N->getOperand(3);
20961 SDValue Passthru = N->getOperand(1);
20962 SDValue Pred = N->getOperand(2);
20963 return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0),
20970 EVT VT = N->getValueType(0);
20974 // Current lowering only supports the SVE-ACLE types.
20984 SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
20985 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2));
20986 SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3),
20999 SDValue Comparator = N->getOperand(3);
21003 EVT VT = N->getValueType(0);
21004 EVT CmpVT = N->getOperand(2).getValueType();
21005 SDValue Pred = N->getOperand(1);
21022 int64_t ImmVal = CN->getSExtValue();
21023 if (ImmVal >= -16 && ImmVal <= 15)
21036 uint64_t ImmVal = CN->getZExtValue();
21051 N->getOperand(2), Splat, DAG.getCondCode(CC));
21099 SDValue Pred = N->getOperand(1);
21100 SDValue VecToReduce = N->getOperand(2);
21104 EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
21110 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
21118 SDValue Pred = N->getOperand(1);
21119 SDValue VecToReduce = N->getOperand(2);
21127 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
21135 SDValue Pred = N->getOperand(1);
21136 SDValue InitVal = N->getOperand(2);
21137 SDValue VecToReduce = N->getOperand(3);
21150 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
21160 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
21161 assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
21162 SDValue Pg = N->getOperand(1);
21163 SDValue Op1 = N->getOperand(SwapOperands ? 3 : 2);
21164 SDValue Op2 = N->getOperand(SwapOperands ? 2 : 3);
21169 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op1, Op2);
21171 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg, Op1, Op2);
21184 if (!Subtarget->hasSVE2p1())
21187 if (!N->hasNUsesOfValue(2, 0))
21190 const uint64_t HalfSize = N->getValueType(0).getVectorMinNumElements() / 2;
21194 auto It = N->use_begin();
21198 if (Lo->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
21199 Hi->getOpcode() != ISD::EXTRACT_SUBVECTOR)
21202 uint64_t OffLo = Lo->getConstantOperandVal(1);
21203 uint64_t OffHi = Hi->getConstantOperandVal(1);
21213 EVT HalfVec = Lo->getValueType(0);
21214 if (HalfVec != Hi->getValueType(0) ||
21222 SDValue Idx = N->getOperand(1);
21223 SDValue TC = N->getOperand(2);
21230 {Lo->getValueType(0), Hi->getValueType(0)}, {ID, Idx, TC});
21262 return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
21263 N->getOperand(1), N->getOperand(2));
21265 return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
21266 N->getOperand(1), N->getOperand(2));
21268 return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
21269 N->getOperand(1), N->getOperand(2));
21271 return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
21272 N->getOperand(1), N->getOperand(2));
21274 return DAG.getNode(AArch64ISD::SMULL, SDLoc(N), N->getValueType(0),
21275 N->getOperand(1), N->getOperand(2));
21277 return DAG.getNode(AArch64ISD::UMULL, SDLoc(N), N->getValueType(0),
21278 N->getOperand(1), N->getOperand(2));
21280 return DAG.getNode(AArch64ISD::PMULL, SDLoc(N), N->getValueType(0),
21281 N->getOperand(1), N->getOperand(2));
21293 return DAG.getNode(ISD::ABDS, SDLoc(N), N->getValueType(0),
21294 N->getOperand(1), N->getOperand(2));
21296 return DAG.getNode(ISD::ABDU, SDLoc(N), N->getValueType(0),
21297 N->getOperand(1), N->getOperand(2));
21306 if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
21331 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
21332 N->getOperand(1));
21336 return DAG.getNode(AArch64ISD::MUL_PRED, SDLoc(N), N->getValueType(0),
21337 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21339 return DAG.getNode(AArch64ISD::MULHS_PRED, SDLoc(N), N->getValueType(0),
21340 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21342 return DAG.getNode(AArch64ISD::MULHU_PRED, SDLoc(N), N->getValueType(0),
21343 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21345 return DAG.getNode(AArch64ISD::SMIN_PRED, SDLoc(N), N->getValueType(0),
21346 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21348 return DAG.getNode(AArch64ISD::UMIN_PRED, SDLoc(N), N->getValueType(0),
21349 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21351 return DAG.getNode(AArch64ISD::SMAX_PRED, SDLoc(N), N->getValueType(0),
21352 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21354 return DAG.getNode(AArch64ISD::UMAX_PRED, SDLoc(N), N->getValueType(0),
21355 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21357 return DAG.getNode(AArch64ISD::SHL_PRED, SDLoc(N), N->getValueType(0),
21358 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21360 return DAG.getNode(AArch64ISD::SRL_PRED, SDLoc(N), N->getValueType(0),
21361 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21363 return DAG.getNode(AArch64ISD::SRA_PRED, SDLoc(N), N->getValueType(0),
21364 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21366 return DAG.getNode(AArch64ISD::FADD_PRED, SDLoc(N), N->getValueType(0),
21367 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21369 return DAG.getNode(AArch64ISD::FDIV_PRED, SDLoc(N), N->getValueType(0),
21370 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21372 return DAG.getNode(AArch64ISD::FMAX_PRED, SDLoc(N), N->getValueType(0),
21373 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21375 return DAG.getNode(AArch64ISD::FMAXNM_PRED, SDLoc(N), N->getValueType(0),
21376 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21378 return DAG.getNode(AArch64ISD::FMA_PRED, SDLoc(N), N->getValueType(0),
21379 N->getOperand(1), N->getOperand(3), N->getOperand(4),
21380 N->getOperand(2));
21382 return DAG.getNode(AArch64ISD::FMIN_PRED, SDLoc(N), N->getValueType(0),
21383 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21385 return DAG.getNode(AArch64ISD::FMINNM_PRED, SDLoc(N), N->getValueType(0),
21386 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21388 return DAG.getNode(AArch64ISD::FMUL_PRED, SDLoc(N), N->getValueType(0),
21389 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21391 return DAG.getNode(AArch64ISD::FSUB_PRED, SDLoc(N), N->getValueType(0),
21392 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21394 return DAG.getNode(ISD::ADD, SDLoc(N), N->getValueType(0), N->getOperand(2),
21395 N->getOperand(3));
21397 return DAG.getNode(ISD::SUB, SDLoc(N), N->getValueType(0), N->getOperand(2),
21398 N->getOperand(3));
21402 return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0), N->getOperand(2),
21403 N->getOperand(3));
21405 return DAG.getNode(AArch64ISD::BIC, SDLoc(N), N->getValueType(0),
21406 N->getOperand(2), N->getOperand(3));
21408 return DAG.getNode(ISD::XOR, SDLoc(N), N->getValueType(0), N->getOperand(2),
21409 N->getOperand(3));
21411 return DAG.getNode(ISD::OR, SDLoc(N), N->getValueType(0), N->getOperand(2),
21412 N->getOperand(3));
21414 return DAG.getNode(ISD::ABDS, SDLoc(N), N->getValueType(0),
21415 N->getOperand(2), N->getOperand(3));
21417 return DAG.getNode(ISD::ABDU, SDLoc(N), N->getValueType(0),
21418 N->getOperand(2), N->getOperand(3));
21420 return DAG.getNode(AArch64ISD::SDIV_PRED, SDLoc(N), N->getValueType(0),
21421 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21423 return DAG.getNode(AArch64ISD::UDIV_PRED, SDLoc(N), N->getValueType(0),
21424 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21428 return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
21429 N->getOperand(2), N->getOperand(3));
21433 return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
21434 N->getOperand(2), N->getOperand(3));
21436 return DAG.getNode(ISD::SADDSAT, SDLoc(N), N->getValueType(0),
21437 N->getOperand(1), N->getOperand(2));
21439 return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
21440 N->getOperand(1), N->getOperand(2));
21442 return DAG.getNode(ISD::UADDSAT, SDLoc(N), N->getValueType(0),
21443 N->getOperand(1), N->getOperand(2));
21445 return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
21446 N->getOperand(1), N->getOperand(2));
21448 return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0),
21449 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21451 if (!N->getOperand(2).getValueType().isFloatingPoint())
21453 N->getValueType(0), N->getOperand(1), N->getOperand(2),
21454 N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
21457 if (!N->getOperand(2).getValueType().isFloatingPoint())
21459 N->getValueType(0), N->getOperand(1), N->getOperand(2),
21460 N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
21465 N->getValueType(0), N->getOperand(1), N->getOperand(2),
21466 N->getOperand(3), DAG.getCondCode(ISD::SETGE));
21471 N->getValueType(0), N->getOperand(1), N->getOperand(2),
21472 N->getOperand(3), DAG.getCondCode(ISD::SETGT));
21477 N->getValueType(0), N->getOperand(1), N->getOperand(2),
21478 N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
21483 N->getValueType(0), N->getOperand(1), N->getOperand(2),
21484 N->getOperand(3), DAG.getCondCode(ISD::SETNE));
21488 N->getValueType(0), N->getOperand(1), N->getOperand(2),
21489 N->getOperand(3), DAG.getCondCode(ISD::SETUO));
21504 return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
21505 N->getOperand(1), N->getOperand(2), N->getOperand(3));
21527 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
21530 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
21533 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
21542 unsigned OC = N->getOpcode();
21556 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
21557 N->getOperand(0)->getOpcode() == ISD::SETCC);
21558 const SDValue SetCC = N->getOperand(0);
21562 if (!CCOp0->getValueType(0).isInteger() ||
21563 !CCOp1->getValueType(0).isInteger())
21567 cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get();
21575 DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp0);
21577 DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp1);
21580 SDLoc(SetCC), N->getValueType(0), Ext1, Ext2,
21581 cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get());
21594 if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
21595 (N->getOperand(0).getOpcode() == ISD::ABDU ||
21596 N->getOperand(0).getOpcode() == ISD::ABDS)) {
21597 SDNode *ABDNode = N->getOperand(0).getNode();
21603 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
21606 if (N->getValueType(0).isFixedLengthVector() &&
21607 N->getOpcode() == ISD::SIGN_EXTEND &&
21608 N->getOperand(0)->getOpcode() == ISD::SETCC)
21631 OrigAlignment, St.getMemOperand()->getFlags());
21634 if (BasePtr->getOpcode() == ISD::ADD &&
21635 isa<ConstantSDNode>(BasePtr->getOperand(1))) {
21636 BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
21637 BasePtr = BasePtr->getOperand(0);
21641 while (--NumVecElts) {
21648 St.getMemOperand()->getFlags());
21686 EVT VT = N->getValueType(0);
21696 SDValue Ops[] = { N->getOperand(0), // Chain
21697 N->getOperand(2), // Pg
21698 N->getOperand(3), // Base
21712 EVT VT = N->getValueType(0);
21713 EVT PtrTy = N->getOperand(3).getValueType();
21721 SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
21722 MINode->getOperand(3), DAG.getUNDEF(PtrTy),
21723 MINode->getOperand(2), PassThru,
21724 MINode->getMemoryVT(), MINode->getMemOperand(),
21741 EVT VT = N->getValueType(0);
21747 SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
21759 SDValue Data = N->getOperand(2);
21773 SDValue Ops[] = { N->getOperand(0), // Chain
21775 N->getOperand(4), // Base
21776 N->getOperand(3), // Pg
21780 return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
21786 SDValue Data = N->getOperand(2);
21788 EVT PtrTy = N->getOperand(4).getValueType();
21794 return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
21795 DAG.getUNDEF(PtrTy), MINode->getOperand(3),
21796 MINode->getMemoryVT(), MINode->getMemOperand(),
21803 /// if the zero constant is not re-used, since one instruction and one register
21849 int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
21850 if (Offset < -512 || Offset > 504)
21904 std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
21921 uint64_t IndexVal = CIndex->getZExtValue();
21940 if (S->isVolatile() || S->isIndexed())
21943 SDValue StVal = S->getValue();
21959 if (!Subtarget->isMisaligned128StoreSlow())
21962 // Don't split at -Oz.
21967 // those up regresses performance on micro-benchmarks and olden/bh.
21976 if (VT.getSizeInBits() != 128 || S->getAlign() >= Align(16) ||
21977 S->getAlign() <= Align(2))
21995 SDValue BasePtr = S->getBasePtr();
21997 DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
21998 S->getAlign(), S->getMemOperand()->getFlags());
22002 S->getPointerInfo(), S->getAlign(),
22003 S->getMemOperand()->getFlags());
22007 assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexpected Opcode!");
22009 // splice(pg, op1, undef) -> op1
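// Any lanes the splice would take from the undef second operand are themselves
// undef, so returning op1 unchanged is a legal refinement whatever the
// predicate selects.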
22010 if (N->getOperand(2).isUndef())
22011 return N->getOperand(1);
22018 assert((N->getOpcode() == AArch64ISD::UUNPKHI ||
22019 N->getOpcode() == AArch64ISD::UUNPKLO) &&
22022 // uunpklo/hi undef -> undef
22023 if (N->getOperand(0).isUndef())
22024 return DAG.getUNDEF(N->getValueType(0));
22029 if (N->getOperand(0).getOpcode() == ISD::MLOAD &&
22030 N->getOpcode() == AArch64ISD::UUNPKLO) {
22031 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N->getOperand(0));
22032 SDValue Mask = MLD->getMask();
22035 if (MLD->isUnindexed() && MLD->getExtensionType() != ISD::SEXTLOAD &&
22036 SDValue(MLD, 0).hasOneUse() && Mask->getOpcode() == AArch64ISD::PTRUE &&
22037 (MLD->getPassThru()->isUndef() ||
22038 isZerosVector(MLD->getPassThru().getNode()))) {
22039 unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
22040 unsigned PgPattern = Mask->getConstantOperandVal(0);
22041 EVT VT = N->getValueType(0);
22051 VT, DL, MLD->getChain(), MLD->getBasePtr(), MLD->getOffset(), Mask,
22052 PassThru, MLD->getMemoryVT(), MLD->getMemOperand(),
22053 MLD->getAddressingMode(), ISD::ZEXTLOAD);
22066 if (N->getOpcode() != AArch64ISD::UZP1)
22068 SDValue Op0 = N->getOperand(0);
22069 EVT SrcVT = Op0->getValueType(0);
22070 EVT DstVT = N->getValueType(0);
22078 // uzp1(rshrnb(uunpklo(X),C), rshrnb(uunpkhi(X), C)) -> urshr(X, C)
22080 assert(N->getOpcode() == AArch64ISD::UZP1 && "Only UZP1 expected.");
22081 SDValue Op0 = N->getOperand(0);
22082 SDValue Op1 = N->getOperand(1);
22083 EVT ResVT = N->getValueType(0);
22111 // t1 = nxv8i16 add(X, 1 << (ShiftValue - 1))
22121 EVT VT = Srl->getValueType(0);
22122 if (!VT.isScalableVector() || !Subtarget->hasSVE2())
22149 SDValue Op0 = N->getOperand(0);
22150 SDValue Op1 = N->getOperand(1);
22151 EVT ResVT = N->getValueType(0);
22153 // uzp(extract_lo(x), extract_hi(x)) -> extract_lo(uzp x, x)
22165 SDValue Uzp = DAG.getNode(N->getOpcode(), DL, WidenedResVT, SourceVec,
22173 if (N->getOpcode() == AArch64ISD::UZP2)
22176 // uzp1(x, undef) -> concat(truncate(x), undef)
22232 // uzp1(bitcast(x), bitcast(y)) -> uzp1(x, y)
22251 // truncating uzp1(x, y) -> xtn(concat (x, y))
22264 // uzp1(xtn x, xtn y) -> xtn(uzp1 (x, y))
22317 unsigned Opc = N->getOpcode();
22335 SDValue Chain = N->getOperand(0);
22336 SDValue Pg = N->getOperand(1);
22337 SDValue Base = N->getOperand(2);
22338 SDValue Offset = N->getOperand(3);
22339 SDValue Ty = N->getOperand(4);
22341 EVT ResVT = N->getValueType(0);
22353 EVT ExtFromEVT = ExtFrom->getVT().getVectorElementType();
22355 // If the predicate for the sign- or zero-extended offset is the
22356 // same as the predicate used for this load and the sign-/zero-extension
22357 // was from a 32-bit...
22378 assert(N->getOpcode() == AArch64ISD::VASHR ||
22379 N->getOpcode() == AArch64ISD::VLSHR);
22381 SDValue Op = N->getOperand(0);
22384 unsigned ShiftImm = N->getConstantOperandVal(1);
22388 if (N->getOpcode() == AArch64ISD::VASHR &&
22390 N->getOperand(1) == Op.getOperand(1))
22395 if (N->getFlags().hasExact())
22408 // sunpklo(sext(pred)) -> sext(extract_low_half(pred))
22411 if (N->getOperand(0).getOpcode() == ISD::SIGN_EXTEND &&
22412 N->getOperand(0)->getOperand(0)->getValueType(0).getScalarType() ==
22414 SDValue CC = N->getOperand(0)->getOperand(0);
22415 auto VT = CC->getValueType(0).getHalfNumVectorElementsVT(*DAG.getContext());
22418 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), N->getValueType(0), Unpk);
22424 /// Target-specific DAG combine function for post-increment LD1 (lane) and
22425 /// post-increment LD1R.
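/// Illustrative shape of the match: a load whose result feeds an
/// insert_vector_elt (or a dup) and whose address is separately incremented by
/// the access size can be rewritten to the post-indexed LD1 lane / LD1R form,
/// so the address update is folded into the memory operation.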
22433 EVT VT = N->getValueType(0);
22439 SDNode *LD = N->getOperand(LoadIdx).getNode();
22441 if (LD->getOpcode() != ISD::LOAD)
22447 Lane = N->getOperand(2);
22449 if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
22454 EVT MemVT = LoadSDN->getMemoryVT();
22461 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
22472 if (N->hasOneUse()) {
22473 unsigned UseOpc = N->use_begin()->getOpcode();
22478 SDValue Addr = LD->getOperand(1);
22479 SDValue Vector = N->getOperand(0);
22481 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
22482 Addr.getNode()->use_end(); UI != UE; ++UI) {
22484 if (User->getOpcode() != ISD::ADD
22489 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
22491 uint32_t IncVal = CInc->getZExtValue();
22511 Ops.push_back(LD->getOperand(0)); // Chain
22524 LoadSDN->getMemOperand());
22558 assert((N->getOpcode() == ISD::STORE || N->getOpcode() == ISD::MSTORE) &&
22562 if (!Store->isTruncatingStore() || Store->isIndexed())
22564 SDValue Ext = Store->getValue();
22569 SDValue Orig = Ext->getOperand(0);
22570 if (Store->getMemoryVT() != Orig.getValueType())
22572 return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
22573 Store->getBasePtr(), Store->getMemOperand());
22596 EVT MemVT = LD->getMemoryVT();
22598 LD->getOriginalAlign() >= 4)
22603 SDValue Chain = LD->getChain();
22604 SDValue BasePtr = LD->getBasePtr();
22605 MachineMemOperand *MMO = LD->getMemOperand();
22606 assert(LD->getOffset().isUndef() && "undef offset expected");
22635 // nontemporal loads larger than 256-bit loads for odd types so LDNPQ 256-bit
22641 if (Subtarget->supportsAddressTopByteIgnored())
22642 performTBISimplification(N->getOperand(1), DCI, DAG);
22645 if (LD->isVolatile() || !Subtarget->isLittleEndian())
22651 if (!LD->isNonTemporal())
22654 EVT MemVT = LD->getMemoryVT();
22661 SDValue Chain = LD->getChain();
22662 SDValue BasePtr = LD->getBasePtr();
22663 SDNodeFlags Flags = LD->getFlags();
22666 // Replace any non-temporal load over 256 bits with a series of 256-bit loads
22667 // and a scalar/vector load of less than 256 bits. This way we can utilize 256-bit
22673 // Create all 256-bit loads starting from offset 0 and up to (Num256Loads - 1) * 32.
22678 Align NewAlign = commonAlignment(LD->getAlign(), PtrOffset);
22680 NewVT, DL, Chain, NewPtr, LD->getPointerInfo().getWithOffset(PtrOffset),
22681 NewAlign, LD->getMemOperand()->getFlags(), LD->getAAInfo());
22688 // 256-bit loads and inserting the remaining load to it. We extract the
22691 unsigned PtrOffset = (MemVT.getSizeInBits() - BitsRemaining) / 8;
22697 Align NewAlign = commonAlignment(LD->getAlign(), PtrOffset);
22700 LD->getPointerInfo().getWithOffset(PtrOffset), NewAlign,
22701 LD->getMemOperand()->getFlags(), LD->getAAInfo());
22737 for (SDValue Operand : Op->op_values()) {
22774 unsigned BitsPerElement = std::max(64 / NumElts, 8u); // >= 64-bit vector
22794 // create 8x 16-bit values, and then perform the vector reduce.
22814 unsigned MaxBitMask = 1u << (VecVT.getVectorNumElements() - 1);
22829 if (!Store->isTruncatingStore())
22833 SDValue VecOp = Store->getValue();
22835 EVT MemVT = Store->getMemoryVT();
22854 return DAG.getStore(Store->getChain(), DL, ExtendedBits, Store->getBasePtr(),
22855 Store->getMemOperand());
22867 SDValue Value = ST->getValue();
22870 if (ST->isVolatile() || !Subtarget->isLittleEndian() ||
22875 assert(ST->getOffset().isUndef() && "undef offset expected");
22879 Value->getOperand(0).getValueType().getVectorElementType(), 4);
22883 {UndefVector, Value->getOperand(0), DAG.getVectorIdxConstant(0, DL)});
22889 SDValue Chain = ST->getChain();
22890 MachineMemOperand *MMO = ST->getMemOperand();
22895 SDValue Ptr2 = DAG.getMemBasePlusOffset(ST->getBasePtr(), Offset2, DL);
22901 SDValue Ptr1 = DAG.getMemBasePlusOffset(ST->getBasePtr(), Offset1, DL);
22906 Chain = DAG.getStore(Chain, DL, E0, ST->getBasePtr(),
22916 SDValue Chain = ST->getChain();
22917 SDValue Value = ST->getValue();
22918 SDValue Ptr = ST->getBasePtr();
22934 Value.getNode()->hasOneUse() && ST->isUnindexed() &&
22935 Subtarget->useSVEForFixedLengthVectors() &&
22937 ValueVT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits() &&
22940 ST->getMemoryVT(), ST->getMemOperand());
22945 if (Subtarget->supportsAddressTopByteIgnored() &&
22946 performTBISimplification(N->getOperand(2), DCI, DAG))
22955 if (ST->isTruncatingStore()) {
22956 EVT StoreVT = ST->getMemoryVT();
22960 trySimplifySrlAddToRshrnb(ST->getOperand(1), DAG, Subtarget)) {
22961 return DAG.getTruncStore(ST->getChain(), ST, Rshrnb, ST->getBasePtr(),
22962 StoreVT, ST->getMemOperand());
22974 SDValue Value = MST->getValue();
22975 SDValue Mask = MST->getMask();
22981 if (Value.getOpcode() == AArch64ISD::UZP1 && Value->hasOneUse() &&
22982 MST->isUnindexed() && Mask->getOpcode() == AArch64ISD::PTRUE &&
22991 unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
22992 unsigned PgPattern = Mask->getConstantOperandVal(0);
23000 return DAG.getMaskedStore(MST->getChain(), DL, Value.getOperand(0),
23001 MST->getBasePtr(), MST->getOffset(), Mask,
23002 MST->getMemoryVT(), MST->getMemOperand(),
23003 MST->getAddressingMode(),
23010 if (MST->isTruncatingStore()) {
23011 EVT ValueVT = Value->getValueType(0);
23012 EVT MemVT = MST->getMemoryVT();
23016 return DAG.getMaskedStore(MST->getChain(), DL, Rshrnb, MST->getBasePtr(),
23017 MST->getOffset(), MST->getMask(),
23018 MST->getMemoryVT(), MST->getMemOperand(),
23019 MST->getAddressingMode(), true);
23037 // ->
23052 // ->
23083 while (foldIndexIntoBase(BasePtr, Index, N->getScale(), SDLoc(N), DAG))
23093 EVT DataVT = N->getOperand(1).getValueType();
23095 // will later be re-extended to 64 bits in legalization
23098 if (ISD::isVectorShrinkable(Index.getNode(), 32, N->isIndexSigned())) {
23108 Stride = cast<ConstantSDNode>(Index.getOperand(0))->getSExtValue();
23118 Stride = Step << Shift->getZExtValue();
23156 SDValue Chain = MGS->getChain();
23157 SDValue Scale = MGS->getScale();
23158 SDValue Index = MGS->getIndex();
23159 SDValue Mask = MGS->getMask();
23160 SDValue BasePtr = MGS->getBasePtr();
23161 ISD::MemIndexType IndexType = MGS->getIndexType();
23169 SDValue PassThru = MGT->getPassThru();
23172 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
23173 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
23176 SDValue Data = MSC->getValue();
23178 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL,
23179 Ops, MSC->getMemOperand(), IndexType,
23180 MSC->isTruncatingStore());
23183 /// Target-specific DAG combine function for NEON load/store intrinsics
23191 unsigned AddrOpIdx = N->getNumOperands() - 1;
23192 SDValue Addr = N->getOperand(AddrOpIdx);
23195 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
23196 UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
23198 if (User->getOpcode() != ISD::ADD ||
23219 unsigned IntNo = N->getConstantOperandVal(1);
23268 VecTy = N->getOperand(2).getValueType();
23270 VecTy = N->getValueType(0);
23273 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
23275 uint32_t IncVal = CInc->getZExtValue();
23284 Ops.push_back(N->getOperand(0)); // Incoming chain
23288 Ops.push_back(N->getOperand(i));
23304 MemInt->getMemoryVT(),
23305 MemInt->getMemOperand());
23326 switch(V.getNode()->getOpcode()) {
23331 if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
23332 || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
23333 ExtType = LoadNode->getExtensionType();
23339 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
23340 if ((TypeNode->getVT() == MVT::i8 && width == 8)
23341 || (TypeNode->getVT() == MVT::i16 && width == 16)) {
23348 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
23349 if ((TypeNode->getVT() == MVT::i8 && width == 8)
23350 || (TypeNode->getVT() == MVT::i16 && width == 16)) {
23358 return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
23359 1LL << (width - 1);
23370 // +-------------+ +-------------+ +-------------+ +-------------+
23372 // +-------------+ +-------------+ +-------------+ +-------------+
23374 // V V | +----------+
23375 // +-------------+ +----+ | |
23377 // +-------------+ +----+ | |
23380 // +-------------+ | |
23382 // +-------------+ | |
23384 // +-----+ | |
23387 // +-------------+
23389 // +-------------+
23401 // extension (8,15), 8 patterns unique to sign extensions (-8,-1), and 8
23431 // symbolic values and well known constants (0, 1, -1, MaxUInt) we can
23441 AddConstant -= (1 << (width-1));
23447 (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
23462 (AddConstant <= 0 && CompConstant >= -1 &&
23502 // (X & C) >u Mask --> ((X & (C & ~Mask)) != 0)
23503 // (X & C) <u Pow2 --> ((X & (C & ~(Pow2-1))) == 0)
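// Worked example (values chosen for illustration): with C = 0xF0F0 and
// Mask = 0xFF, (X & 0xF0F0) >u 0xFF depends only on the bits of C above bit 7,
// so it is equivalent to (X & 0xF000) != 0, which a single ANDS can test.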
23508 ConstantSDNode *SubsC = dyn_cast<ConstantSDNode>(SubsNode->getOperand(1));
23512 APInt SubsAP = SubsC->getAPIntValue();
23522 ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(AndNode->getOperand(1));
23526 APInt MaskAP = CC == AArch64CC::HI ? SubsAP : (SubsAP - 1);
23529 APInt AndSMask = (~MaskAP) & AndC->getAPIntValue();
23531 AArch64ISD::ANDS, DL, SubsNode->getVTList(), AndNode->getOperand(0),
23532 DAG.getConstant(AndSMask, DL, SubsC->getValueType(0)));
23535 N->getOperand(CCIndex)->getValueType(0));
23545 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), AArch64_CC,
23547 return DAG.getNode(N->getOpcode(), N, N->getVTList(), Ops);
23555 unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
23556 SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
23557 unsigned CondOpcode = SubsNode->getOpcode();
23559 if (CondOpcode != AArch64ISD::SUBS || SubsNode->hasAnyUseOfValue(0) ||
23560 !SubsNode->hasOneUse())
23566 SDNode *AndNode = SubsNode->getOperand(0).getNode();
23569 if (AndNode->getOpcode() != ISD::AND)
23576 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
23577 uint32_t CNV = CN->getZExtValue();
23587 SDValue AddValue = AndNode->getOperand(0);
23594 SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
23595 SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
23596 SDValue SubsInputValue = SubsNode->getOperand(1);
23613 cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
23614 cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
23619 SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
23620 SubsNode->getValueType(1));
23621 SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
23642 SDValue Chain = N->getOperand(0);
23643 SDValue Dest = N->getOperand(1);
23644 SDValue CCVal = N->getOperand(2);
23645 SDValue Cmp = N->getOperand(3);
23648 unsigned CC = CCVal->getAsZExtVal();
23658 if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
23693 unsigned CC = N->getConstantOperandVal(2);
23694 SDValue SUBS = N->getOperand(3);
23698 Zero = N->getOperand(0);
23699 CTTZ = N->getOperand(1);
23701 Zero = N->getOperand(1);
23702 CTTZ = N->getOperand(0);
23728 DAG.getConstant(BitWidth - 1, SDLoc(N), CTTZ.getValueType());
23741 SDValue L = Op->getOperand(0);
23742 SDValue R = Op->getOperand(1);
23744 static_cast<AArch64CC::CondCode>(Op->getConstantOperandVal(2));
23746 SDValue OpCmp = Op->getOperand(3);
23758 SDValue X = CmpLHS->getOperand(0);
23759 SDValue Y = CmpLHS->getOperand(1);
23769 if (CX->getAPIntValue() == CY->getAPIntValue())
23773 static_cast<AArch64CC::CondCode>(CmpLHS->getConstantOperandVal(2));
23774 SDValue Cond = CmpLHS->getOperand(3);
23787 EVT VT = Op->getValueType(0);
23797 // CSEL x, x, cc -> x
23798 if (N->getOperand(0) == N->getOperand(1))
23799 return N->getOperand(0);
23804 // CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
23805 // CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
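// On AArch64 a CTTZ of zero produces the bit width (e.g. via RBIT+CLZ), and
// the bit width is a power of two, so ANDing with bitwidth-1 maps that case to
// 0 while leaving every in-range trailing-zero count unchanged.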
23812 // Try to re-use an already extended operand of a vector SetCC feeding a
23816 EVT Op0MVT = Op->getOperand(0).getValueType();
23817 if (!Op0MVT.isVector() || Op->use_empty())
23822 SDNode *FirstUse = *Op->use_begin();
23823 if (FirstUse->getOpcode() != ISD::VSELECT)
23825 EVT UseMVT = FirstUse->getValueType(0);
23828 if (any_of(Op->uses(), [&UseMVT](const SDNode *N) {
23829 return N->getOpcode() != ISD::VSELECT || N->getValueType(0) != UseMVT;
23834 if (!ISD::isConstantSplatVector(Op->getOperand(1).getNode(), V))
23840 ISD::CondCode CC = cast<CondCodeSDNode>(Op->getOperand(2))->get();
23842 // split the SET_CC and re-use the extended version of the operand.
23844 Op->getOperand(0));
23846 Op->getOperand(0));
23849 Op1ExtV = DAG.getNode(ISD::SIGN_EXTEND, DL, UseMVT, Op->getOperand(1));
23852 Op1ExtV = DAG.getNode(ISD::ZERO_EXTEND, DL, UseMVT, Op->getOperand(1));
23857 Op0ExtV, Op1ExtV, Op->getOperand(2));
23863 SDValue Vec = N->getOperand(0);
23869 return getVectorBitwiseReduce(N->getOpcode(), Vec, N->getValueType(0), DL,
23879 assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
23880 SDValue LHS = N->getOperand(0);
23881 SDValue RHS = N->getOperand(1);
23882 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
23884 EVT VT = N->getValueType(0);
23891 LHS->getOpcode() == AArch64ISD::CSEL &&
23892 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
23893 LHS->hasOneUse()) {
23907 // setcc (srl x, imm), 0, ne ==> setcc (and x, (-1 << imm)), 0, ne
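// Worked example (i32, imm = 5, for illustration): (x >> 5) != 0 asks whether
// any bit at position 5 or above is set, i.e. (x & 0xFFFFFFE0) != 0, and that
// mask is a logical immediate, so the shift+compare becomes a single TST.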
23909 LHS->getOpcode() == ISD::SRL && isa<ConstantSDNode>(LHS->getOperand(1)) &&
23910 LHS->getConstantOperandVal(1) < VT.getScalarSizeInBits() &&
23911 LHS->hasOneUse()) {
23912 EVT TstVT = LHS->getValueType(0);
23915 uint64_t TstImm = -1ULL << LHS->getConstantOperandVal(1);
23916 SDValue TST = DAG.getNode(ISD::AND, DL, TstVT, LHS->getOperand(0),
23918 return DAG.getNode(ISD::SETCC, DL, VT, TST, RHS, N->getOperand(2));
23924 // setcc (iN (bitcast (vNi1 X))), -1, (eq|ne)
23925 // ==> setcc (iN (sext (i1 (vecreduce_and (vNi1 X))))), -1, (eq|ne)
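// The iN bitcast of a vNi1 mask is all-ones exactly when every lane is true,
// so the scalar compare against -1 can instead be expressed as an AND
// reduction of the mask, avoiding the scalar materialisation of the bitcast.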
23929 LHS->getOpcode() == ISD::BITCAST) {
23930 EVT ToVT = LHS->getValueType(0);
23931 EVT FromVT = LHS->getOperand(0).getValueType();
23936 DL, MVT::i1, LHS->getOperand(0));
23950 // Replace a flag-setting operator (eg ANDS) with the generic version
23956 SDValue LHS = N->getOperand(0);
23957 SDValue RHS = N->getOperand(1);
23958 EVT VT = N->getValueType(0);
23961 if (!N->hasAnyUseOfValue(1)) {
23962 SDValue Res = DCI.DAG.getNode(GenericOpcode, DL, VT, N->ops());
23967 // Combine identical generic nodes into this node, re-using the result.
23979 SDValue Pred = N->getOperand(0);
23980 SDValue LHS = N->getOperand(1);
23981 SDValue RHS = N->getOperand(2);
23982 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
23985 LHS->getOpcode() != ISD::SIGN_EXTEND)
23988 SDValue Extract = LHS->getOperand(0);
23989 if (Extract->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
23990 Extract->getValueType(0) != N->getValueType(0) ||
23991 Extract->getConstantOperandVal(1) != 0)
23994 SDValue InnerSetCC = Extract->getOperand(0);
23995 if (InnerSetCC->getOpcode() != AArch64ISD::SETCC_MERGE_ZERO)
24006 Pred->getConstantOperandVal(0) >= AArch64SVEPredPattern::vl1 &&
24007 Pred->getConstantOperandVal(0) <= AArch64SVEPredPattern::vl256)
24015 assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
24019 SDValue Pred = N->getOperand(0);
24020 SDValue LHS = N->getOperand(1);
24021 SDValue RHS = N->getOperand(2);
24022 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
24028 LHS->getOpcode() == ISD::SIGN_EXTEND &&
24029 LHS->getOperand(0)->getValueType(0) == N->getValueType(0)) {
24033 if (LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
24034 LHS->getOperand(0)->getOperand(0) == Pred)
24035 return LHS->getOperand(0);
24039 // -> nxvNi1 ...
24041 return LHS->getOperand(0);
24045 // -> nxvNi1 and(pred, ...)
24049 return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0),
24050 LHS->getOperand(0), Pred);
24063 if (!Op->hasOneUse())
24066 // We don't handle undef/constant-fold cases below, as they should have
24070 // (tbz (trunc x), b) -> (tbz x, b)
24072 if (Op->getOpcode() == ISD::TRUNCATE &&
24073 Bit < Op->getValueType(0).getSizeInBits()) {
24074 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
24077 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
24078 if (Op->getOpcode() == ISD::ANY_EXTEND &&
24079 Bit < Op->getOperand(0).getValueSizeInBits()) {
24080 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
24083 if (Op->getNumOperands() != 2)
24086 auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
24090 switch (Op->getOpcode()) {
24094 // (tbz (and x, m), b) -> (tbz x, b)
24096 if ((C->getZExtValue() >> Bit) & 1)
24097 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
24100 // (tbz (shl x, c), b) -> (tbz x, b-c)
24102 if (C->getZExtValue() <= Bit &&
24103 (Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
24104 Bit = Bit - C->getZExtValue();
24105 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
24109 // (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
24111 Bit = Bit + C->getZExtValue();
24112 if (Bit >= Op->getValueType(0).getSizeInBits())
24113 Bit = Op->getValueType(0).getSizeInBits() - 1;
24114 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
24116 // (tbz (srl x, c), b) -> (tbz x, b+c)
24118 if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
24119 Bit = Bit + C->getZExtValue();
24120 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
24124 // (tbz (xor x, -1), b) -> (tbnz x, b)
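// XOR with all-ones only complements each bit, so rather than materialising
// the XOR we test the same bit of x and flip the branch polarity
// (TBZ <-> TBNZ).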
24126 if ((C->getZExtValue() >> Bit) & 1)
24128 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
24132 // Optimize test single bit zero/non-zero and branch.
24136 unsigned Bit = N->getConstantOperandVal(2);
24138 SDValue TestSrc = N->getOperand(1);
24144 unsigned NewOpc = N->getOpcode();
24155 return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
24156 DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
24165 auto SelectA = N->getOperand(1);
24166 auto SelectB = N->getOperand(2);
24167 auto NTy = N->getValueType(0);
24171 SDValue SetCC = N->getOperand(0);
24186 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
24197 // vselect (v1i1 setcc) ->
24206 SDValue N0 = N->getOperand(0);
24210 return N->getOperand(1);
24213 return N->getOperand(2);
24215 // Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
24216 // into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
24218 SDValue SetCC = N->getOperand(0);
24224 SDNode *SplatLHS = N->getOperand(1).getNode();
24225 SDNode *SplatRHS = N->getOperand(2).getNode();
24227 if (CmpLHS.getValueType() == N->getOperand(1).getValueType() &&
24237 NumElts, DAG.getConstant(VT.getScalarSizeInBits() - 1, SDLoc(N),
24242 auto Or = DAG.getNode(ISD::OR, SDLoc(N), VT, Shift, N->getOperand(1));
24254 EVT ResVT = N->getValueType(0);
24260 SDValue IfTrue = N->getOperand(1);
24261 SDValue IfFalse = N->getOperand(2);
24264 cast<CondCodeSDNode>(N0.getOperand(2))->get());
24270 /// the compare-mask instructions rather than going via NZCV, even if LHS and
24276 SDValue N0 = N->getOperand(0);
24277 EVT ResVT = N->getValueType(0);
24289 "Scalar-SETCC feeding SELECT has unexpected result type!");
24292 // largest real NEON comparison is 64-bits per lane, which means the result is
24293 // at most 32-bits and an illegal vector. Just bail out for now.
24335 return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
24340 EVT VT = N->getValueType(0);
24346 SmallVector<SDValue> Ops(N->ops());
24347 if (SDNode *LN = DCI.DAG.getNodeIfExists(N->getOpcode(),
24354 if (N->getOpcode() == AArch64ISD::DUP) {
24363 SDValue EXTRACT_VEC_ELT = N->getOperand(0);
24381 if (N->getValueType(0) == N->getOperand(0).getValueType())
24382 return N->getOperand(0);
24383 if (N->getOperand(0).getOpcode() == AArch64ISD::NVCAST)
24384 return DAG.getNode(AArch64ISD::NVCAST, SDLoc(N), N->getValueType(0),
24385 N->getOperand(0).getOperand(0));
24392 // globaladdr as (globaladdr + constant) - constant.
24397 if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
24401 uint64_t MinOffset = -1ull;
24402 for (SDNode *N : GN->uses()) {
24403 if (N->getOpcode() != ISD::ADD)
24405 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
24407 C = dyn_cast<ConstantSDNode>(N->getOperand(1));
24410 MinOffset = std::min(MinOffset, C->getZExtValue());
24412 uint64_t Offset = MinOffset + GN->getOffset();
24416 // (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
24417 if (Offset <= uint64_t(GN->getOffset()))
24432 const GlobalValue *GV = GN->getGlobal();
24433 Type *T = GV->getValueType();
24434 if (!T->isSized() ||
24435 Offset > GV->getDataLayout().getTypeAllocSize(T))
24446 SDValue BR = N->getOperand(0);
24447 if (!Subtarget->hasCSSC() || BR.getOpcode() != ISD::BITREVERSE ||
24499 OffsetConst->getZExtValue(), ScalarSizeInBytes);
24505 const SDValue Src = N->getOperand(2);
24506 const EVT SrcVT = Src->getValueType(0);
24528 SDValue Base = N->getOperand(4);
24531 SDValue Offset = N->getOperand(5);
24534 // applies to non-temporal scatters because there's no instruction that takes
24546 // In the case of non-temporal gather loads there's only one SVE instruction
24547 // per data-size: "scalar + vector", i.e.
24559 // immediates outside that range and non-immediate scalar offsets use SST1 or
24590 // Keep the original type of the input data to store - this is needed to be
24605 SDValue Ops[] = {N->getOperand(0), // Chain
24607 N->getOperand(3), // Pg
24618 const EVT RetVT = N->getValueType(0);
24630 SDValue Base = N->getOperand(3);
24633 SDValue Offset = N->getOperand(4);
24636 // offsets. This applies to non-temporal and quadword gathers, which do not
24648 // In the case of non-temporal gather loads and quadword gather loads there's
24662 // immediates outside that range and non-immediate scalar offsets use
24695 // Keep the original output value type around - this is needed to be able to
24703 SDValue Ops[] = {N->getOperand(0), // Chain
24704 N->getOperand(2), // Pg
24725 SDValue Src = N->getOperand(0);
24726 unsigned Opc = Src->getOpcode();
24728 // Sign extend of an unsigned unpack -> signed unpack
24738 // ->
24740 // ->
24742 SDValue ExtOp = Src->getOperand(0);
24743 auto VT = cast<VTSDNode>(N->getOperand(1))->getVT();
24755 return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
24830 EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
24831 EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();
24836 EVT DstVT = N->getValueType(0);
24840 for (unsigned I = 0; I < Src->getNumOperands(); ++I)
24841 Ops.push_back(Src->getOperand(I));
24852 /// offset vector is an unpacked 32-bit scalable vector. The other cases (Offset
24856 SDValue Offset = N->getOperand(OffsetPos);
24862 // Extend the unpacked offset vector to 64-bit lanes.
24865 SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
24866 // Replace the offset operand with the 64-bit one.
24869 return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
24881 if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes))
24885 SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
24893 return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
24922 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!");
24923 SDValue InsertVec = N->getOperand(0);
24924 SDValue InsertElt = N->getOperand(1);
24925 SDValue InsertIdx = N->getOperand(2);
24944 // If we get here we are effectively trying to zero lanes 1-N of a vector.
24947 if (N->getValueType(0) != ExtractVec.getValueType())
24968 SDValue N0 = N->getOperand(0);
24969 EVT VT = N->getValueType(0);
24972 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND)
24980 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
24984 N0.hasOneUse() && Subtarget->useSVEForFixedLengthVectors() &&
24986 VT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits()) {
24989 LN0->getChain(), LN0->getBasePtr(),
24990 N0.getValueType(), LN0->getMemOperand());
25005 EVT VT = N->getValueType(0);
25008 if (!VT.isScalableVector() || Subtarget->hasSVE2() || Subtarget->hasSME())
25013 SDValue Mask = N->getOperand(0);
25014 SDValue In1 = N->getOperand(1);
25015 SDValue In2 = N->getOperand(2);
25024 EVT VT = N->getValueType(0);
25026 SDValue Insert = N->getOperand(0);
25034 uint64_t IdxDupLane = N->getConstantOperandVal(1);
25052 DAG.getUNDEF(NewSubvecVT), Subvec, Insert->getOperand(2));
25054 NewInsert, N->getOperand(1));
25065 SDValue LHS = N->getOperand(0);
25066 SDValue RHS = N->getOperand(1);
25094 // You can see the regressions on test/CodeGen/AArch64/aarch64-smull.ll
25117 if (ExtractHighSrcVec->use_size() != 2)
25121 for (SDNode *User : ExtractHighSrcVec.getNode()->uses()) {
25125 if (User->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
25126 !isNullConstant(User->getOperand(1))) {
25134 if (!ExtractLow || !ExtractLow->hasOneUse())
25139 SDNode *ExtractLowUser = *ExtractLow.getNode()->use_begin();
25140 if (ExtractLowUser->getOpcode() != N->getOpcode()) {
25143 if (ExtractLowUser->getOperand(0) == ExtractLow) {
25144 if (ExtractLowUser->getOperand(1).getOpcode() == ISD::TRUNCATE)
25145 TruncLow = ExtractLowUser->getOperand(1);
25149 if (ExtractLowUser->getOperand(0).getOpcode() == ISD::TRUNCATE)
25150 TruncLow = ExtractLowUser->getOperand(0);
25159 // You can see the regressions on test/CodeGen/AArch64/aarch64-smull.ll
25222 EVT VT = N->getValueType(0);
25226 SDValue ZEXT = N->getOperand(0);
25258 switch (N->getOpcode()) {
25291 APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits());
25293 APInt::getAllOnes(N->getValueType(0).getVectorNumElements());
25415 switch (N->getConstantOperandVal(1)) {
25566 unsigned IntrinsicID = N->getConstantOperandVal(1);
25573 N->getOperand(0), DAG.getConstant(Register, DL, MVT::i64));
25583 DAG.getVTList(MVT::Other), N->getOperand(0),
25584 N->getOperand(2), N->getOperand(3));
25587 DAG.getVTList(MVT::Other), N->getOperand(0),
25588 N->getOperand(2), N->getOperand(3));
25604 // we can't perform a tail-call. In particular, we need to check for
25609 if (N->getNumValues() != 1)
25611 if (!N->hasNUsesOfValue(1, 0))
25615 SDNode *Copy = *N->use_begin();
25616 if (Copy->getOpcode() == ISD::CopyToReg) {
25619 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
25622 TCChain = Copy->getOperand(0);
25623 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
25627 for (SDNode *Node : Copy->uses()) {
25628 if (Node->getOpcode() != AArch64ISD::RET_GLUE)
25645 return CI->isTailCall();
25652 if (!CstOffset || CstOffset->isZero())
25658 return isInt<9>(CstOffset->getSExtValue());
25665 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
25668 // Non-null if there is exactly one user of the loaded value (ignoring chain).
25670 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
25677 ValOnlyUser = nullptr; // Multiple non-chain uses, bail out.
25688 if (ValOnlyUser && ValOnlyUser->getValueType(0).isScalableVector() &&
25689 (ValOnlyUser->getOpcode() == ISD::SPLAT_VECTOR ||
25690 (ValOnlyUser->getOpcode() == AArch64ISD::DUP_MERGE_PASSTHRU &&
25691 IsUndefOrZero(ValOnlyUser->getOperand(2)))))
25694 Base = Op->getOperand(0);
25697 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
25698 int64_t RHSC = RHS->getSExtValue();
25699 if (Op->getOpcode() == ISD::SUB)
25700 RHSC = -(uint64_t)RHSC;
25703 // Always emit pre-inc/post-inc addressing mode. Use negated constant offset
25705 Offset = DAG.getConstant(RHSC, SDLoc(N), RHS->getValueType(0));
25718 VT = LD->getMemoryVT();
25719 Ptr = LD->getBasePtr();
25721 VT = ST->getMemoryVT();
25722 Ptr = ST->getBasePtr();
25738 VT = LD->getMemoryVT();
25739 Ptr = LD->getBasePtr();
25741 VT = ST->getMemoryVT();
25742 Ptr = ST->getBasePtr();
25748 // Post-indexing updates the base, so it's not a valid transform
25760 SDValue Op = N->getOperand(0);
25761 EVT VT = N->getValueType(0);
25788 SDValue Op = N->getOperand(0);
25789 EVT VT = N->getValueType(0);
25802 SDValue Op = N->getOperand(0);
25803 EVT VT = N->getValueType(0);
25823 "Expected fp->int bitcast!");
25854 EVT VT = N->getValueType(0);
25857 !N->getFlags().hasAllowReassociation()) ||
25858 (VT.getScalarType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
25862 SDValue X = N->getOperand(0);
25863 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(1));
25865 Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
25866 X = N->getOperand(1);
25871 if (Shuf->getOperand(0) != X || !Shuf->getOperand(1)->isUndef())
25875 ArrayRef<int> Mask = Shuf->getMask();
25877 if (Mask[I] != (I % 2 == 0 ? I + 1 : I - 1))
25906 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
25915 SDValue In = N->getOperand(0);
25923 EVT VT = N->getValueType(0);
25932 auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
25936 unsigned Index = CIndex->getZExtValue();
25943 SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
25966 assert(N->getValueType(0) == MVT::i128 &&
25969 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
25970 if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
25971 // LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
25974 createGPRPairNode(DAG, N->getOperand(2)), // Compare value
25975 createGPRPairNode(DAG, N->getOperand(3)), // Store value
25976 N->getOperand(1), // Ptr
25977 N->getOperand(0), // Chain in
25981 switch (MemOp->getMergedOrdering()) {
26017 switch (MemOp->getMergedOrdering()) {
26036 auto Desired = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
26037 auto New = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
26038 SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
26039 New.first, New.second, N->getOperand(0)};
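// A minimal standalone sketch (not from this file) of what SplitScalar does to
// the i128 "Desired" and "New" values above on a little-endian target: the
// 128-bit quantity is handed to the CMP_SWAP_128 pseudo as two i64 halves.
// The use of unsigned __int128 is illustrative (a GCC/Clang extension).
#include <cstdint>
#include <utility>

static std::pair<std::uint64_t, std::uint64_t> splitU128(unsigned __int128 V) {
  std::uint64_t Lo = static_cast<std::uint64_t>(V);       // bits [63:0]
  std::uint64_t Hi = static_cast<std::uint64_t>(V >> 64); // bits [127:64]
  return {Lo, Hi};
}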
26053 // LowerATOMIC_LOAD_AND). We can't take that approach with 128-bit, because
26054 // the type is not legal. Therefore we shouldn't expect to see a 128-bit
26129 // LSE128 has 128-bit RMW ops, but i128 is not a legal type, so lower it
26137 assert(N->getValueType(0) == MVT::i128 &&
26140 if (!Subtarget->hasLSE128())
26143 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
26144 const SDValue &Chain = N->getOperand(0);
26145 const SDValue &Ptr = N->getOperand(1);
26146 const SDValue &Val128 = N->getOperand(2);
26150 const unsigned ISDOpcode = N->getOpcode();
26152 getAtomicLoad128Opcode(ISDOpcode, MemOp->getMergedOrdering());
26158 DAG.getConstant(-1ULL, dl, MVT::i64), Val2x64.first);
26161 DAG.getConstant(-1ULL, dl, MVT::i64), Val2x64.second);
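// A minimal standalone sketch (not from this file) of the identity behind the
// XOR-with-all-ones nodes above: LDCLRP atomically clears the bits set in its
// operand, so an atomic AND with V is expressed as a clear of ~V, since
// X & V == X & ~(~V). Plain integers stand in for the atomic memory word.
#include <cassert>
#include <cstdint>

static std::uint64_t clearBits(std::uint64_t X, std::uint64_t Mask) {
  return X & ~Mask; // What an atomic bit-clear leaves behind.
}

static void checkAndViaClear(std::uint64_t X, std::uint64_t V) {
  assert((X & V) == clearBits(X, ~V) && "AND equals CLR of inverted operand");
}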
26184 switch (N->getOpcode()) {
26239 assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
26246 assert(N->getValueType(0) != MVT::i128 &&
26247 "128-bit ATOMIC_LOAD_AND should be lowered directly to LDCLRP");
26252 assert(cast<AtomicSDNode>(N)->getVal().getValueType() == MVT::i128 &&
26253 "Expected 128-bit atomicrmw.");
26261 EVT MemVT = LoadNode->getMemoryVT();
26262 // Handle lowering 256 bit non-temporal loads into LDNP for little-endian
26264 if (LoadNode->isNonTemporal() && Subtarget->isLittleEndian() &&
26276 {LoadNode->getChain(), LoadNode->getBasePtr()},
26277 LoadNode->getMemoryVT(), LoadNode->getMemOperand());
26285 if ((!LoadNode->isVolatile() && !LoadNode->isAtomic()) ||
26286 LoadNode->getMemoryVT() != MVT::i128) {
26287 // Non-volatile or atomic loads are optimized later in AArch64's load/store
26295 AN && AN->getSuccessOrdering() == AtomicOrdering::Acquire;
26299 assert(Subtarget->hasFeature(AArch64::FeatureRCPC3));
26303 {LoadNode->getChain(), LoadNode->getBasePtr()},
26304 LoadNode->getMemoryVT(), LoadNode->getMemOperand());
26310 Result.getValue(FirstRes), Result.getValue(1 - FirstRes));
26321 // CONCAT_VECTORS -- but delegate to common code for result type
26325 EVT VT = N->getValueType(0);
26328 static_cast<Intrinsic::ID>(N->getConstantOperandVal(0));
26336 auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
26338 N->getOperand(1), Op2, N->getOperand(3));
26346 auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
26348 N->getOperand(1), Op2, N->getOperand(3));
26357 N->getOperand(1), N->getOperand(2));
26366 N->getOperand(1), N->getOperand(2));
26380 auto V = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, NewVT, N->ops());
26388 assert(N->getValueType(0) == MVT::i128 &&
26389 "READ_REGISTER custom lowering is only for 128-bit sysregs");
26390 SDValue Chain = N->getOperand(0);
26391 SDValue SysRegName = N->getOperand(1);
26398 // of the 128-bit System Register value.
26409 if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
26431 // In v8.4a, ldp and stp instructions are guaranteed to be single-copy atomic
26432 // provided the address is 16-byte aligned.
26434 if (!Subtarget->hasLSE2())
26438 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
26439 LI->getAlign() >= Align(16);
26442 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
26443 SI->getAlign() >= Align(16);
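// A minimal user-level sketch (not from this file) of the case the predicate
// above accepts: a 128-bit object with 16-byte alignment. Under LSE2 its
// atomic loads and stores can be left as plain LDP/STP; whether std::atomic
// reports the type lock-free still depends on the build target, so this is
// illustrative only.
#include <atomic>

struct alignas(16) PairOfPointers {
  void *First;
  void *Second;
};

// 128 bits, 16-byte aligned: eligible for the LDP/STP path described above.
std::atomic<PairOfPointers> GlobalPair;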
26449 if (!Subtarget->hasLSE128())
26455 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
26456 SI->getAlign() >= Align(16) &&
26457 (SI->getOrdering() == AtomicOrdering::Release ||
26458 SI->getOrdering() == AtomicOrdering::SequentiallyConsistent);
26461 return RMW->getValOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
26462 RMW->getAlign() >= Align(16) &&
26463 (RMW->getOperation() == AtomicRMWInst::Xchg ||
26464 RMW->getOperation() == AtomicRMWInst::And ||
26465 RMW->getOperation() == AtomicRMWInst::Or);
26471 if (!Subtarget->hasLSE2() || !Subtarget->hasRCPC3())
26475 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
26476 LI->getAlign() >= Align(16) &&
26477 LI->getOrdering() == AtomicOrdering::Acquire;
26480 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
26481 SI->getAlign() >= Align(16) &&
26482 SI->getOrdering() == AtomicOrdering::Release;
26500 // Store-Release instructions only provide seq_cst guarantees when paired with
26501 // Load-Acquire instructions. MSVC CRT does not use these instructions to
26504 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
26507 switch (I->getOpcode()) {
26511 return cast<AtomicCmpXchgInst>(I)->getSuccessOrdering() ==
26514 return cast<AtomicRMWInst>(I)->getOrdering() ==
26517 return cast<StoreInst>(I)->getOrdering() ==
26522 // Loads and stores less than 128 bits are already atomic; ones above that
26527 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
26539 // Loads and stores less than 128 bits are already atomic; ones above that
26544 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
26554 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
26558 // succeed. So at -O0 lower this operation to a CAS loop.
26564 return Subtarget->hasLSE() ? AtomicExpansionKind::CmpXChg
26569 // However, with the LSE instructions (or outline-atomics mode, which provides
26570 // library routines in place of the LSE instructions), we can directly emit many
26573 // Floating-point operations are always emitted to a cmpxchg loop, because they
26577 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
26580 if (AI->isFloatingPointOperation())
26583 bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
26584 (AI->getOperation() == AtomicRMWInst::Xchg ||
26585 AI->getOperation() == AtomicRMWInst::Or ||
26586 AI->getOperation() == AtomicRMWInst::And);
26592 if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
26593 if (Subtarget->hasLSE())
26595 if (Subtarget->outlineAtomics()) {
26598 // (1) high level <atomic> support approved:
26599 // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
26600 // (2) low level libgcc and compiler-rt support implemented by:
26602 if (AI->getOperation() != AtomicRMWInst::Min &&
26603 AI->getOperation() != AtomicRMWInst::Max &&
26604 AI->getOperation() != AtomicRMWInst::UMin &&
26605 AI->getOperation() != AtomicRMWInst::UMax) {
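// A minimal user-level sketch (not from this file) of the outline-atomics path
// discussed above: with -moutline-atomics the fetch_add below is compiled to a
// call into the runtime helpers provided by libgcc/compiler-rt rather than
// inline LSE or LL/SC code, so one binary can select LSE at run time. The
// exact helper symbol (an __aarch64_ldadd*-style routine) is an implementation
// detail of those runtimes, mentioned only as background.
#include <atomic>

static int addRelaxed(std::atomic<int> &Counter, int Delta) {
  return Counter.fetch_add(Delta, std::memory_order_relaxed);
}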
26611 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
26615 // succeed. So at -O0 lower this operation to a CAS loop. Also worthwhile if
26618 Subtarget->hasLSE())
26628 if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
26630 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
26634 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
26638 // 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand
26640 unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
26650 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
26653 // Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
26656 if (ValueTy->getPrimitiveSizeInBits() == 128) {
26671 Type *Tys[] = { Addr->getType() };
26676 const DataLayout &DL = M->getDataLayout();
26679 CI->addParamAttr(
26688 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
26695 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
26701 if (Val->getType()->getPrimitiveSizeInBits() == 128) {
26705 Type *Int64Ty = Type::getInt64Ty(M->getContext());
26714 Type *Tys[] = { Addr->getType() };
26717 const DataLayout &DL = M->getDataLayout();
26718 IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
26723 Val, Stxr->getFunctionType()->getParamType(0)),
26725 CI->addParamAttr(1, Attribute::get(Builder.getContext(),
26726 Attribute::ElementType, Val->getType()));
26733 if (!Ty->isArrayTy()) {
26734 const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
26750 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
26763 if (Subtarget->isTargetAndroid())
26768 if (Subtarget->isTargetFuchsia())
26769 return UseTlsOffset(IRB, -0x10);
26776 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
26783 M.getOrInsertFunction(Subtarget->getSecurityCheckCookieName(),
26787 F->setCallingConv(CallingConv::Win64);
26788 F->addParamAttr(0, Attribute::AttrKind::InReg);
26797 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
26804 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
26805 return M.getFunction(Subtarget->getSecurityCheckCookieName());
26814 if (Subtarget->isTargetAndroid())
26819 if (Subtarget->isTargetFuchsia())
26820 return UseTlsOffset(IRB, -0x8);
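// A minimal standalone sketch (not from this file) of the fixed-TLS-offset
// scheme used above: the protected value lives at a known byte offset from the
// thread pointer, so it can be loaded without going through a global symbol.
// The offset parameter is a placeholder, not one of the platform ABI values.
#include <cstddef>

static void *loadFromTlsSlot(std::ptrdiff_t ByteOffset) {
  // __builtin_thread_pointer() reads TPIDR_EL0 on AArch64 (GCC/Clang builtin).
  char *Tp = static_cast<char *>(__builtin_thread_pointer());
  return *reinterpret_cast<void **>(Tp + ByteOffset);
}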
26835 return Mask->getValue().isPowerOf2();
26855 !Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
26863 AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
26864 AFI->setIsSplitCSR(true);
26870 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
26871 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
26875 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
26876 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
26877 MachineBasicBlock::iterator MBBI = Entry->begin();
26887 Register NewVR = MRI->createVirtualRegister(RC);
26889 // FIXME: this currently does not emit CFI pseudo-instructions, it works
26890 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
26892 // CFI pseudo-instructions.
26893 assert(Entry->getParent()->getFunction().hasFnAttribute(
26896 Entry->addLiveIn(*I);
26897 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
26900 // Insert the copy-back instructions right before the terminator.
26902 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
26903 TII->get(TargetOpcode::COPY), *I)
26913 // integer division, leaving the division as-is is a loss even in terms of
26921 // We want inc-of-add for scalars and sub-of-not for vectors.
26929 if (FPVT == MVT::v8f16 && !Subtarget->hasFullFP16())
26940 assert(MBBI->isCall() && MBBI->getCFIType() &&
26943 switch (MBBI->getOpcode()) {
26955 MachineOperand &Target = MBBI->getOperand(0);
26959 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(AArch64::KCFI_CHECK))
26961 .addImm(MBBI->getCFIType())
26966 return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
26971 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
27029 if (GV.isThreadLocal() && Subtarget->isTargetMachO())
27046 auto APF = MI.getOperand(1).getFPImm()->getValueAPF();
27057 APInt Imm = CI->getValue();
27058 InstructionCost Cost = TTI->getIntImmCost(
27059 Imm, CI->getType(), TargetTransformInfo::TCK_CodeSize);
27068 --MaxUses;
27089 if (Inst.getType()->isScalableTy()) {
27094 if (Inst.getOperand(i)->getType()->isScalableTy())
27098 if (AI->getAllocatedType()->isScalableTy())
27234 EVT MemVT = Load->getMemoryVT();
27244 LoadVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), Pg,
27245 DAG.getUNDEF(LoadVT), MemVT, Load->getMemOperand(),
27246 Load->getAddressingMode(), Load->getExtensionType());
27249 if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) {
27251 Load->getMemoryVT().getVectorElementType());
27292 SDValue Mask = Load->getMask();
27296 assert(Load->getExtensionType() != ISD::NON_EXTLOAD &&
27305 if (Load->getPassThru()->isUndef()) {
27313 if (isZerosVector(Load->getPassThru().getNode()))
27318 ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
27319 Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(),
27320 Load->getAddressingMode(), Load->getExtensionType());
27325 convertToScalableVector(DAG, ContainerVT, Load->getPassThru());
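// A minimal scalar sketch (not from this file) of the passthru handling above:
// the lowered masked load is given an undef or zero passthru, so any other
// passthru value has to be reintroduced with a lane-wise select afterwards.
#include <cstddef>
#include <vector>

static std::vector<float>
maskedLoadWithPassThru(const std::vector<float> &Mem,
                       const std::vector<bool> &Mask,
                       const std::vector<float> &PassThru) {
  std::vector<float> Loaded(Mem.size(), 0.0f); // Inactive lanes load as zero.
  for (std::size_t I = 0; I != Mem.size(); ++I)
    if (Mask[I])
      Loaded[I] = Mem[I];
  for (std::size_t I = 0; I != Mem.size(); ++I) // select(Mask, Loaded, PassThru)
    if (!Mask[I])
      Loaded[I] = PassThru[I];
  return Loaded;
}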
27340 EVT VT = Store->getValue().getValueType();
27342 EVT MemVT = Store->getMemoryVT();
27345 auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
27347 if (VT.isFloatingPoint() && Store->isTruncatingStore()) {
27349 Store->getMemoryVT().getVectorElementType());
27362 return DAG.getMaskedStore(Store->getChain(), DL, NewValue,
27363 Store->getBasePtr(), Store->getOffset(), Pg, MemVT,
27364 Store->getMemOperand(), Store->getAddressingMode(),
27365 Store->isTruncatingStore());
27373 EVT VT = Store->getValue().getValueType();
27376 auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
27377 SDValue Mask = convertFixedMaskToScalableVector(Store->getMask(), DAG);
27380 Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
27381 Mask, Store->getMemoryVT(), Store->getMemOperand(),
27382 Store->getAddressingMode(), Store->isTruncatingStore());
27531 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
27544 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
27563 assert(isTypeLegal(VT) && "Expected only legal fixed-width types");
27568 for (const SDValue &V : Op->op_values()) {
27575 EVT VTArg = VTNode->getVT().getVectorElementType();
27582 "Expected only legal fixed-width types");
27596 for (const SDValue &V : Op->op_values()) {
27606 return DAG.getNode(NewOp, DL, VT, Operands, Op->getFlags());
27621 for (const SDValue &V : Op->op_values()) {
27624 // Pass through non-vector operands.
27721 /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
27753 SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
27754 SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
27812 unsigned NumOperands = Op->getNumOperands();
27827 Op->getOperand(I), Op->getOperand(I + 1)));
27969 SDValue Chain = HG->getChain();
27970 SDValue Inc = HG->getInc();
27971 SDValue Mask = HG->getMask();
27972 SDValue Ptr = HG->getBasePtr();
27973 SDValue Index = HG->getIndex();
27974 SDValue Scale = HG->getScale();
27975 SDValue IntID = HG->getIntID();
27980 assert(CID->getZExtValue() == Intrinsic::experimental_vector_histogram_add &&
27992 MachineMemOperand *MMO = HG->getMemOperand();
27995 MMO->getPointerInfo(), MachineMemOperand::MOLoad, MMO->getSize(),
27996 MMO->getAlign(), MMO->getAAInfo());
27997 ISD::MemIndexType IndexType = HG->getIndexType();
28013 MMO->getPointerInfo(), MachineMemOperand::MOStore, MMO->getSize(),
28014 MMO->getAlign(), MMO->getAAInfo());
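// A minimal scalar sketch (not from this file) of the semantics being lowered
// here: for every active lane, the histogram bucket addressed by that lane's
// index is incremented by Inc. The two MachineMemOperands built above describe
// the gather (MOLoad) and scatter (MOStore) halves of that read-modify-write.
#include <cstddef>
#include <cstdint>
#include <vector>

static void histogramAdd(std::vector<std::uint32_t> &Buckets,
                         const std::vector<std::size_t> &Indices,
                         const std::vector<bool> &Mask, std::uint32_t Inc) {
  for (std::size_t Lane = 0; Lane != Indices.size(); ++Lane)
    if (Mask[Lane])
      Buckets[Indices[Lane]] += Inc; // Repeated indices each contribute.
}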
28087 uint64_t MaxOffset = APInt(BitsPerElt, -1, false).getZExtValue();
28099 // Bail out for 8-bit element types, because with 2048-bit SVE register
28110 // is not known at compile-time, we need to maintain a mask with 'VL' values
28114 Index += IndexLen - ElementsPerVectorReg;
28116 Index = Index - ElementsPerVectorReg;
28121 // For 8-bit elements and 1024-bit SVE registers and MaxOffset equals
28129 // Choosing an out-of-range index leads to the lane being zeroed vs zero
28131 // index elements. For i8 elements an out-of-range index could be a valid
28132 // index for a 2048-bit vector register size.
28133 for (unsigned i = 0; i < IndexLen - ElementsPerVectorReg; ++i) {
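// A minimal standalone sketch (not from this file) of the padding done in the
// loop above: the fixed-length shuffle mask is widened to the SVE register
// length and the unused tail entries are given a deliberately out-of-range
// index, so TBL writes zero to those lanes instead of reading stale data.
// The value 255 is illustrative; the real code derives the out-of-range value
// from MaxOffset for the element width, and assumes IndexLen >= Mask.size().
#include <cstdint>
#include <vector>

static std::vector<std::uint8_t>
padTBLIndices(const std::vector<std::uint8_t> &Mask, unsigned IndexLen) {
  std::vector<std::uint8_t> TBLMask(Mask.begin(), Mask.end());
  TBLMask.resize(IndexLen, 255); // Out-of-range lanes become zero in TBL.
  return TBLMask;
}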
28183 auto ShuffleMask = SVN->getMask();
28193 auto MinLegalExtractEltScalarTy = [](EVT ScalarTy) -> EVT {
28199 if (SVN->isSplat()) {
28200 unsigned Lane = std::max(0, SVN->getSplatIndex());
28211 Imm == VT.getVectorNumElements() - 1) {
28217 DAG.getConstant(VT.getVectorNumElements() - 1, DL, MVT::i64));
28242 if (Subtarget->hasSVE2p1() && EltSize == 64 &&
28284 // are actually sub-vectors of a larger SVE register. When mapping
28292 // when converting from fixed-length to scalable vector types (i.e. the start
28294 unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
28295 unsigned MaxSVESize = Subtarget->getMaxSVEVectorSizeInBits();
28327 // 128-bits.
28328 if (MinSVESize || !Subtarget->isNeonAvailable())
28396 SDValue ShiftR = Op->getOperand(0);
28397 if (ShiftR->getOpcode() != AArch64ISD::VLSHR)
28403 unsigned ShiftLBits = ShiftL->getConstantOperandVal(1);
28404 unsigned ShiftRBits = ShiftR->getConstantOperandVal(1);
28421 // used - simplify to just Val.
28422 return TLO.CombineTo(Op, ShiftR->getOperand(0));
28430 uint64_t BitsToClear = Op->getConstantOperandVal(1)
28431 << Op->getConstantOperandVal(2);
28444 unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();
28456 Known.Zero.setHighBits(BitWidth - RequiredBits);
28475 return Subtarget->hasSVE() || Subtarget->hasSVE2() ||
28476 Subtarget->hasComplxNum();
28487 if (!VTy->isScalableTy() && !Subtarget->hasComplxNum())
28490 auto *ScalarTy = VTy->getScalarType();
28491 unsigned NumElements = VTy->getElementCount().getKnownMinValue();
28495 // power-of-2 size, as we later split them into the smallest supported size
28497 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
28498 if ((VTyWidth < 128 && (VTy->isScalableTy() || VTyWidth != 64)) ||
28502 if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2() && VTy->isScalableTy()) {
28503 unsigned ScalarWidth = ScalarTy->getScalarSizeInBits();
28507 return (ScalarTy->isHalfTy() && Subtarget->hasFullFP16()) ||
28508 ScalarTy->isFloatTy() || ScalarTy->isDoubleTy();
28515 VectorType *Ty = cast<VectorType>(InputA->getType());
28516 bool IsScalable = Ty->isScalableTy();
28517 bool IsInt = Ty->getElementType()->isIntegerTy();
28520 Ty->getScalarSizeInBits() * Ty->getElementCount().getKnownMinValue();
28526 int Stride = Ty->getElementCount().getKnownMinValue() / 2;
28561 auto *Mask = B.getAllOnesMask(Ty->getElementCount());
28586 auto *Mask = B.getAllOnesMask(Ty->getElementCount());
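// A minimal standalone sketch (not from this file) of the halving strategy
// behind the Stride computation above: operands wider than the smallest
// supported vector are cut at Stride = NumElements / 2 into a lower and an
// upper half, each half is processed on its own, and the results are
// concatenated again. Plain std::vector stands in for the IR vector values.
#include <cstddef>
#include <utility>
#include <vector>

static std::pair<std::vector<float>, std::vector<float>>
splitAtHalf(const std::vector<float> &V) {
  std::size_t Stride = V.size() / 2;
  return {std::vector<float>(V.begin(), V.begin() + Stride), // lower half
          std::vector<float>(V.begin() + Stride, V.end())};  // upper half
}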
28610 unsigned Opc = N->getOpcode();
28612 if (any_of(N->uses(),
28613 [&](SDNode *Use) { return Use->getOpcode() == ISD::MUL; }))
28620 return Subtarget->getMinimumJumpTableEntries();
28628 if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors())
28643 if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors())
28662 assert(Subtarget->useSVEForFixedLengthVectors() && "Unexpected mode!");
28721 return !Subtarget->isTargetWindows() &&
28722 MF.getInfo<AArch64FunctionInfo>()->hasStackProbing();
28727 switch (N->getOpcode()) {
28734 assert(N->getNumValues() == 1 && "Expected one result!");
28735 assert(N->getNumOperands() == 1 && "Expected one operand!");
28736 EVT VT = N->getValueType(0);
28737 EVT OpVT = N->getOperand(0).getValueType();
28753 assert(N->getNumValues() == 1 && "Expected one result!");
28754 assert(N->getNumOperands() == 2 && "Expected two operands!");
28755 EVT VT = N->getValueType(0);
28756 EVT Op0VT = N->getOperand(0).getValueType();
28757 EVT Op1VT = N->getOperand(1).getValueType();