Lines Matching +full:fiq +full:- +full:based

1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
122 #define DEBUG_TYPE "arm-isel"
131 ARMInterworking("arm-interworking", cl::Hidden,
136 "arm-promote-constant", cl::Hidden,
141 "arm-promote-constant-max-size", cl::Hidden,
145 "arm-promote-constant-max-total", cl::Hidden,
150 MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
403 // MVE integer-only / float support. At the integer-only level, only FP data
404 // processing on the FP vector types is inhibited.
499 RegInfo = Subtarget->getRegisterInfo();
500 Itins = Subtarget->getInstrItineraryData();
505 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
506 !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
514 if (Subtarget->isTargetMachO()) {
516 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
517 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
523 // Single-precision floating-point arithmetic.
529 // Double-precision floating-point arithmetic.
535 // Single-precision comparisons.
544 // Double-precision comparisons.
553 // Floating-point to integer conversions.
565 // Integer to floating-point conversions.
585 if (Subtarget->isAAPCS_ABI() &&
586 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
587 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
594 // Double-precision floating-point arithmetic helper functions
601 // Double-precision floating-point comparison helper functions
611 // Single-precision floating-point arithmetic helper functions
618 // Single-precision floating-point comparison helper functions
628 // Floating-point to integer conversions.
645 // Integer to floating-point conversions.
707 if (Subtarget->isTargetWindows()) {
729 // Use divmod compiler-rt calls for iOS 5.0 and later.
730 if (Subtarget->isTargetMachO() &&
731 !(Subtarget->isTargetIOS() &&
732 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
737 // The half <-> float conversion functions are always soft-float on
738 // non-watchos platforms, but are needed for some targets which use a
739 // hard-float calling convention by default.
740 if (!Subtarget->isTargetWatchABI()) {
741 if (Subtarget->isAAPCS_ABI()) {
754 if (Subtarget->isTargetAEABI()) {
771 if (Subtarget->isThumb1Only())
776 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
777 Subtarget->hasFPRegs()) {
786 if (!Subtarget->hasVFP2Base())
788 if (!Subtarget->hasFP64())
792 if (Subtarget->hasFullFP16()) {
801 if (Subtarget->hasBF16()) {
804 if (!Subtarget->hasFullFP16())
826 if (Subtarget->hasMVEIntegerOps())
827 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
829 // Combine low-overhead loop intrinsics so that we can lower i1 types.
830 if (Subtarget->hasLOB()) {
834 if (Subtarget->hasNEON()) {
848 if (Subtarget->hasFullFP16()) {
853 if (Subtarget->hasBF16()) {
859 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
899 if (Subtarget->hasNEON()) {
939 // Custom handling for some quad-vector types to detect VMULL.
965 // types wider than 8-bits. However, custom lowering can leverage the
1004 if (!Subtarget->hasVFP4Base()) {
1031 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1039 if (Subtarget->hasMVEIntegerOps()) {
1044 if (Subtarget->hasMVEFloatOps()) {
1048 if (!Subtarget->hasFP64()) {
1049 // When targeting a floating-point unit with only single-precision
1050 // operations, f64 is legal for the few double-precision instructions which
1051 // are present. However, no double-precision operations other than moves,
1092 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1095 if (Subtarget->hasFullFP16()) {
1101 if (!Subtarget->hasFP16()) {
1106 computeRegisterProperties(Subtarget->getRegisterInfo());
1108 // ARM does not have floating-point extending loads.
1124 if (!Subtarget->isThumb1Only()) {
1137 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1149 if (Subtarget->hasDSP()) {
1159 if (Subtarget->hasBaseDSP()) {
1167 if (Subtarget->isThumb1Only()) {
1171 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1172 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1187 if (Subtarget->hasMVEIntegerOps())
1191 if (Subtarget->isThumb1Only()) {
1197 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1208 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1216 // implementation-specific ways of obtaining this information.
1217 if (Subtarget->hasPerfMon())
1221 if (!Subtarget->hasV6Ops())
1224 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1225 : Subtarget->hasDivideInARMMode();
1232 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1243 // Register based DivRem for AEABI (RTABI 4.2)
1244 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1245 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1246 Subtarget->isTargetWindows()) {
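// Sketch (not from this file): source code whose division and remainder the
// register-based DivRem lowering above folds into one runtime call on AEABI
// targets, where __aeabi_idivmod returns the quotient in r0 and the remainder
// in r1. The struct and function names below are illustrative only.
struct DivModResult { int Quot, Rem; };

DivModResult divmod(int Num, int Den) {
  // Both operations below are expected to become a single __aeabi_idivmod
  // call (or a hardware SDIV plus MLS when a divide instruction exists).
  return {Num / Den, Num % Den};
}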
1251 if (Subtarget->isTargetWindows()) {
1320 if (Subtarget->isTargetWindows())
1328 if (Subtarget->hasAnyDataBarrier() &&
1329 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1333 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1338 if (!Subtarget->hasAcquireRelease() ||
1347 if (Subtarget->hasDataBarrier())
1351 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1375 if (Subtarget->isTargetLinux() ||
1376 (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1380 // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1381 // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1388 } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1389 Subtarget->hasForced32BitAtomics()) {
1390 // Cortex-M cores (besides Cortex-M0) have 32-bit atomics.
1403 if (!Subtarget->hasV6Ops()) {
1409 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1410 !Subtarget->isThumb1Only()) {
1411 // Turn f64 -> i64 into VMOVRRD and i64 -> f64 into VMOVDRR.
1429 if (Subtarget->useSjLjEH())
1441 if (Subtarget->hasFullFP16()) {
1451 if (Subtarget->hasFullFP16())
1466 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1467 !Subtarget->isThumb1Only()) {
1474 if (!Subtarget->hasVFP4Base()) {
1480 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1481 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1482 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1487 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1488 if (!Subtarget->hasFP16()) {
1493 // Strict floating-point comparisons need custom lowering.
1509 // FP-ARMv8 implements a lot of rounding-like FP operations.
1510 if (Subtarget->hasFPARMv8Base()) {
1519 if (Subtarget->hasNEON()) {
1526 if (Subtarget->hasFP64()) {
1539 if (Subtarget->hasFullFP16()) {
1558 if (Subtarget->hasNEON()) {
1570 if (Subtarget->hasFullFP16()) {
1583 // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
1585 if (Subtarget->isTargetWindows()) {
1597 // We have target-specific dag combine patterns for the following nodes:
1598 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1602 if (Subtarget->hasMVEIntegerOps())
1605 if (Subtarget->hasV6Ops())
1607 if (Subtarget->isThumb1Only())
1610 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1611 Subtarget->isThumb2()) {
1617 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1618 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1623 // Temporary - rewrite interface to use type.
1626 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1628 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1635 // Prefer likely predicted branches to selects on out-of-order cores.
1636 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1638 setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1639 setPrefFunctionAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1641 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1645 return Subtarget->useSoftFloat();
1649 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1650 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1674 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1675 // coalescing by double-counting the SP regs. See the FIXME above.
1676 if (Subtarget->useNEONForSinglePrecisionFP())
1916 if ((Subtarget->hasMVEIntegerOps() &&
1919 (Subtarget->hasMVEFloatOps() &&
1925 /// getRegClassFor - Return the register class that should be used for the
1934 if (Subtarget->hasNEON()) {
1940 if (Subtarget->hasMVEIntegerOps()) {
1957 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1958 // cycle faster than 4-byte aligned LDM.
1960 (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
1972 unsigned NumVals = N->getNumValues();
1977 EVT VT = N->getValueType(i);
1984 if (!N->isMachineOpcode())
1989 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1990 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1994 if (!Itins->isEmpty() &&
1995 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2U)
2001 //===----------------------------------------------------------------------===//
2003 //===----------------------------------------------------------------------===//
2009 return Const->getZExtValue() == 16;
2017 return Const->getZExtValue() == 16;
2025 return Const->getZExtValue() == 16;
2029 // Check for a signed 16-bit value. We special case SRA because it makes it
2039 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
2056 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
2085 //===----------------------------------------------------------------------===//
2087 //===----------------------------------------------------------------------===//
2089 /// getEffectiveCallingConv - Get the effective calling convention, taking into
2113 if (!Subtarget->isAAPCS_ABI())
2115 else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
2123 if (!Subtarget->isAAPCS_ABI()) {
2124 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2127 } else if (Subtarget->hasVFP2Base() &&
2128 !Subtarget->isThumb1Only() && !isVarArg)
2145 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2176 if (Subtarget->hasFullFP16()) {
2189 if (Subtarget->hasFullFP16()) {
2201 /// LowerCallResult - Lower the result values of a call into the
2240 if (!Subtarget->isLittle())
2257 if (!Subtarget->isLittle())
2279 // had been copied to the LSBs of a 32-bit register.
2285 // On CMSE Non-secure Calls, call results (returned values) whose bitwidth
2286 // is less than 32 bits must be sign- or zero-extended after the call for
2337 unsigned id = Subtarget->isLittle() ? 0 : 1;
2341 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2353 DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2362 /// LowerCall - Lowering a call into a callseq_start <-
2363 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
2396 // Lower 'returns_twice' calls to a pseudo-instruction.
2397 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2398 !Subtarget->noBTIAtReturnTwice())
2399 GuardWithBTI = AFI->branchTargetEnforcement();
2401 // Determine whether this is a non-secure function call.
2402 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2406 if (!Subtarget->supportsTailCall())
2409 // For both the non-secure calls and the returns from a CMSE entry function,
2412 if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2418 // as BLXr has a 16-bit encoding.
2419 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2421 auto *BB = CLI.CB->getParent();
2422 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2423 count_if(GV->users(), [&BB](const User *U) {
2425 cast<Instruction>(U)->getParent() == BB;
2444 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2455 // arguments to begin at SP+0. Completely unused for non-tail calls.
2460 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2463 // popped size 16-byte aligned.
2470 SPDiff = NumReusableBytes - NumBytes;
2474 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2475 AFI->setArgRegsSaveSize(-SPDiff);
2534 // had been copied to the LSBs of a 32-bit register.
2621 offset = RegEnd - RegBegin;
2634 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2659 // Build a sequence of copy-to-reg nodes chained together with token chain
2676 GVal = G->getGlobal();
2677 bool isStub = !TM.shouldAssumeDSOLocal(GVal) && Subtarget->isTargetMachO();
2679 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2683 if (Subtarget->genLongCalls()) {
2684 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2685 "long-calls codegen is not position independent!");
2690 if (Subtarget->genExecuteOnly()) {
2691 if (Subtarget->useMovt())
2697 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2709 const char *Sym = S->getSymbol();
2711 if (Subtarget->genExecuteOnly()) {
2712 if (Subtarget->useMovt())
2718 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2733 bool isDef = GVal->isStrongDefinitionForLinker();
2736 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2738 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2739 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2748 } else if (Subtarget->isTargetCOFF()) {
2749 assert(Subtarget->isTargetWindows() &&
2752 if (GVal->hasDLLImportStorageClass())
2770 const char *Sym = S->getSymbol();
2771 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2772 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2793 "call to non-secure function would "
2796 DAG.getContext()->diagnose(Diag);
2801 "call to non-secure function would return value through pointer",
2803 DAG.getContext()->diagnose(Diag);
2809 if (Subtarget->isThumb()) {
2814 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2819 if (!isDirect && !Subtarget->hasV5TOps())
2821 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2823 !Subtarget->hasMinSize())
2830 // We don't usually want to end the call-sequence here because we would tidy
2831 // the frame up *after* the call, however in the ABI-changing tail-call case
2853 // Add a register mask operand representing the call-preserved registers.
2855 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2857 // For 'this' returns, use the R0-preserving mask if applicable
2858 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2864 Mask = ARI->getCallPreservedMask(MF, CallConv);
2867 Mask = ARI->getCallPreservedMask(MF, CallConv);
2890 // If we're guaranteeing tail-calls will be honoured, the callee must
2892 // we need to undo that after it returns to restore the status-quo.
2895 canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;
2908 /// HandleByVal - Every parameter *after* a byval parameter is passed
2917 unsigned Reg = State->AllocateReg(GPRArgRegs);
2922 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2924 Reg = State->AllocateReg(GPRArgRegs);
2929 unsigned Excess = 4 * (ARM::R4 - Reg);
2935 const unsigned NSAAOffset = State->getStackSize();
2937 while (State->AllocateReg(GPRArgRegs))
2945 // the end (first after last) register would be reg + param-size-in-regs,
2950 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2954 State->AllocateReg(GPRArgRegs);
2959 Size = std::max<int>(Size - Excess, 0);
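// A standalone model of the register/stack split above (illustrative only:
// FirstFreeReg counts r0..r3 as 0..3, and all names are hypothetical).
struct ByValSplit { unsigned RegBytes, StackBytes; };

ByValSplit splitByVal(unsigned Size, unsigned ByValAlign, unsigned FirstFreeReg) {
  unsigned AlignInRegs = ByValAlign / 4;
  unsigned RegsLeft = 4 - FirstFreeReg;        // registers FirstFreeReg..r3
  RegsLeft -= RegsLeft % AlignInRegs;          // waste regs to honour alignment
  unsigned Excess = 4 * RegsLeft;              // bytes that fit in registers
  unsigned RegBytes = Size < Excess ? Size : Excess;
  return {RegBytes, Size - RegBytes};          // remainder goes on the stack
}
// e.g. splitByVal(20, 8, /*r1*/ 1) == {8, 12}: r1 is wasted for alignment,
// r2-r3 carry 8 bytes, and the remaining 12 bytes are passed on the stack.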
2962 /// MatchingStackOffset - Return true if the given stack call argument is
2972 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2975 MachineInstr *Def = MRI->getVRegDef(VR);
2979 if (!TII->isLoadFromStackSlot(*Def, FI))
2992 SDValue Ptr = Ld->getBasePtr();
2996 FI = FINode->getIndex();
3006 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
3025 assert(Subtarget->supportsTailCall());
3028 // to the call take up r0-r3. The reason is that there are no legal registers
3035 if (Subtarget->isThumb1Only())
3038 if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true))
3045 // Exception-handling functions need a special set of instructions to indicate
3046 // a return to the hardware. Tail-calling another function would probably
3061 // Externally-defined functions with weak linkage should not be
3062 // tail-called on ARM when the OS does not support dynamic
3063 // pre-emption of symbols, as the AAELF spec requires normal calls
3066 // situation (as used for tail calls) is implementation-defined, so we
3069 const GlobalValue *GV = G->getGlobal();
3071 if (GV->hasExternalWeakLinkage() &&
3085 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3086 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3088 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3089 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3097 if (AFI_Caller->getArgRegsSaveSize())
3108 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3120 // register/stack-slot combinations. The types will not match
3169 // IRQ/FIQ: +4 "subs pc, lr, #4"
3177 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3184 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3198 // CCValAssign - represent the assignment of the return value to a location.
3201 // CCState - Info about the registers and stack slots.
3211 bool isLittleEndian = Subtarget->isLittle();
3215 AFI->setReturnRegsCount(RVLocs.size());
3218 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3225 DAG.getContext()->diagnose(Diag);
3238 if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3239 // Half-precision return values can be returned like this:
3272 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3312 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3332 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3334 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3351 // CPUs which aren't M-class use a special sequence to return from
3355 // M-class CPUs actually use a normal return sequence with a special
3356 // (hardware-provided) value in LR, so the normal code path works.
3358 !Subtarget->isMClass()) {
3359 if (Subtarget->isThumb1Only())
3364 ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE :
3370 if (N->getNumValues() != 1)
3372 if (!N->hasNUsesOfValue(1, 0))
3376 SDNode *Copy = *N->use_begin();
3377 if (Copy->getOpcode() == ISD::CopyToReg) {
3380 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3382 TCChain = Copy->getOperand(0);
3383 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3387 for (SDNode *U : VMov->uses()) {
3388 if (U->getOpcode() != ISD::CopyToReg)
3395 for (SDNode *U : VMov->uses()) {
3396 SDValue UseChain = U->getOperand(0);
3404 if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3410 } else if (Copy->getOpcode() == ISD::BITCAST) {
3412 if (!Copy->hasOneUse())
3414 Copy = *Copy->use_begin();
3415 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3419 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3421 TCChain = Copy->getOperand(0);
3427 for (const SDNode *U : Copy->uses()) {
3428 if (U->getOpcode() != ARMISD::RET_GLUE &&
3429 U->getOpcode() != ARMISD::INTRET_GLUE)
3442 if (!Subtarget->supportsTailCall())
3445 if (!CI->isTailCall())
3455 SDValue WriteValue = Op->getOperand(2);
3459 && "LowerWRITE_REGISTER called for non-i64 type argument.");
3463 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3481 // When generating execute-only code Constant Pools must be promoted to the
3483 // blocks, but this way we guarantee that execute-only behaves correctly with
3484 // position-independent addressing modes.
3485 if (Subtarget->genExecuteOnly()) {
3487 auto T = const_cast<Type*>(CP->getType());
3488 auto C = const_cast<Constant*>(CP->getConstVal());
3495 Twine(AFI->createPICLabelUId())
3502 // The 16-bit ADR instruction can only encode offsets that are multiples of 4,
3503 // so we need to align to at least 4 bytes when we don't have 32-bit ADR.
3504 Align CPAlign = CP->getAlign();
3505 if (Subtarget->isThumb1Only())
3507 if (CP->isMachineConstantPoolEntry())
3509 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);
3511 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);
3516 // If we don't have a 32-bit pc-relative branch instruction then the jump
3518 // execute-only it must be placed out-of-line.
3519 if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())
3531 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3533 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3537 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3538 ARMPCLabelIndex = AFI->createPICLabelUId();
3581 assert(Subtarget->isTargetDarwin() &&
3607 getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3609 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3625 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3668 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3684 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3687 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3689 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3724 const GlobalValue *GV = GA->getGlobal();
3735 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3737 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3739 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3778 if (Subtarget->isTargetDarwin())
3781 if (Subtarget->isTargetWindows())
3785 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3786 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3802 SmallVector<const User*,4> Worklist(V->users());
3806 append_range(Worklist, U->users());
3811 if (!I || I->getParent()->getParent() != F)
3830 // use-site. We know that if we inline a variable at one use site, we'll
3831 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3834 // the GV from fast-isel generated code.
3840 if (!GVar || !GVar->hasInitializer() ||
3841 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3842 !GVar->hasLocalLinkage())
3846 // from .data to .text. This is not allowed in position-independent code.
3847 auto *Init = GVar->getInitializer();
3848 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3849 Init->needsDynamicRelocation())
3859 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3861 unsigned RequiredPadding = 4 - (Size % 4);
3863 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3876 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3877 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3893 StringRef S = CDAInit->getAsString();
3897 while (RequiredPadding--)
3904 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3905 AFI->markGlobalAsPromotedToConstantPool(GVar);
3906 AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3907 PaddedSize - 4);
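// Worked numbers for the padding bookkeeping above (illustrative helper):
// a 10-byte initializer gets RequiredPadding = 4 - (10 % 4) = 2, so
// PaddedSize = 12; a size that is already a multiple of 4 yields
// RequiredPadding == 4, which is treated as "no padding needed".
unsigned paddedSize(unsigned Size) {
  unsigned RequiredPadding = 4 - (Size % 4);
  return RequiredPadding == 4 ? Size : Size + RequiredPadding;
}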
3915 if (!(GV = GA->getAliaseeObject()))
3918 return V->isConstant();
3924 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3939 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3943 if (GV->isDSOLocal() && !Subtarget->genExecuteOnly())
3949 GV, dl, PtrVT, 0, GV->isDSOLocal() ? 0 : ARMII::MO_GOT);
3951 if (!GV->isDSOLocal())
3956 } else if (Subtarget->isROPI() && IsRO) {
3957 // PC-relative.
3961 } else if (Subtarget->isRWPI() && !IsRO) {
3962 // SB-relative.
3964 if (Subtarget->useMovt()) {
3986 if (Subtarget->useMovt() || Subtarget->genExecuteOnly()) {
3987 if (Subtarget->useMovt())
4004 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4008 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4010 if (Subtarget->useMovt())
4021 if (Subtarget->isGVIndirectSymbol(GV))
4029 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
4030 assert(Subtarget->useMovt() &&
4032 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4036 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4038 if (GV->hasDLLImportStorageClass())
4095 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
4097 ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
4099 // Mark LR an implicit live-in.
4107 if (Subtarget->isThumb())
4176 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
4180 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
4258 return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
4261 return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
4273 if (!Subtarget->hasDataBarrier()) {
4275 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
4277 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4286 if (Subtarget->isMClass()) {
4287 // Only a full system barrier exists in the M-class architectures.
4289 } else if (Subtarget->preferISHSTBarriers() &&
4305 if (!(Subtarget->isThumb2() ||
4306 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4313 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4318 if (Subtarget->isThumb()) {
4337 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4338 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4352 if (AFI->isThumb1OnlyFunction())
4375 if (!Subtarget->isLittle())
4380 // The remaining GPRs hold either the beginning of variable-argument
4393 // Currently, two use-cases are possible:
4394 // Case #1. A non-var-args function, where we meet the first byval parameter.
4399 // "store-reg" instructions.
4400 // Case #2. A var-args function that doesn't contain byval parameters.
4417 ArgOffset = -4 * (ARM::R4 - RBegin);
4425 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4458 AFI->setVarArgsFrameIndex(FrameIndex);
4514 AFI->setArgRegsSaveSize(0);
4540 int lastInsIndex = -1;
4547 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4548 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4555 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4600 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4612 AFI->setPreservesR0();
4616 // If this is an 8 or 16-bit value, it is really passed promoted
4628 // had been copied to the LSBs of a 32-bit register.
4635 // less than 32 bits must be sign- or zero-extended in the callee for
4639 if (AFI->isCmseNSEntryFunction() && Arg.ArgVT.isScalarInteger() &&
4691 if (AFI->isCmseNSEntryFunction()) {
4695 DAG.getContext()->diagnose(Diag);
4707 AFI->setArgumentStackToRestore(StackArgSize);
4709 AFI->setArgumentStackSize(StackArgSize);
4711 if (CCInfo.getStackSize() > 0 && AFI->isCmseNSEntryFunction()) {
4715 DAG.getContext()->diagnose(Diag);
4721 /// isFloatingPointZero - Return true if this is +0.0.
4724 return CFP->getValueAPF().isPosZero();
4730 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4731 return CFP->getValueAPF().isPosZero();
4733 } else if (Op->getOpcode() == ISD::BITCAST &&
4734 Op->getValueType(0) == MVT::f64) {
4737 SDValue BitcastOp = Op->getOperand(0);
4738 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4739 isNullConstant(BitcastOp->getOperand(0)))
4751 unsigned C = RHSC->getZExtValue();
4758 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4760 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4765 if (C != 0 && isLegalICmpImmediate(C-1)) {
4767 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
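// Sketch of what the C-1 adjustment above buys (illustrative): 257 (0x101)
// is not a valid ARM modified-immediate, but 256 is, so a comparison like
// the one below can be emitted as "x > 256" (SETGE C becomes SETGT C-1)
// using a single CMP with an encodable immediate.
bool atLeast257(int x) { return x >= 257; }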
4788 // In ARM and Thumb-2, the compare instructions can shift their second
4815 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4816 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4821 uint64_t RHSV = RHSC->getZExtValue();
4836 // some tweaks to the heuristics for the previous and->shift transform.
4838 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4839 isa<ConstantSDNode>(RHS) && RHS->getAsZExtVal() == 0x80000000U &&
4889 assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4900 /// duplicateCmp - Glue values can have only one use, so this function
5069 // value. So compute 1 - C.
5082 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->isThumb1Only())
5092 switch (Op->getOpcode()) {
5108 switch (Op->getOpcode()) {
5128 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
5129 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
5143 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5158 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
5159 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
5168 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
5169 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
5193 // undefined bits before doing a full-word comparison with zero.
5255 if (!Subtarget->hasFP64() && VT == MVT::f64) {
5286 // See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
5306 // x < -k ? -k : (x > k ? k : x)
5307 // x < -k ? -k : (x < k ? x : k)
5308 // x > -k ? (x > k ? k : x) : -k
5309 // x < k ? (x < -k ? -k : x) : k
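// A plain C++ spelling of the first pattern above (illustrative): clamp x
// into [Lo, Hi]; when Lo and Hi are matching saturation bounds, the whole
// select tree can be emitted as a single SSAT/USAT.
int clampToRange(int x, int Lo, int Hi) {
  return x < Lo ? Lo : (x > Hi ? Hi : x);
}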
5324 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5334 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5346 // Check that the constant in the lower-bound check is
5347 // the opposite of the constant in the upper-bound check
5352 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5353 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5379 // - The conditions and values match up
5380 // - k is 0 or -1 (all ones)
5389 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5420 return !Subtarget->hasVFP2Base();
5422 return !Subtarget->hasFP64();
5424 return !Subtarget->hasFullFP16();
5433 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5438 // into more efficient bit operations, which is possible when k is 0 or -1
5439 // On ARM and Thumb-2 which have flexible operand 2 this will result in
5442 // Only allow this transformation on full-width (32-bit) operations
5459 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5465 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5467 unsigned TVal = CTVal->getZExtValue();
5468 unsigned FVal = CFVal->getZExtValue();
5496 // -(-a) == a, but (a+1)+1 != a).
5536 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5551 if (ARMcc->getAsZExtVal() == ARMCC::PL)
5563 if (Subtarget->hasFPARMv8Base() &&
5594 /// canChangeToInt - Given the fp compare operand, return true if it is suitable
5599 if (!N->hasOneUse())
5602 if (!N->getNumValues())
5605 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5607 // vmrs are very slow, e.g. cortex-a8.
5622 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5623 Ld->getPointerInfo(), Ld->getAlign(),
5624 Ld->getMemOperand()->getFlags());
5640 SDValue Ptr = Ld->getBasePtr();
5642 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5643 Ld->getAlign(), Ld->getMemOperand()->getFlags());
5648 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5649 Ld->getPointerInfo().getWithOffset(4),
5650 commonAlignment(Ld->getAlign(), 4),
5651 Ld->getMemOperand()->getFlags());
5658 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
5663 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5721 !Subtarget->isThumb1Only();
5726 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5736 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5750 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5772 !Subtarget->isThumb1Only();
5778 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5789 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5839 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5843 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5844 // Thumb2 and ARMv8-M use a two-level jump. That is, it jumps into the jump table
5851 if (isPositionIndependent() || Subtarget->isROPI()) {
5902 bool IsStrict = Op->isStrictFPOpcode();
5939 EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5945 Subtarget->hasFP64())
5948 Subtarget->hasFullFP16())
5951 Subtarget->hasMVEFloatOps())
5954 Subtarget->hasMVEFloatOps())
5962 unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
5966 DAG.getConstant((1 << BW) - 1, DL, VT));
5969 DAG.getConstant(-(1 << BW), DL, VT));
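// Worked numbers for the saturation constants above (illustrative): for a
// signed i16 result BW = 16 - 1 = 15, so the clamp bounds are
// (1 << 15) - 1 = 32767 and -(1 << 15) = -32768; for an unsigned i8 result
// BW = 8 and the upper bound is (1 << 8) - 1 = 255.
constexpr int satUpperBound(unsigned Bits, bool IsSigned) {
  return (1 << (Bits - IsSigned)) - 1;
}
static_assert(satUpperBound(16, true) == 32767, "signed i16 upper bound");
static_assert(satUpperBound(8, false) == 255, "unsigned i8 upper bound");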
6046 bool UseNEON = !InGPR && Subtarget->hasNEON();
6138 // Return LR, which contains the return address. Mark it an implicit live-in.
6155 while (Depth--)
6181 assert(N->getValueType(0) == MVT::i64
6182 && "ExpandREAD_REGISTER called for non-i64 type result.");
6186 N->getOperand(0),
6187 N->getOperand(1));
6203 SDValue Op = BC->getOperand(0);
6204 EVT DstVT = BC->getValueType(0);
6225 const APInt &APIntIndex = Index->getAPIntValue();
6232 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
6244 /// ExpandBITCAST - If the target supports VFP, this function is called to
6246 /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
6253 SDValue Op = N->getOperand(0);
6258 EVT DstVT = N->getValueType(0);
6274 // Turn i64->f64 into VMOVDRR.
6286 // Turn f64->i64 into VMOVRRD.
6304 /// getZeroVector - Returns a vector of specified type with all zero elements.
6319 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
6323 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6352 DAG.getConstant(VTBits - 1, dl, VT))
6363 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
6367 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6405 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
6428 SDValue Chain = Op->getOperand(0);
6429 SDValue RMValue = Op->getOperand(1);
6433 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
6434 // (((arg - 1) & 3) << 22).
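// Standalone check of the two mappings above (illustrative): FLT_ROUNDS uses
// 0 = toward zero, 1 = to nearest, 2 = upward, 3 = downward, while the FPSCR
// RMode field uses 0 = nearest, 1 = upward, 2 = downward, 3 = toward zero.
constexpr unsigned hwToFltRounds(unsigned HW) { return (HW + 1) & 3; } // 0->1, 1->2, 2->3, 3->0
constexpr unsigned fltRoundsToHw(unsigned FR) { return (FR - 1) & 3; } // 0->3, 1->0, 2->1, 3->2
static_assert(fltRoundsToHw(hwToFltRounds(2)) == 2, "mappings are inverses");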
6470 SDValue Chain = Op->getOperand(0);
6471 SDValue Mode = Op->getOperand(1);
6498 SDValue Chain = Op->getOperand(0);
6521 EVT VT = N->getValueType(0);
6522 if (VT.isVector() && ST->hasNEON()) {
6524 // Compute the least significant set bit: LSB = X & -X
6525 SDValue X = N->getOperand(0);
6532 // Compute with: cttz(x) = ctpop(lsb - 1)
6540 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
6541 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
6545 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
6550 // Compute with: cttz(x) = ctpop(lsb - 1)
6552 // Compute LSB - 1.
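// Scalar sanity check of the identities used above (illustrative): with
// lsb = x & -x, cttz(x) == popcount(lsb - 1), and for x != 0 also
// cttz(x) == (width - 1) - clz(lsb).
unsigned cttzViaPopcount(unsigned x) {
  unsigned lsb = x & -x;              // isolate the least significant set bit
  return __builtin_popcount(lsb - 1); // e.g. x = 0b101000: lsb = 0b1000 -> 3
}
unsigned cttzViaClz(unsigned x) {     // valid for x != 0
  unsigned lsb = x & -x;
  return 31 - __builtin_clz(lsb);
}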
6567 if (!ST->hasV6T2Ops())
6570 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
6576 EVT VT = N->getValueType(0);
6579 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
6586 SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
6607 /// getVShiftImm - Check if this is a valid build_vector for the immediate
6619 !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6627 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
6636 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6639 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
6653 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6654 Cnt = -Cnt;
6662 EVT VT = N->getValueType(0);
6675 if (N->getOpcode() == ISD::SHL) {
6676 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
6677 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
6679 return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
6680 N->getOperand(1));
6683 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
6686 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
6688 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6689 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
6695 EVT ShiftVT = N->getOperand(1).getValueType();
6697 ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
6699 (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6700 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
6705 EVT VT = N->getValueType(0);
6712 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
6713 N->getOpcode() == ISD::SHL) &&
6716 unsigned ShOpc = N->getOpcode();
6717 if (ST->hasMVEIntegerOps()) {
6718 SDValue ShAmt = N->getOperand(1);
6724 if ((!Con && ShAmt->getValueType(0).getSizeInBits() > 64) ||
6725 (Con && (Con->getAPIntValue() == 0 || Con->getAPIntValue().uge(32))))
6729 if (ShAmt->getValueType(0) != MVT::i32)
6747 DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6757 if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
6761 if (ST->isThumb1Only())
6764 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
6766 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6770 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE;
6790 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
6794 if (ST->hasNEON())
6797 assert(ST->hasMVEIntegerOps() &&
6805 if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
6813 // Special-case integer 64-bit equality comparisons. They aren't legal,
6831 // 64-bit comparisons are not legal in general.
6839 if (ST->hasMVEFloatOps()) {
6888 if (ST->hasMVEIntegerOps()) {
6905 if (ST->hasNEON() && Opc == ARMCC::EQ) {
6931 // comparison to a specialized compare-against-zero form.
6982 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
6990 /// isVMOVModifiedImm - Check if the specified splat value corresponds to a
7002 // immediate instructions other than VMOV do not support the 8-bit encoding
7004 // 32-bit version.
7012 // Any 1-byte value is OK. Op=0, Cmode=1110.
7020 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
7037 // NEON's 32-bit VMOV supports splat values where:
7090 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
7093 // and fall through here to test for a valid 64-bit splat. But, then the
7100 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
7117 unsigned Mask = (1 << BytesPerElem) - 1;
7122 NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
7146 const APFloat &FPVal = CFP->getValueAPF();
7148 // Prevent floating-point constants from using literal loads
7149 // when execute-only is enabled.
7150 if (ST->genExecuteOnly()) {
7151 // We shouldn't trigger this for v6m execute-only
7152 assert((!ST->isThumb1Only() || ST->hasV8MBaselineOps()) &&
7176 if (!ST->hasVFP3Base())
7180 // an SP-only FPU
7181 if (IsDouble && !Subtarget->hasFP64())
7187 if (ImmVal != -1) {
7188 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
7206 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
7316 Imm -= NumElts;
7341 // WhichResult gives the offset for each element in the mask based on which
7368 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
7385 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
7440 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7447 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
7476 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7514 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7521 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
7547 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7554 /// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
7585 // Look for <15, ..., 3, -1, 1, 0>.
7587 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7599 // Half-width truncation patterns (e.g. v4i32 -> v8i16):
7670 if (!ST->hasMVEFloatOps())
7723 if (!ST->hasMVEFloatOps())
7767 Val = N->getAsZExtVal();
7769 if (ST->isThumb1Only()) {
7773 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
7784 assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
7808 llvm::all_of(llvm::drop_begin(Op->ops()), [&FirstOp](const SDUse &U) {
7822 bool BitSet = V.isUndef() ? false : V->getAsZExtVal();
7843 if (!ST->hasMVEIntegerOps())
7877 switch (N->getOpcode()) {
7886 return N->getOperand(1).getNode() == Op;
7888 switch (N->getConstantOperandVal(0)) {
7905 return N->getOperand(2).getNode() == Op;
7922 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
7931 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7937 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
7939 all_of(BVN->uses(),
7949 if ((ST->hasNEON() && SplatBitSize <= 64) ||
7950 (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7966 VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
7975 if (ImmVal != -1) {
7983 if (ST->hasMVEIntegerOps() &&
8033 Value = ValueCounts.begin()->first;
8045 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
8055 // constant-index forms.
8057 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8058 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
8063 if (VT != Value->getOperand(0).getValueType()) {
8064 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
8072 Value->getOperand(0), Value->getOperand(1));
8132 if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
8133 // If we haven't found an efficient lowering, try splitting a 128-bit vector
8134 // into two 64-bit vectors; we might discover a better way to lower it.
8135 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
8149 // Vectors with 32- or 64-bit elements can be built by directly assigning
8153 // Do the expansion with floating-point types, since that is what the VFP
8166 // worse. For a vector with one or two non-undef values, that's
8239 Source->MinElt = std::min(Source->MinElt, EltNo);
8240 Source->MaxElt = std::max(Source->MaxElt, EltNo);
8292 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8302 Src.WindowBase = -NumSrcElts;
8320 Src.WindowBase = -Src.MinElt;
8343 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8351 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8365 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8366 ExtractBase += NumElts * (Src - Sources.begin());
8419 /// isShuffleMaskLegal - Targets can use this to indicate that they only
8440 if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
8455 else if (Subtarget->hasNEON() &&
8463 else if (Subtarget->hasMVEIntegerOps() &&
8467 else if (Subtarget->hasMVEIntegerOps() &&
8476 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8482 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8483 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8502 // vrev <4 x i16> -> VREV32
8505 // vrev <4 x i8> -> VREV16
8513 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
8519 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
8523 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
8527 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
8531 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
8547 if (V2.getNode()->isUndef())
8592 // of all ones or all zeroes and selecting the lanes based upon the real
8619 // Recast our new predicate-as-integer v16i8 vector into something
8628 ArrayRef<int> ShuffleMask = SVN->getMask();
8630 assert(ST->hasMVEIntegerOps() &&
8646 // to essentially promote the boolean predicate to an 8-bit integer, where
8650 // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
8651 // fields in a register into 8 other arbitrary 2-bit fields!
8697 // Detect which mov lane this would be from the first non-undef element.
8698 int MovIdx = -1;
8702 return -1;
8708 if (MovIdx == -1)
8709 return -1;
8715 return -1;
8723 if (Elt != -1) {
8724 SDValue Input = Op->getOperand(0);
8726 Input = Op->getOperand(1);
8727 Elt -= 4;
8746 Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8748 VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8770 // A One-Off Identity mask is one that is mostly an identity mask from a
8771 // single source but contains a single element out-of-place, either from a
8777 OffElement = -1;
8780 if (Mask[i] == -1)
8784 if (OffElement == -1)
8790 return NonUndef > 2 && OffElement != -1;
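// Example masks for the predicate above (illustrative, 4-element shuffles,
// where indices 4-7 select from the second source):
//   <0, 1, 2, 3>  plain identity - no out-of-place element, so not "one-off"
//   <0, 5, 2, 3>  identity except lane 1, which reads lane 1 of the second
//                 source; a single lane move fixes it up after a plain copy
//   <0, 1, 2, 0>  identity except lane 3, which re-reads lane 0 of the same
//                 source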
8822 if (ST->hasMVEIntegerOps() && EltSize == 1)
8825 // Convert shuffles that are directly supported on NEON to target-specific
8829 // FIXME: floating-point vectors should be canonicalized to integer vectors
8831 ArrayRef<int> ShuffleMask = SVN->getMask();
8834 if (SVN->isSplat()) {
8835 int Lane = SVN->getSplatIndex();
8837 if (Lane == -1) Lane = 0;
8863 if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
8877 if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
8889 if (ST->hasNEON()) {
8898 if (ST->hasMVEIntegerOps()) {
8913 // ->
8918 // native shuffles produce larger results: the two-result ops.
8922 // ->
8925 if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
8926 SDValue SubV1 = V1->getOperand(0);
8927 SDValue SubV2 = V1->getOperand(1);
8930 // We expect these to have been canonicalized to -1.
8940 "In-place shuffle of concat can only have one result!");
8949 if (ST->hasMVEIntegerOps() && EltSize <= 32) {
8973 // the PerfectShuffle-generated table to synthesize it from other shuffles.
8991 if (ST->hasNEON())
8994 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8995 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9004 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
9006 // Do the expansion with floating-point types, since that is what the VFP
9019 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
9030 if (ST->hasNEON() && VT == MVT::v8i8)
9034 if (ST->hasMVEIntegerOps())
9046 assert(ST->hasMVEIntegerOps() &&
9050 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9054 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
9072 if (Subtarget->hasMVEIntegerOps() &&
9080 // Reinterpret any such vector-element insertion as one with the
9109 assert(ST->hasMVEIntegerOps() &&
9113 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9132 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9150 assert(ST->hasMVEIntegerOps() &&
9213 SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
9227 EVT VT = Op->getValueType(0);
9228 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9232 // two 64-bit vectors are concatenated to a 128-bit vector.
9258 unsigned Index = V2->getAsZExtVal();
9262 assert(ST->hasMVEIntegerOps() &&
9306 assert(ST->hasMVEIntegerOps() && "Expected MVE!");
9307 EVT VT = N->getValueType(0);
9310 SDValue Op = N->getOperand(0);
9322 if (!Subtarget->hasMVEIntegerOps())
9325 EVT ToVT = N->getValueType(0);
9340 // instructions, but that doesn't extend to v8i32->v8i16 where the lanes need
9345 // - Wherever possible combine them into an instruction that makes them
9349 // - Lane Interleaving to transform blocks surrounded by ext/trunc. So
9354 // - Otherwise we have an option. By default we would expand the
9358 // - The other option is to use the fact that loads/stores can extend/truncate
9360 // becomes 3 back-to-back memory operations, but at least that is less than
9370 EVT FromVT = N->getOperand(0).getValueType();
9382 if (!Subtarget->hasMVEIntegerOps())
9387 EVT ToVT = N->getValueType(0);
9390 SDValue Op = N->getOperand(0);
9401 N->getOpcode() == ISD::SIGN_EXTEND ? ARMISD::MVESEXT : ARMISD::MVEZEXT;
9406 Ext = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext);
9407 Ext1 = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext1);
9413 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
9414 /// element has been zero/sign-extended, depending on the isSigned parameter,
9419 EVT VT = N->getValueType(0);
9420 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
9421 SDNode *BVN = N->getOperand(0).getNode();
9422 if (BVN->getValueType(0) != MVT::v4i32 ||
9423 BVN->getOpcode() != ISD::BUILD_VECTOR)
9426 unsigned HiElt = 1 - LoElt;
9427 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
9428 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
9429 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
9430 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
9434 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
9435 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
9438 if (Hi0->isZero() && Hi1->isZero())
9444 if (N->getOpcode() != ISD::BUILD_VECTOR)
9447 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9448 SDNode *Elt = N->getOperand(i).getNode();
9453 if (!isIntN(HalfSize, C->getSExtValue()))
9456 if (!isUIntN(HalfSize, C->getZExtValue()))
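// Concrete case for the sign-extension check above (illustrative): viewed as
// v4i32 on a little-endian target, the v2i64 constant <-5, 7> is the
// BUILD_VECTOR <-5, -1, 7, 0>; each high word equals the low word shifted
// right arithmetically by 32, so the vector counts as sign-extended.
static_assert((-5LL >> 32) == -1LL && (7LL >> 32) == 0LL,
              "high words match an arithmetic >> 32 of the low words");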
9467 /// isSignExtended - Check if a node is a vector value that is sign-extended
9468 /// or a constant BUILD_VECTOR with sign-extended elements.
9470 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
9477 /// isZeroExtended - Check if a node is a vector value that is zero-extended (or
9478 /// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
9480 if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
9505 /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
9506 /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
9513 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
9514 // 64-bits we need to insert a new extension so that it will be 64-bits.
9525 /// SkipLoadExtensionForVMULL - return a load of the original vector size that
9531 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
9534 if (ExtendedTy == LD->getMemoryVT())
9535 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
9536 LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
9537 LD->getMemOperand()->getFlags());
9542 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
9543 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
9544 LD->getMemoryVT(), LD->getAlign(),
9545 LD->getMemOperand()->getFlags());
9548 /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
9555 if (N->getOpcode() == ISD::SIGN_EXTEND ||
9556 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
9557 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
9558 N->getOperand(0)->getValueType(0),
9559 N->getValueType(0),
9560 N->getOpcode());
9570 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
9578 if (N->getOpcode() == ISD::BITCAST) {
9579 SDNode *BVN = N->getOperand(0).getNode();
9580 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
9581 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
9585 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
9588 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
9589 EVT VT = N->getValueType(0);
9596 const APInt &CInt = N->getConstantOperandAPInt(i);
9605 unsigned Opcode = N->getOpcode();
9607 SDNode *N0 = N->getOperand(0).getNode();
9608 SDNode *N1 = N->getOperand(1).getNode();
9609 return N0->hasOneUse() && N1->hasOneUse() &&
9616 unsigned Opcode = N->getOpcode();
9618 SDNode *N0 = N->getOperand(0).getNode();
9619 SDNode *N1 = N->getOperand(1).getNode();
9620 return N0->hasOneUse() && N1->hasOneUse() &&
9627 // Multiplications are only custom-lowered for 128-bit vectors so that
9631 "unexpected type for custom-lowering ISD::MUL");
9684 // isel lowering to take advantage of no-stall back to back vmul + vmla.
9691 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
9692 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
9694 return DAG.getNode(N0->getOpcode(), DL, VT,
9703 // TODO: Should this propagate fast-math-flags?
9734 // TODO: Should this propagate fast-math-flags?
9775 "unexpected type for custom-lowering ISD::SDIV");
9809 // TODO: Should this propagate fast-math-flags?
9812 "unexpected type for custom-lowering ISD::UDIV");
9886 EVT VT = N->getValueType(0);
9919 // by ISD::USUBO_CARRY, so compute 1 - C.
9925 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
9929 assert(Subtarget->isTargetDarwin());
9947 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
9958 Entry.Ty = PointerType::getUnqual(RetTy->getContext());
10049 if (N->getOpcode() != ISD::SDIV)
10059 if (N->getOperand(0).getValueType().isVector())
10073 // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
10097 SDValue Op = N->getOperand(1);
10098 if (N->getValueType(0) == MVT::i32)
10130 EVT MemVT = LD->getMemoryVT();
10135 assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
10136 "Expected a non-extending load");
10137 assert(LD->isUnindexed() && "Expected an unindexed load");
10151 ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
10153 LD->getMemOperand());
10158 DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32));
10169 EVT MemVT = LD->getMemoryVT();
10170 assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
10172 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10173 !Subtarget->isThumb1Only() && LD->isVolatile() &&
10174 LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10178 {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
10188 EVT MemVT = ST->getMemoryVT();
10192 assert(MemVT == ST->getValue().getValueType());
10193 assert(!ST->isTruncatingStore() && "Expected a non-extending store");
10194 assert(ST->isUnindexed() && "Expected an unindexed store");
10199 SDValue Build = ST->getValue();
10204 ? MemVT.getVectorNumElements() - I - 1
10219 ST->getChain(), dl, GRP, ST->getBasePtr(),
10221 ST->getMemOperand());
10227 EVT MemVT = ST->getMemoryVT();
10228 assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
10230 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10231 !Subtarget->isThumb1Only() && ST->isVolatile() &&
10232 ST->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10237 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10241 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10246 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
10247 MemVT, ST->getMemOperand());
10248 } else if (Subtarget->hasMVEIntegerOps() &&
10259 (N->getOpcode() == ARMISD::VMOVIMM &&
10260 isNullConstant(N->getOperand(0))));
10266 SDValue Mask = N->getMask();
10267 SDValue PassThru = N->getPassThru();
10278 VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
10279 N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
10280 N->getExtensionType(), N->isExpandingLoad());
10284 isZeroVector(PassThru->getOperand(0));
10292 if (!ST->hasMVEIntegerOps())
10297 switch (Op->getOpcode()) {
10309 SDValue Op0 = Op->getOperand(0);
10339 SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10340 SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
10341 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
10347 Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10351 if (EltVT != Op->getValueType(0))
10352 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);
10358 if (!ST->hasMVEFloatOps())
10365 if (!ST->hasNEON())
10369 SDValue Op0 = Op->getOperand(0);
10374 switch (Op->getOpcode()) {
10399 // Split 128-bit vectors, since vpmin/max takes 2 64-bit vectors.
10420 switch (Op->getOpcode()) {
10438 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering()))
10452 // Under Power Management extensions, the cycle-count is:
10454 SDValue Ops[] = { N->getOperand(0), // Chain
10488 assert(N->getValueType(0) == MVT::i64 &&
10490 SDValue Ops[] = {N->getOperand(1),
10491 createGPRPairNode(DAG, N->getOperand(2)),
10492 createGPRPairNode(DAG, N->getOperand(3)),
10493 N->getOperand(0)};
10498 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
10519 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
10633 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10637 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10681 if (Subtarget->isTargetWindows())
10698 unsigned IntNo = N->getConstantOperandVal(0);
10713 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(3), dl, MVT::i32, MVT::i32);
10717 N->getOperand(1), N->getOperand(2),
10723 /// ReplaceNodeResults - Replace the results of node with an illegal result
10729 switch (N->getOpcode()) {
10765 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
10766 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
10792 //===----------------------------------------------------------------------===//
10794 //===----------------------------------------------------------------------===//
10796 /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
10802 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
10804 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10806 MachineFunction *MF = MBB->getParent();
10807 MachineRegisterInfo *MRI = &MF->getRegInfo();
10808 MachineConstantPool *MCP = MF->getConstantPool();
10809 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
10810 const Function &F = MF->getFunction();
10812 bool isThumb = Subtarget->isThumb();
10813 bool isThumb2 = Subtarget->isThumb2();
10815 unsigned PCLabelId = AFI->createPICLabelUId();
10819 unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
10826 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
10830 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
10840 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10841 BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
10846 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10847 BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
10852 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10853 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
10856 BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
10870 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10871 BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
10875 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10876 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
10880 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10881 BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
10885 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10886 BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
10891 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10892 BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
10895 BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
10906 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10907 BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
10912 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10913 BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
10917 BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
10928 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10930 MachineFunction *MF = MBB->getParent();
10931 MachineRegisterInfo *MRI = &MF->getRegInfo();
10932 MachineFrameInfo &MFI = MF->getFrameInfo();
10935 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10953 if (!MF->hasCallSiteLandingPad(Sym)) continue;
10955 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
10972 InvokeBBs.insert(MBB->pred_begin(), MBB->pred_end());
10981 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
10982 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
10987 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
10988 DispatchBB->setIsEHPad();
10990 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
10992 if (Subtarget->isThumb())
10995 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
10997 BuildMI(TrapBB, dl, TII->get(trap_opcode));
10998 DispatchBB->addSuccessor(TrapBB);
11000 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
11001 DispatchBB->addSuccessor(DispContBB);
11004 MF->insert(MF->end(), DispatchBB);
11005 MF->insert(MF->end(), DispContBB);
11006 MF->insert(MF->end(), TrapBB);
11012 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
11017 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
11020 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
11030 if (Subtarget->isThumb2()) {
11031 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11032 BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
11039 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
11044 Register VReg1 = MRI->createVirtualRegister(TRC);
11045 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
11051 VReg2 = MRI->createVirtualRegister(TRC);
11052 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
11058 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
11064 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
11069 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11070 BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
11074 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11075 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
11082 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
11086 } else if (Subtarget->isThumb()) {
11087 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11088 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
11095 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
11100 MachineConstantPool *ConstantPool = MF->getConstantPool();
11101 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11105 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11106 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11108 Register VReg1 = MRI->createVirtualRegister(TRC);
11109 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
11113 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
11119 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
11124 Register NewVReg2 = MRI->createVirtualRegister(TRC);
11125 BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
11131 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11132 BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
11136 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11137 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
11144 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11147 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11148 BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
11156 NewVReg6 = MRI->createVirtualRegister(TRC);
11157 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
11164 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
11168 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11169 BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
11176 BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
11180 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
11181 Register VReg1 = MRI->createVirtualRegister(TRC);
11182 BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
11188 VReg2 = MRI->createVirtualRegister(TRC);
11189 BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
11195 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11200 MachineConstantPool *ConstantPool = MF->getConstantPool();
11201 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11205 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11206 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11208 Register VReg1 = MRI->createVirtualRegister(TRC);
11209 BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
11214 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11220 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
11225 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11226 BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
11231 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11232 BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
11237 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11239 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11240 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
11248 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
11253 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
11263 DispContBB->addSuccessor(CurMBB);
11273 SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
11276 if (SMBB->isEHPad()) {
11277 BB->removeSuccessor(SMBB);
11282 BB->addSuccessor(DispatchBB, BranchProbability::getZero());
11283 BB->normalizeSuccProbs();
11285 // Find the invoke call and mark all of the callee-saved registers as
11290 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
11291 if (!II->isCall()) continue;
11295 OI = II->operands_begin(), OE = II->operands_end();
11297 if (!OI->isReg()) continue;
11298 DefRegs[OI->getReg()] = true;
11305 if (Subtarget->isThumb2() &&
11309 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
11311 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
11321 // Mark all former landing pads as non-landing pads. The dispatch is the only
11324 MBBLPad->setIsEHPad(false);
11332 for (MachineBasicBlock *S : MBB->successors())
11376 /// Emit a post-increment load operation with given size. The instructions
11385 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11392 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11396 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11402 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11408 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11417 /// Emit a post-increment store operation with given size. The instructions
11426 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11433 BuildMI(*BB, Pos, dl, TII->get(StOpc))
11438 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11444 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11450 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11463 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
11465 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11466 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11467 MachineFunction::iterator It = ++BB->getIterator();
11475 MachineFunction *MF = BB->getParent();
11476 MachineRegisterInfo &MRI = MF->getRegInfo();
11481 bool IsThumb1 = Subtarget->isThumb1Only();
11482 bool IsThumb2 = Subtarget->isThumb2();
11483 bool IsThumb = Subtarget->isThumb();
11491 if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
11492 Subtarget->hasNEON()) {
11512 unsigned LoopSize = SizeVal - BytesLeft;
11514 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11553 // movw varEnd, # --> with thumb2
11555 // ldrcp varEnd, idx --> without thumb2
11556 // fallthrough --> loopMBB
11565 // fallthrough --> exitMBB
11567 // epilogue to handle left-over bytes
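// Worked example (assuming UnitSize == 16 when NEON is usable): a 70-byte
// copy gives BytesLeft = 70 % 16 = 6 and LoopSize = 64, so the loop runs
// four 16-byte iterations and the epilogue copies the remaining 6 bytes
// with narrower post-incremented loads and stores.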
11570 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11571 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11572 MF->insert(It, loopMBB);
11573 MF->insert(It, exitMBB);
11576 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
11577 loopMBB->setCallFrameSize(CallFrameSize);
11578 exitMBB->setCallFrameSize(CallFrameSize);
11581 exitMBB->splice(exitMBB->begin(), BB,
11582 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11583 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11587 if (Subtarget->useMovt()) {
11588 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi32imm : ARM::MOVi32imm),
11591 } else if (Subtarget->genExecuteOnly()) {
11592 assert(IsThumb && "Non-thumb expected to have used movt");
11593 BuildMI(BB, dl, TII->get(ARM::tMOVi32imm), varEnd).addImm(LoopSize);
11595 MachineConstantPool *ConstantPool = MF->getConstantPool();
11596 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11600 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11601 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11603 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
11607 BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
11613 BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
11620 BB->addSuccessor(loopMBB);
11635 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
11638 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
11641 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
11648 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
11650 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
11655 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
11662 BuildMI(*BB, BB->end(), dl,
11663 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11668 MIB->getOperand(5).setReg(ARM::CPSR);
11669 MIB->getOperand(5).setIsDef(true);
11671 BuildMI(*BB, BB->end(), dl,
11672 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11676 BB->addSuccessor(loopMBB);
11677 BB->addSuccessor(exitMBB);
11681 auto StartOfExit = exitMBB->begin();
11707 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
11710 assert(Subtarget->isTargetWindows() &&
11712 assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
11720 // thumb-2 environment, so there is no interworking required. As a result, we
11728 // branches for Thumb), we can generate the long-call version via
11729 // -mcmodel=large, alleviating the need for the trampoline which may clobber
11749 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
11754 BuildMI(*MBB, MI, DL, TII.get(gettBLXrOpcode(*MBB->getParent())))
11782 MachineFunction *MF = MBB->getParent();
11783 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11785 MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
11786 MF->insert(++MBB->getIterator(), ContBB);
11787 ContBB->splice(ContBB->begin(), MBB,
11788 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11789 ContBB->transferSuccessorsAndUpdatePHIs(MBB);
11790 MBB->addSuccessor(ContBB);
11792 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
11793 BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
11794 MF->push_back(TrapBB);
11795 MBB->addSuccessor(TrapBB);
11797 BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
11801 BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
11820 for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
11825 break; // Should have kill-flag - update below.
11830 if (miI == BB->end()) {
11831 for (MachineBasicBlock *Succ : BB->successors())
11832 if (Succ->isLiveIn(ARM::CPSR))
11838 SelectItr->addRegisterKilled(ARM::CPSR, TRI);
11851 BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)
11858 BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)
11865 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
11868 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopStart))
11872 BuildMI(TpEntry, Dl, TII->get(ARM::t2B))
11896 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), SrcPhiReg)
11906 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), DestPhiReg)
11916 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), LoopCounterPhiReg)
11925 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), PredCounterPhiReg)
11933 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
11939 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
11949 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VLDRBU8_post))
11960 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VSTRBU8_post))
11971 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
11975 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopEnd))
11979 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2B))
11987 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11989 bool isThumb2 = Subtarget->isThumb2();
11996 // Thumb1 post-indexed loads are really just single-register LDMs.
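// For illustration: a post-incremented word load of r0 from [r1] is emitted
// as "ldm r1!, {r0}", which loads r0 from [r1] and then advances r1 by 4,
// exactly what a post-indexed LDR would have done.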
11999 BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
12028 // |-----------------|
12031 // | TP loop Body MBB<--|
12037 MachineFunction *MF = BB->getParent();
12038 MachineFunctionProperties &Properties = MF->getProperties();
12039 MachineRegisterInfo &MRI = MF->getRegInfo();
12047 MachineBasicBlock *TpLoopBody = MF->CreateMachineBasicBlock();
12050 MF->push_back(TpLoopBody);
12062 TpExit = BB->splitAt(MI, false);
12064 assert(BB->canFallThrough() && "Exit Block must be Fallthrough of the "
12066 TpExit = BB->getFallThrough();
12067 BuildMI(BB, dl, TII->get(ARM::t2B))
12070 TpExit = BB->splitAt(MI, false);
12086 TpEntry->addSuccessor(TpLoopBody);
12087 TpLoopBody->addSuccessor(TpLoopBody);
12088 TpLoopBody->addSuccessor(TpExit);
12091 TpLoopBody->moveAfter(TpEntry);
12092 TpExit->moveAfter(TpLoopBody);
12102 // The Thumb2 pre-indexed stores have the same MI operands; they just
12106 MI.setDesc(TII->get(ARM::t2STR_PRE));
12109 MI.setDesc(TII->get(ARM::t2STRB_PRE));
12112 MI.setDesc(TII->get(ARM::t2STRH_PRE));
12124 Offset = -Offset;
12127 BuildMI(*BB, MI, dl, TII->get(NewOpc))
12148 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
12157 // diamond control-flow pattern. The incoming instruction knows the
12160 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12161 MachineFunction::iterator It = ++BB->getIterator();
12168 // fallthrough --> copy0MBB
12170 MachineFunction *F = BB->getParent();
12171 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12172 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12173 F->insert(It, copy0MBB);
12174 F->insert(It, sinkMBB);
12177 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12178 copy0MBB->setCallFrameSize(CallFrameSize);
12179 sinkMBB->setCallFrameSize(CallFrameSize);
12182 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
12185 copy0MBB->addLiveIn(ARM::CPSR);
12186 sinkMBB->addLiveIn(ARM::CPSR);
12190 sinkMBB->splice(sinkMBB->begin(), BB,
12191 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12192 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12194 BB->addSuccessor(copy0MBB);
12195 BB->addSuccessor(sinkMBB);
12197 BuildMI(BB, dl, TII->get(ARM::tBcc))
12207 // Update machine-CFG edges
12208 BB->addSuccessor(sinkMBB);
12214 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
12227 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
12236 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12240 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12246 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12250 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12260 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
12263 BuildMI(BB, dl, TII->get(ARM::t2B))
12267 BuildMI(BB, dl, TII->get(ARM::B)).addMBB(exitMBB);
12287 // diamond control-flow pattern. The incoming instruction knows the
12298 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12299 MachineFunction::iterator BBI = ++BB->getIterator();
12300 MachineFunction *Fn = BB->getParent();
12301 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12302 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12303 Fn->insert(BBI, RSBBB);
12304 Fn->insert(BBI, SinkBB);
12309 bool isThumb2 = Subtarget->isThumb2();
12310 MachineRegisterInfo &MRI = Fn->getRegInfo();
12317 SinkBB->splice(SinkBB->begin(), BB,
12318 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12319 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
12321 BB->addSuccessor(RSBBB);
12322 BB->addSuccessor(SinkBB);
12325 RSBBB->addSuccessor(SinkBB);
12328 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12335 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
12340 // by if-conversion pass
12341 BuildMI(*RSBBB, RSBBB->begin(), dl,
12342 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
12350 BuildMI(*SinkBB, SinkBB->begin(), dl,
12351 TII->get(ARM::PHI), ABSDstReg)
12372 /// when it is expanded into LDM/STM. This is done as a post-isel lowering
12376 bool isThumb1 = Subtarget->isThumb1Only();
12379 MachineFunction *MF = MI.getParent()->getParent();
12380 MachineRegisterInfo &MRI = MF->getRegInfo();
12384 if (!Node->hasAnyUseOfValue(0)) {
12387 if (!Node->hasAnyUseOfValue(1)) {
12412 // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
12418 const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
12419 MCID = &TII->get(NewOpc);
12421 assert(MCID->getNumOperands() ==
12422 MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
12432 if (Subtarget->isThumb1Only()) {
12433 for (unsigned c = MCID->getNumOperands() - 4; c--;) {
12439 for (unsigned i = MI.getNumOperands(); i--;) {
12442 int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
12443 if (DefIdx != -1)
12452 ccOutIdx = MCID->getNumOperands() - 1;
12454 ccOutIdx = MCID->getNumOperands() - 1;
12458 if (!MI.hasOptionalDef() || !MCID->operands()[ccOutIdx].isOptionalDef()) {
12466 for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
12481 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
12486 if (!Subtarget->isThumb1Only())
12497 //===----------------------------------------------------------------------===//
12499 //===----------------------------------------------------------------------===//
12513 // (select cc -1, y) [AllOnes=1]
12514 // (select cc y, -1) [AllOnes=1]
12522 switch (N->getOpcode()) {
12525 CC = N->getOperand(0);
12526 SDValue N1 = N->getOperand(1);
12527 SDValue N2 = N->getOperand(2);
12547 EVT VT = N->getValueType(0);
12548 CC = N->getOperand(0);
12556 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12568 // (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
12569 // (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
12570 // (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
12571 // (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
12572 // (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
12579 // (add (zext cc), x) -> (select cc (add x, 1), x)
12580 // (add (sext cc), x) -> (select cc (add x, -1), x)
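// This is sound because zext/sext of the i1 condition is 0/1 or 0/-1, so the
// add only ever adjusts x by that constant; the select form keeps the
// adjustment conditional and typically avoids materialising the boolean in a
// register first.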
12595 EVT VT = N->getValueType(0);
12605 SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
12619 SDValue N0 = N->getOperand(0);
12620 SDValue N1 = N->getOperand(1);
12621 if (N0.getNode()->hasOneUse())
12624 if (N1.getNode()->hasOneUse())
12632 if (N->getOpcode() == ARMISD::VUZP)
12636 if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
12650 // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
12651 if (!N->getValueType(0).is64BitVector())
12659 EVT VT = N->getValueType(0);
12664 Ops.push_back(Unzip->getOperand(0));
12665 Ops.push_back(Unzip->getOperand(1));
12698 EVT VT = N->getValueType(0);
12728 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
12734 EVT VT = N->getValueType(0);
12745 if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12747 SDValue Vec = N0->getOperand(0)->getOperand(0);
12754 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
12755 if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
12756 && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
12758 SDValue ExtVec0 = N0->getOperand(i);
12759 SDValue ExtVec1 = N1->getOperand(i);
12762 if (V != ExtVec0->getOperand(0).getNode() ||
12763 V != ExtVec1->getOperand(0).getNode())
12767 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
12768 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
12771 if (!C0 || !C1 || C0->getZExtValue() != nextIndex
12772 || C1->getZExtValue() != nextIndex+1)
12821 if (V->getOpcode() == ISD::UMUL_LOHI ||
12822 V->getOpcode() == ISD::SMUL_LOHI)
12830 if (!Subtarget->hasBaseDSP())
12833 // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
12834 // accumulates the product into a 64-bit value. The 16-bit values will
12835 // be sign-extended somehow or SRA'd into 32-bit values.
12837 SDValue Mul = AddcNode->getOperand(0);
12838 SDValue Lo = AddcNode->getOperand(1);
12840 Lo = AddcNode->getOperand(0);
12841 Mul = AddcNode->getOperand(1);
12846 SDValue SRA = AddeNode->getOperand(0);
12847 SDValue Hi = AddeNode->getOperand(1);
12849 SRA = AddeNode->getOperand(1);
12850 Hi = AddeNode->getOperand(0);
12855 if (Const->getZExtValue() != 31)
12883 Op0 = Mul->getOperand(0).getOperand(0);
12884 Op1 = Mul->getOperand(1).getOperand(0);
12916 // loAdd -> ADDC |
12919 // ADDE <- hiAdd
12927 assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
12928 AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
12931 assert(AddeSubeNode->getNumOperands() == 3 &&
12932 AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
12936 SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
12937 if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12938 AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
12939 (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
12940 AddcSubcNode->getOpcode() != ARMISD::SUBC))
12943 SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
12944 SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
12950 assert(AddcSubcNode->getNumValues() == 2 &&
12951 AddcSubcNode->getValueType(0) == MVT::i32 &&
12955 // maybe a SMLAL which multiplies two 16-bit values.
12956 if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12957 AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
12958 AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
12959 AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
12960 AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
12964 SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
12965 SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
12982 unsigned Opc = MULOp->getOpcode();
13016 if (AddcSubcNode == HiAddSub->getNode() ||
13017 AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
13025 Ops.push_back(LoMul->getOperand(0));
13026 Ops.push_back(LoMul->getOperand(1));
13032 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
13033 FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
13034 LowAddSub->getNode()->getOpcode() == ISD::Constant &&
13035 static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
13038 if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
13047 } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
13079 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13083 SDNode* AddcNode = AddeNode->getOperand(2).getNode();
13084 if (AddcNode->getOpcode() != ARMISD::ADDC)
13090 if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
13091 UmlalNode = AddcNode->getOperand(0).getNode();
13092 AddHi = AddcNode->getOperand(1);
13093 } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
13094 UmlalNode = AddcNode->getOperand(1).getNode();
13095 AddHi = AddcNode->getOperand(0);
13102 if (!isNullConstant(UmlalNode->getOperand(3)))
13105 if ((isNullConstant(AddeNode->getOperand(0)) &&
13106 AddeNode->getOperand(1).getNode() == UmlalNode) ||
13107 (AddeNode->getOperand(0).getNode() == UmlalNode &&
13108 isNullConstant(AddeNode->getOperand(1)))) {
13110 SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
13111 UmlalNode->getOperand(2), AddHi };
13127 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13132 SDNode* AddcNode = N->getOperand(2).getNode();
13133 SDNode* AddeNode = N->getOperand(3).getNode();
13134 if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
13135 (AddeNode->getOpcode() == ARMISD::ADDE) &&
13136 isNullConstant(AddeNode->getOperand(0)) &&
13137 isNullConstant(AddeNode->getOperand(1)) &&
13138 (AddeNode->getOperand(2).getNode() == AddcNode))
13141 {N->getOperand(0), N->getOperand(1),
13142 AddcNode->getOperand(0), AddcNode->getOperand(1)});
13152 if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
13153 // (SUBC (ADDE 0, 0, C), 1) -> C
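// (ADDE 0, 0, C) materialises the incoming carry as 0 or 1; subtracting 1
// from that value borrows exactly when it is 0, so the carry produced by the
// SUBC equals C and the original flag can simply be forwarded.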
13154 SDValue LHS = N->getOperand(0);
13155 SDValue RHS = N->getOperand(1);
13156 if (LHS->getOpcode() == ARMISD::ADDE &&
13157 isNullConstant(LHS->getOperand(0)) &&
13158 isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
13159 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
13163 if (Subtarget->isThumb1Only()) {
13164 SDValue RHS = N->getOperand(1);
13166 int32_t imm = C->getSExtValue();
13169 RHS = DAG.getConstant(-imm, DL, MVT::i32);
13170 unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
13172 return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
13183 if (Subtarget->isThumb1Only()) {
13185 SDValue RHS = N->getOperand(1);
13187 int64_t imm = C->getSExtValue();
13191 // The with-carry-in form matches bitwise not instead of the negation.
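// The identity being used: x + imm + carry == x - ~imm - (1 - carry), i.e.
// an ADC of imm behaves like an SBC of ~imm, so the immediate is inverted
// here rather than negated.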
13196 unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
13198 return DAG.getNode(Opcode, DL, N->getVTList(),
13199 N->getOperand(0), RHS, N->getOperand(2));
13202 } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
13211 if (!Subtarget->hasMVEIntegerOps())
13222 if (N->getOpcode() == ISD::SELECT &&
13223 N->getOperand(0)->getOpcode() == ISD::SETCC) {
13224 SetCC = N->getOperand(0);
13225 LHS = SetCC->getOperand(0);
13226 RHS = SetCC->getOperand(1);
13227 CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
13228 TrueVal = N->getOperand(1);
13229 FalseVal = N->getOperand(2);
13230 } else if (N->getOpcode() == ISD::SELECT_CC) {
13231 LHS = N->getOperand(0);
13232 RHS = N->getOperand(1);
13233 CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
13234 TrueVal = N->getOperand(2);
13235 FalseVal = N->getOperand(3);
13241 if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
13242 FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
13247 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
13248 FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
13253 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
13254 FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
13259 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
13260 FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
13269 switch (TrueVal->getOpcode()) {
13279 EVT VectorType = FalseVal->getOperand(0).getValueType();
13291 EVT LeftType = LHS->getValueType(0);
13292 EVT RightType = RHS->getValueType(0);
13304 DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
13320 EVT VT = N->getValueType(0);
13327 if (N->getOpcode() == ISD::SMIN) {
13328 Shft = N->getOperand(0);
13329 Clamp = isConstOrConstSplat(N->getOperand(1));
13330 } else if (N->getOpcode() == ISD::VSELECT) {
13332 SDValue Cmp = N->getOperand(0);
13334 cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
13335 Cmp.getOperand(0) != N->getOperand(1) ||
13336 Cmp.getOperand(1) != N->getOperand(2))
13338 Shft = N->getOperand(1);
13339 Clamp = isConstOrConstSplat(N->getOperand(2));
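// The accepted clamp values below (0x7F, 0x7FFF, 0x7FFFFFFF) are the signed
// maxima of i8, i16 and i32, which appears to be how the narrower width
// being saturated to is recognised.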
13348 switch (Clamp->getSExtValue()) {
13349 case (1 << 7) - 1:
13353 case (1 << 15) - 1:
13357 case (1ULL << 31) - 1:
13368 if (!N1 || N1->getSExtValue() != ShftAmt)
13430 if (!Subtarget->hasMVEIntegerOps())
13438 // We need to re-implement this optimization here as the implementation in the
13439 // Target-Independent DAGCombiner does not handle the kind of constant we make
13440 // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
13445 if (N->getOperand(0).getOpcode() != ISD::XOR)
13447 SDValue XOR = N->getOperand(0);
13453 isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
13455 if (!Const || !Const->isOne())
13459 SDValue Cond = XOR->getOperand(0);
13460 SDValue LHS = N->getOperand(1);
13461 SDValue RHS = N->getOperand(2);
13462 EVT Type = N->getValueType(0);
13466 // Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n
13470 SDValue Op0 = N->getOperand(0);
13471 SDValue Op1 = N->getOperand(1);
13472 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13473 EVT VT = N->getValueType(0);
13475 if (!Subtarget->hasMVEIntegerOps() ||
13525 /// PerformADDECombine - Target-specific dag combine transform from
13532 if (Subtarget->isThumb1Only())
13541 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
13559 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
13560 if (N0.getNode()->hasOneUse())
13567 EVT VT = N->getValueType(0);
13568 SDValue N0 = N->getOperand(0);
13569 SDValue N1 = N->getOperand(1);
13585 // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->
13590 !isa<ConstantSDNode>(N0) && N1->hasOneUse()) {
13594 // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
13597 N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) {
13611 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),
13612 N1.getOperand(1 - N1RedOp));
13644 if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||
13645 !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||
13646 Load1->isIndexed())
13657 return -1;
13664 if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) {
13713 if (!Subtarget->hasMVEIntegerOps())
13719 EVT VT = N->getValueType(0);
13720 SDValue N0 = N->getOperand(0);
13721 SDValue N1 = N->getOperand(1);
13737 if (NB->getOpcode() != ISD::BUILD_PAIR)
13739 SDValue VecRed = NB->getOperand(0);
13740 if ((VecRed->getOpcode() != Opcode && VecRed->getOpcode() != OpcodeA) ||
13742 NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
13745 if (VecRed->getOpcode() == OpcodeA) {
13746 // add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)
13755 unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;
13757 Ops.push_back(VecRed->getOperand(I));
13802 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
13803 N->getOpcode() == ISD::SRL) &&
13809 if (N->getOpcode() != ISD::SHL)
13812 if (Subtarget->isThumb1Only()) {
13816 if (N->getOpcode() != ISD::SHL)
13818 SDValue N1 = N->getOperand(0);
13819 if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
13820 N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
13822 if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
13823 if (Const->getAPIntValue().ult(256))
13825 if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
13826 Const->getAPIntValue().sgt(-256))
13832 // Turn off commute-with-shift transform after legalization, so it doesn't
13841 assert(N->getOpcode() == ISD::XOR &&
13842 (N->getOperand(0).getOpcode() == ISD::SHL ||
13843 N->getOperand(0).getOpcode() == ISD::SRL) &&
13847 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
13848 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
13851 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13852 unsigned ShiftAmt = ShiftC->getZExtValue();
13853 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
13854 if (N->getOperand(0).getOpcode() == ISD::SHL)
13855 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
13856 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
13865 assert(((N->getOpcode() == ISD::SHL &&
13866 N->getOperand(0).getOpcode() == ISD::SRL) ||
13867 (N->getOpcode() == ISD::SRL &&
13868 N->getOperand(0).getOpcode() == ISD::SHL)) &&
13869 "Expected shift-shift mask");
13871 if (!Subtarget->isThumb1Only())
13882 return Subtarget->hasMVEIntegerOps() && isTypeLegal(VT);
13886 if (!Subtarget->hasNEON()) {
13887 if (Subtarget->isThumb1Only())
13901 return Subtarget->hasVFP2Base();
13903 return Subtarget->hasVFP2Base();
13905 return Subtarget->hasFP64();
13908 return Subtarget->hasMVEFloatOps();
13922 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13923 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
13933 // - if c1 and c2 are small enough that they don't require mov imms.
13934 // - the user(s) of the node can perform an shl
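// For instance (shl (add x, 4), 2) becomes (add (shl x, 2), 16), since
// (x + 4) << 2 == (x << 2) + 16.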
13936 // No shifted operands for 16-bit instructions.
13937 if (ST->isThumb() && ST->isThumb1Only())
13941 for (auto *U : N->uses()) {
13942 switch(U->getOpcode()) {
13955 if (isa<ConstantSDNode>(U->getOperand(0)) ||
13956 isa<ConstantSDNode>(U->getOperand(1)))
13960 if (U->getOperand(0).getOpcode() == ISD::SHL ||
13961 U->getOperand(1).getOpcode() == ISD::SHL)
13967 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
13968 N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
13971 if (N->getOperand(0).getOpcode() != ISD::SHL)
13974 SDValue SHL = N->getOperand(0);
13976 auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
13981 APInt C2Int = C2->getAPIntValue();
13982 APInt C1Int = C1ShlC2->getAPIntValue();
13989 APInt Mask = APInt::getHighBitsSet(C2Width, C2Width - C2Value);
13996 // The immediates are encoded as an 8-bit value that can be rotated.
13999 return Imm.getBitWidth() - Zeros > 8;
14008 SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
14014 SHL.dump(); N->dump());
14020 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
14025 SDValue N0 = N->getOperand(0);
14026 SDValue N1 = N->getOperand(1);
14043 // Combine (sub 0, (csinc X, Y, CC)) -> (csinv -X, Y, CC)
14044 // providing -X is as cheap as X (currently, just a constant).
14046 if (N->getValueType(0) != MVT::i32 || !isNullConstant(N->getOperand(0)))
14048 SDValue CSINC = N->getOperand(1);
14057 DAG.getNode(ISD::SUB, SDLoc(N), MVT::i32, N->getOperand(0),
14063 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
14068 SDValue N0 = N->getOperand(0);
14069 SDValue N1 = N->getOperand(1);
14071 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
14072 if (N1.getNode()->hasOneUse())
14079 if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
14082 // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
14085 SDValue VDup = N->getOperand(1);
14086 if (VDup->getOpcode() != ARMISD::VDUP)
14089 SDValue VMov = N->getOperand(0);
14090 if (VMov->getOpcode() == ISD::BITCAST)
14091 VMov = VMov->getOperand(0);
14093 if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
14099 VDup->getOperand(0));
14100 return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
14121 if (!Subtarget->hasVMLxForwarding())
14125 SDValue N0 = N->getOperand(0);
14126 SDValue N1 = N->getOperand(1);
14140 EVT VT = N->getValueType(0);
14142 SDValue N00 = N0->getOperand(0);
14143 SDValue N01 = N0->getOperand(1);
14151 EVT VT = N->getValueType(0);
14155 SDValue N0 = N->getOperand(0);
14156 SDValue N1 = N->getOperand(1);
14159 if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)
14161 EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();
14163 return Op->getOperand(0);
14168 // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.
14172 if (!Subtarget->isLittle())
14176 if (And->getOpcode() == ISD::BITCAST)
14177 And = And->getOperand(0);
14178 if (And->getOpcode() != ISD::AND)
14180 SDValue Mask = And->getOperand(1);
14181 if (Mask->getOpcode() == ISD::BITCAST)
14182 Mask = Mask->getOperand(0);
14184 if (Mask->getOpcode() != ISD::BUILD_VECTOR ||
14187 if (isAllOnesConstant(Mask->getOperand(0)) &&
14188 isNullConstant(Mask->getOperand(1)) &&
14189 isAllOnesConstant(Mask->getOperand(2)) &&
14190 isNullConstant(Mask->getOperand(3)))
14191 return And->getOperand(0);
14219 EVT VT = N->getValueType(0);
14220 if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
14223 if (Subtarget->isThumb1Only())
14234 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14238 int64_t MulAmt = C->getSExtValue();
14241 ShiftAmt = ShiftAmt & (32 - 1);
14242 SDValue V = N->getOperand(0);
14249 if (llvm::has_single_bit<uint32_t>(MulAmt - 1)) {
14255 DAG.getConstant(Log2_32(MulAmt - 1), DL,
14258 // (mul x, 2^N - 1) => (sub (shl x, N), x)
14268 uint64_t MulAmtAbs = -MulAmt;
14270 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
14277 } else if (llvm::has_single_bit<uint32_t>(MulAmtAbs - 1)) {
14278 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
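// Concrete instances of the patterns handled here, with N == 3:
//   mul x, 9  -> add (shl x, 3), x        mul x, 7  -> sub (shl x, 3), x
//   mul x, -7 -> sub x, (shl x, 3)        mul x, -9 -> neg (add (shl x, 3), x)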
14283 DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
14303 // Allow DAGCombine to pattern-match before we touch the canonical form.
14307 if (N->getValueType(0) != MVT::i32)
14310 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14314 uint32_t C1 = (uint32_t)N1C->getZExtValue();
14319 SDNode *N0 = N->getOperand(0).getNode();
14320 if (!N0->hasOneUse())
14323 if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
14326 bool LeftShift = N0->getOpcode() == ISD::SHL;
14328 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14332 uint32_t C2 = (uint32_t)N01C->getZExtValue();
14338 C1 &= (-1U << C2);
14340 C1 &= (-1U >> C2);
14354 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14355 DAG.getConstant(C3 - C2, DL, MVT::i32));
14365 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14366 DAG.getConstant(C3 - C2, DL, MVT::i32));
14378 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14391 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14403 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i32, N0->getOperand(0),
14415 // Attempt to use immediate-form VBIC
14416 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14418 EVT VT = N->getValueType(0);
14428 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14429 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14438 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
14445 if (!Subtarget->isThumb1Only()) {
14446 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
14454 if (Subtarget->isThumb1Only())
14465 if (!Subtarget->hasV6Ops() ||
14466 (Subtarget->isThumb() &&
14467 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
14470 SDValue SRL = OR->getOperand(0);
14471 SDValue SHL = OR->getOperand(1);
14474 SRL = OR->getOperand(1);
14475 SHL = OR->getOperand(0);
14493 // For SMUL[B|T] smul_lohi will take a 32-bit and a 16-bit argument.
14494 // For SMUWB the 16-bit value will be sign-extended somehow.
14497 SDValue OpS16 = SMULLOHI->getOperand(0);
14498 SDValue OpS32 = SMULLOHI->getOperand(1);
14503 OpS32 = SMULLOHI->getOperand(0);
14512 OpS16 = OpS16->getOperand(0);
14526 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
14529 EVT VT = N->getValueType(0);
14530 SDValue N0 = N->getOperand(0);
14531 SDValue N1 = N->getOperand(1);
14556 unsigned Mask = MaskC->getZExtValue();
14563 unsigned Val = N1C->getZExtValue();
14584 unsigned Mask2 = N11C->getZExtValue();
14592 if (Subtarget->hasDSP() &&
14609 if (Subtarget->hasDSP() &&
14625 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
14631 unsigned ShAmtC = ShAmt->getAsZExtVal();
14666 if (N->getOpcode() == ARMISD::VCMP)
14667 return (ARMCC::CondCodes)N->getConstantOperandVal(2);
14668 else if (N->getOpcode() == ARMISD::VCMPZ)
14669 return (ARMCC::CondCodes)N->getConstantOperandVal(1);
14676 return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
14681 // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
14683 EVT VT = N->getValueType(0);
14685 SDValue N0 = N->getOperand(0);
14686 SDValue N1 = N->getOperand(1);
14689 if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
14704 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR
14708 // Attempt to use immediate-form VORR
14709 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14711 EVT VT = N->getValueType(0);
14717 if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
14724 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14725 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14734 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
14741 if (!Subtarget->isThumb1Only()) {
14742 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
14749 SDValue N0 = N->getOperand(0);
14750 SDValue N1 = N->getOperand(1);
14753 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
14767 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
14768 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
14770 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
14772 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
14783 N0->getOperand(1),
14784 N0->getOperand(0),
14785 N1->getOperand(0));
14808 EVT VT = N->getValueType(0);
14814 if (!Subtarget->isThumb1Only()) {
14815 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
14823 if (Subtarget->hasMVEIntegerOps()) {
14825 SDValue N0 = N->getOperand(0);
14826 SDValue N1 = N->getOperand(1);
14827 const TargetLowering *TLI = Subtarget->getTargetLowering();
14828 if (TLI->isConstTrueVal(N1) &&
14829 (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
14835 Ops.push_back(N0->getOperand(0));
14836 if (N0->getOpcode() == ARMISD::VCMP)
14837 Ops.push_back(N0->getOperand(1));
14839 return DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
14847 // ParseBFI - Given a BFI instruction in N, extract the "from" value (Rn) and return it,
14851 assert(N->getOpcode() == ARMISD::BFI);
14853 SDValue From = N->getOperand(1);
14854 ToMask = ~N->getConstantOperandAPInt(2);
14859 if (From->getOpcode() == ISD::SRL &&
14860 isa<ConstantSDNode>(From->getOperand(1))) {
14861 APInt Shift = From->getConstantOperandAPInt(1);
14864 From = From->getOperand(0);
14875 unsigned FirstActiveBitInB = B.getBitWidth() - B.countl_zero() - 1;
14876 return LastActiveBitInA - 1 == FirstActiveBitInB;
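// Assuming LastActiveBitInA is the index of A's lowest set bit, this checks
// that A begins exactly one bit above where B ends, i.e. the two masks meet
// with no gap and no overlap.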
14883 SDValue To = N->getOperand(0);
14911 SDValue N0 = N->getOperand(0);
14912 SDValue N1 = N->getOperand(1);
14915 // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
14920 unsigned InvMask = N->getConstantOperandVal(2);
14922 unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
14926 unsigned Mask = (1u << Width) - 1;
14927 unsigned Mask2 = N11C->getZExtValue();
14929 return DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
14930 N->getOperand(0), N1.getOperand(0), N->getOperand(2));
14949 EVT VT = N->getValueType(0);
14963 if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
14964 APInt ToMask1 = ~N->getConstantOperandAPInt(2);
14971 EVT VT = N->getValueType(0);
14974 N->getOperand(1), N->getOperand(2));
14986 if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
14988 SDValue CSInc = Cmp->getOperand(0);
14995 CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
15000 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
15005 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
15010 isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
15033 // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->
15034 // if C1==EQ -> CSXYZ A, B, C2, D
15035 // if C1==NE -> CSXYZ A, B, NOT(C2), D
15037 if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
15038 if (N->getConstantOperandVal(2) == ARMCC::EQ)
15039 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15040 N->getOperand(1),
15042 if (N->getConstantOperandVal(2) == ARMCC::NE)
15044 N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15045 N->getOperand(1),
15051 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for
15056 // vmovrrd(vmovdrr x, y) -> x,y
15057 SDValue InDouble = N->getOperand(0);
15058 if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
15061 // vmovrrd(load f64) -> (load i32), (load i32)
15063 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
15064 InNode->getValueType(0) == MVT::f64 &&
15065 InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
15066 !cast<LoadSDNode>(InNode)->isVolatile()) {
15067 // TODO: Should this be done for non-FrameIndex operands?
15072 SDValue BasePtr = LD->getBasePtr();
15074 DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
15075 LD->getAlign(), LD->getMemOperand()->getFlags());
15080 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
15081 LD->getPointerInfo().getWithOffset(4),
15082 commonAlignment(LD->getAlign(), 4),
15083 LD->getMemOperand()->getFlags());
15092 // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d
15093 // VMOVRRD(extract(insert_vector(insert_vector(.., a, l1), b, l2))) -> a,b
15116 if (!Subtarget->isLittle() && BVSwap)
15134 if (!Subtarget->isLittle() && BVSwap)
15143 /// PerformVMOVDRRCombine - Target-specific dag combine xforms for
15146 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
15147 SDValue Op0 = N->getOperand(0);
15148 SDValue Op1 = N->getOperand(1);
15157 N->getValueType(0), Op0.getOperand(0));
15163 SDValue Op0 = N->getOperand(0);
15165 // VMOVhr (VMOVrh (X)) -> X
15166 if (Op0->getOpcode() == ARMISD::VMOVrh)
15167 return Op0->getOperand(0);
15169 // FullFP16: half values are passed in S-registers, and we don't
15177 if (Op0->getOpcode() == ISD::BITCAST) {
15178 SDValue Copy = Op0->getOperand(0);
15180 Copy->getOpcode() == ISD::CopyFromReg) {
15181 bool HasGlue = Copy->getNumOperands() == 3;
15182 SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
15183 HasGlue ? Copy->getOperand(2) : SDValue()};
15184 EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};
15201 // fold (VMOVhr (load x)) -> (load (f16*)x)
15203 if (LN0->hasOneUse() && LN0->isUnindexed() &&
15204 LN0->getMemoryVT() == MVT::i16) {
15206 DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
15207 LN0->getBasePtr(), LN0->getMemOperand());
15224 SDValue N0 = N->getOperand(0);
15225 EVT VT = N->getValueType(0);
15227 // fold (VMOVrh (fpconst x)) -> const x
15229 APFloat V = C->getValueAPF();
15233 // fold (VMOVrh (load x)) -> (zextload (i16*)x)
15238 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),
15239 LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());
15245 // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)
15246 if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15247 isa<ConstantSDNode>(N0->getOperand(1)))
15248 return DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),
15249 N0->getOperand(1));
15254 /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
15255 /// are normal, non-volatile loads. If so, it is profitable to bitcast an
15259 unsigned NumElts = N->getValueType(0).getVectorNumElements();
15261 SDNode *Elt = N->getOperand(i).getNode();
15262 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
15268 /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
15273 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
15278 if (N->getNumOperands() == 2)
15284 EVT VT = N->getValueType(0);
15291 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
15301 /// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
15315 // 2. The size of its operands is 32 bits (64-bit operands are not legal).
15316 EVT VT = N->getValueType(0);
15320 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
15327 SDNode *Use = *N->use_begin();
15328 if (Use->getOpcode() != ISD::BITCAST ||
15329 Use->getValueType(0).isFloatingPoint())
15341 SDValue Elt = N->getOperand(Idx);
15342 if (Elt->getOpcode() == ISD::BITCAST) {
15344 if (Elt->getOperand(0).getValueType() == MVT::i32)
15349 --NumOfRelevantElts;
15352 // Check if more than half of the elements require a non-free bitcast.
15367 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
15372 SDValue V = N->getOperand(Idx);
15376 V->getOperand(0).getValueType() == MVT::i32)
15395 EVT VT = N->getValueType(0);
15396 SDValue Op = N->getOperand(0);
15400 if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
15402 if (Op->getOperand(0).getValueType() == VT)
15403 return Op->getOperand(0);
15404 return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15407 // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
15411 DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15429 EVT VT = N->getValueType(0);
15430 SDValue Op = N->getOperand(0);
15434 if (ST->isLittle())
15437 // VECTOR_REG_CAST undef -> undef
15442 if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
15444 if (Op->getOperand(0).getValueType() == VT)
15445 return Op->getOperand(0);
15446 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
15454 if (!Subtarget->hasMVEIntegerOps())
15457 EVT VT = N->getValueType(0);
15458 SDValue Op0 = N->getOperand(0);
15459 SDValue Op1 = N->getOperand(1);
15460 ARMCC::CondCodes Cond = (ARMCC::CondCodes)N->getConstantOperandVal(2);
15463 // vcmp X, 0, cc -> vcmpz X, cc
15465 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, N->getOperand(2));
15469 // vcmp 0, X, cc -> vcmpz X, reversed(cc)
15473 // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
15474 if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
15482 /// PerformInsertEltCombine - Target-specific dag combine xforms for
15488 EVT VT = N->getValueType(0);
15489 SDNode *Elt = N->getOperand(1).getNode();
15491 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
15498 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
15499 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
15504 Vec, V, N->getOperand(2));
15510 // extract(x, n); extract(x, n+1) -> VMOVRRD(extract v2f64 x, n/2)
15511 // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD(extract x, n/2)
15514 EVT VT = N->getValueType(0);
15529 if (Ext->use_size() == 1 &&
15530 (Ext->use_begin()->getOpcode() == ISD::SINT_TO_FP ||
15531 Ext->use_begin()->getOpcode() == ISD::UINT_TO_FP))
15542 auto OtherIt = find_if(Op0->uses(), [&](SDNode *V) {
15543 return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15544 isa<ConstantSDNode>(V->getOperand(1)) &&
15545 V->getConstantOperandVal(1) == Lane + 1 &&
15546 V->getOperand(0).getResNo() == ResNo;
15548 if (OtherIt == Op0->uses().end())
15555 if (OtherExt->use_size() != 1 ||
15556 OtherExt->use_begin()->getOpcode() != ISD::BITCAST ||
15557 OtherExt->use_begin()->getValueType(0) != MVT::i32)
15559 OtherExt = SDValue(*OtherExt->use_begin(), 0);
15577 SDValue Op0 = N->getOperand(0);
15578 EVT VT = N->getValueType(0);
15581 // extract (vdup x) -> x
15582 if (Op0->getOpcode() == ARMISD::VDUP) {
15583 SDValue X = Op0->getOperand(0);
15591 while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
15592 X = X->getOperand(0);
15597 // extract ARM_BUILD_VECTOR -> x
15598 if (Op0->getOpcode() == ARMISD::BUILD_VECTOR &&
15599 isa<ConstantSDNode>(N->getOperand(1)) &&
15600 N->getConstantOperandVal(1) < Op0.getNumOperands()) {
15601 return Op0.getOperand(N->getConstantOperandVal(1));
15604 // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
15606 isa<ConstantSDNode>(N->getOperand(1)) &&
15611 unsigned Offset = N->getConstantOperandVal(1);
15614 return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
15617 // extract x, n; extract x, n+1 -> VMOVRRD x
15621 // extract (MVETrunc(x)) -> extract x
15622 if (Op0->getOpcode() == ARMISD::MVETRUNC) {
15623 unsigned Idx = N->getConstantOperandVal(1);
15625 Idx / Op0->getOperand(0).getValueType().getVectorNumElements();
15627 Idx % Op0->getOperand(0).getValueType().getVectorNumElements();
15636 SDValue Op = N->getOperand(0);
15637 EVT VT = N->getValueType(0);
15639 // sext_inreg(VGETLANEu) -> VGETLANEs
15641 cast<VTSDNode>(N->getOperand(1))->getVT() ==
15651 SDValue Vec = N->getOperand(0);
15652 SDValue SubVec = N->getOperand(1);
15653 uint64_t IdxVal = N->getConstantOperandVal(2);
15673 // Fold insert_subvector -> concat_vectors
15674 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
15675 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
15690 // shuffle(MVETrunc(x, y)) -> VMOVN(x, y)
15693 SDValue Trunc = N->getOperand(0);
15695 if (Trunc.getOpcode() != ARMISD::MVETRUNC || !N->getOperand(1).isUndef())
15699 if (isVMOVNTruncMask(N->getMask(), VT, false))
15705 else if (isVMOVNTruncMask(N->getMask(), VT, true))
15714 /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
15725 // targets, but for NEON it is better to concatenate two double-register
15726 // size vector operands into a single quad-register size vector. Do that
15728 // shuffle(concat(v1, undef), concat(v2, undef)) ->
15730 SDValue Op0 = N->getOperand(0);
15731 SDValue Op1 = N->getOperand(1);
15743 EVT VT = N->getValueType(0);
15757 int MaskElt = SVN->getMaskElt(n);
15758 int NewElt = -1;
15762 NewElt = HalfElts + MaskElt - NumElts;
15805 unsigned IntNo = N->getConstantOperandVal(1);
15926 switch (N->getOpcode()) {
15962 VecTy = N->getValueType(0);
15964 VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
15968 VecTy = N->getOperand(1).getValueType();
15980 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
15981 // separate instructions that make it harder to use a non-constant update.
15992 Align Alignment = MemN->getAlign();
15994 // If this is a less-than-standard-aligned load/store, change the type to
15998 // There are 3 ways to get to this base-update combine:
15999 // - intrinsics: they are assumed to be properly aligned (to the standard
16001 // - ARMISD::VLDx nodes: they are only generated from the aforementioned
16003 // - generic load/store instructions: the alignment is specified as an
16007 // generate non-standard-aligned ARMISD::VLDx nodes.
16011 assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
16039 Ops.push_back(N->getOperand(0)); // incoming chain
16040 Ops.push_back(N->getOperand(Target.AddrOpIdx));
16045 Ops.push_back(StN->getValue());
16050 hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
16052 Ops.push_back(N->getOperand(i));
16058 // If this is a non-standard-aligned STORE, the penultimate operand is the
16060 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
16061 SDValue &StVal = Ops[Ops.size() - 2];
16067 MemN->getMemOperand());
16074 // If this is a non-standard-aligned LOAD, the first result is the loaded
16076 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
16088 // If (opcode ptr inc) is an ADD-like instruction, return the
16099 return CInc->getZExtValue();
16103 return CInc->getZExtValue();
16113 switch (N->getOpcode()) {
16116 if (isa<ConstantSDNode>(N->getOperand(1))) {
16117 *Ptr = N->getOperand(0);
16118 *CInc = N->getOperand(1);
16124 if (isa<ConstantSDNode>(N->getOperand(2))) {
16125 *Ptr = N->getOperand(1);
16126 *CInc = N->getOperand(2);
16151 /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
16158 const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
16159 N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
16160 const bool isStore = N->getOpcode() == ISD::STORE;
16164 SDValue Addr = N->getOperand(AddrOpIdx);
16169 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16170 UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
16173 User->getNumOperands() != 2)
16176 SDValue Inc = User->getOperand(UI.getOperandNo() == 1 ? 0 : 1);
16178 getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
16180 if (ConstInc || User->getOpcode() == ISD::ADD)
16190 getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
16191 for (SDNode::use_iterator UI = Base->use_begin(), UE = Base->use_end();
16196 User->getNumOperands() != 2)
16199 SDValue UserInc = User->getOperand(UI.getOperandNo() == 0 ? 1 : 0);
16201 getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
16206 unsigned NewConstInc = UserOffset - Offset;
16220 --NumValidUpd;
16231 // Try to fold with other users. Non-constant updates are considered
16259 SDValue Addr = N->getOperand(2);
16264 // to post-inc the last of them.
16265 unsigned IntNo = N->getConstantOperandVal(1);
16266 if (IntNo == Intrinsic::arm_mve_vst2q && N->getConstantOperandVal(5) != 1)
16268 if (IntNo == Intrinsic::arm_mve_vst4q && N->getConstantOperandVal(7) != 3)
16272 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16273 UE = Addr.getNode()->use_end();
16276 if (User->getOpcode() != ISD::ADD ||
16322 VecTy = N->getValueType(0);
16324 VecTy = N->getOperand(3).getValueType();
16330 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
16332 if (!CInc || CInc->getZExtValue() != NumBytes)
16348 Ops.push_back(N->getOperand(0)); // incoming chain
16349 Ops.push_back(N->getOperand(2)); // ptr
16352 for (unsigned i = 3; i < N->getNumOperands(); ++i)
16353 Ops.push_back(N->getOperand(i));
16356 MemN->getMemOperand());
16373 /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
16374 /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
16375 /// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
16379 EVT VT = N->getValueType(0);
16380 // vldN-dup instructions only support 64-bit vectors for N > 1.
16384 // Check if the VDUPLANE operand is a vldN-dup intrinsic.
16385 SDNode *VLD = N->getOperand(0).getNode();
16386 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
16390 unsigned IntNo = VLD->getConstantOperandVal(1);
16404 // First check that all the vldN-lane uses are VDUPLANEs and that the lane
16406 unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3);
16407 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16413 if (User->getOpcode() != ARMISD::VDUPLANE ||
16414 VLDLaneNo != User->getConstantOperandVal(1))
16418 // Create the vldN-dup node.
16425 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
16428 Ops, VLDMemInt->getMemoryVT(),
16429 VLDMemInt->getMemOperand());
16432 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16442 // Now the vldN-lane intrinsic is dead except for its chain result.
16453 /// PerformVDUPLANECombine - Target-specific dag combine xforms for
16458 SDValue Op = N->getOperand(0);
16459 EVT VT = N->getValueType(0);
16462 if (Subtarget->hasMVEIntegerOps()) {
16468 N->getOperand(0), N->getOperand(1));
16472 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
16473 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
16486 // The canonical VMOV for a zero vector uses a 32-bit element size.
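// (Illustrative: the all-zero vector is emitted as "vmov.i32 qd, #0" rather
//  than with an 8-bit or 16-bit element size.)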
16497 /// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
16500 SDValue Op = N->getOperand(0);
16503 if (Subtarget->hasMVEIntegerOps()) {
16504 // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will
16507 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16510 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16514 if (!Subtarget->hasNEON())
16517 // Match VDUP(LOAD) -> VLD1DUP.
16521 if (LD && Op.hasOneUse() && LD->isUnindexed() &&
16522 LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
16523 SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
16524 DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};
16525 SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
16528 LD->getMemoryVT(), LD->getMemOperand());
16539 EVT VT = N->getValueType(0);
16542 if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&
16554 SDValue StVal = St->getValue();
16556 if (!St->isTruncatingStore() || !VT.isVector())
16559 EVT StVT = St->getMemoryVT();
16584 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
16586 ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
16608 // Bitcast the original vector into a vector of store-size units
16617 SDValue BasePtr = St->getBasePtr();
16625 DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
16626 St->getAlign(), St->getMemOperand()->getFlags());
16639 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16641 SDValue Trunc = St->getValue();
16642 if (Trunc->getOpcode() != ISD::FP_ROUND)
16644 EVT FromVT = Trunc->getOperand(0).getValueType();
16667 ArrayRef<int> M = SVN->getMask();
16669 if (SVN->getOperand(1).isUndef())
16692 SDValue Ch = St->getChain();
16693 SDValue BasePtr = St->getBasePtr();
16694 Align Alignment = St->getOriginalAlign();
16695 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16696 AAMDNodes AAInfo = St->getAAInfo();
16720 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16732 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16734 SDValue Trunc = St->getValue();
16735 if (Trunc->getOpcode() != ARMISD::MVETRUNC)
16737 EVT FromVT = Trunc->getOperand(0).getValueType();
16743 SDValue Ch = St->getChain();
16744 SDValue BasePtr = St->getBasePtr();
16745 Align Alignment = St->getOriginalAlign();
16746 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16747 AAMDNodes AAInfo = St->getAAInfo();
16761 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16771 // use of more integer post-inc stores not available with vstr.
16773 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16775 SDValue Extract = St->getValue();
16779 if (VT != MVT::f16 || Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16791 SDValue Ch = St->getChain();
16792 SDValue BasePtr = St->getBasePtr();
16793 Align Alignment = St->getOriginalAlign();
16794 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16795 AAMDNodes AAInfo = St->getAAInfo();
16798 St->getPointerInfo(), NewToVT, Alignment,
16804 /// PerformSTORECombine - Target-specific dag combine xforms for
16810 if (St->isVolatile())
16812 SDValue StVal = St->getValue();
16815 if (Subtarget->hasNEON())
16819 if (Subtarget->hasMVEFloatOps())
16823 if (Subtarget->hasMVEIntegerOps()) {
16836 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
16837 StVal.getNode()->hasOneUse()) {
16841 SDValue BasePtr = St->getBasePtr();
16843 St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
16844 BasePtr, St->getPointerInfo(), St->getOriginalAlign(),
16845 St->getMemOperand()->getFlags());
16850 StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
16851 OffsetPtr, St->getPointerInfo().getWithOffset(4),
16852 St->getOriginalAlign(),
16853 St->getMemOperand()->getFlags());
16857 StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16875 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
16876 St->getPointerInfo(), St->getAlign(),
16877 St->getMemOperand()->getFlags(), St->getAAInfo());
16881 if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
16888 /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
16889 /// can replace combinations of VMUL and VCVT (floating-point to integer)
16899 if (!Subtarget->hasNEON())
16902 SDValue Op = N->getOperand(0);
16907 SDValue ConstVec = Op->getOperand(1);
16913 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
16926 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16927 if (C == -1 || C == 0 || C > 32)
16931 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
16936 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
16940 FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
16947 if (!Subtarget->hasMVEFloatOps())
16950 // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)
16953 SDValue Op0 = N->getOperand(0);
16954 SDValue Op1 = N->getOperand(1);
16955 EVT VT = N->getValueType(0);
16958 // The identity element for a fadd is -0.0 or +0.0 when the nsz flag is set,
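// (Reasoning sketch: in the lanes where the vselect picks -0.0 the fadd
//  computes x + (-0.0) == x, which is exactly the "x" the rewritten vselect
//  returns, so the two forms agree lane by lane.)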
16978 SDNodeFlags FaddFlags = N->getFlags();
16989 SDValue LHS = N->getOperand(0);
16990 SDValue RHS = N->getOperand(1);
16991 EVT VT = N->getValueType(0);
16994 if (!N->getFlags().hasAllowReassociation())
16997 // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
17006 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(2), B, N->getFlags()),
17008 VCMLA->setFlags(A->getFlags());
17028 /// PerformVMulVCTPCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
17029 /// can replace combinations of VCVT (integer to floating-point) and VMUL
17039 if (!Subtarget->hasNEON())
17042 SDValue Op = N->getOperand(0);
17043 unsigned OpOpcode = Op.getNode()->getOpcode();
17044 if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
17048 SDValue ConstVec = N->getOperand(1);
17052 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
17067 if (!CN || !CN->getValueAPF().getExactInverse(&Recip))
17078 if (C == -1 || C == 0 || C > 32)
17097 if (!ST->hasMVEIntegerOps())
17100 assert(N->getOpcode() == ISD::VECREDUCE_ADD);
17101 EVT ResVT = N->getValueType(0);
17102 SDValue N0 = N->getOperand(0);
17145 if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
17147 SDValue A = N0->getOperand(0);
17154 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17155 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17157 Mask = N0->getOperand(0);
17158 SDValue Ext = N0->getOperand(1);
17159 if (Ext->getOpcode() != ExtendCode)
17161 SDValue A = Ext->getOperand(0);
17179 if (Mul->getOpcode() == ExtendCode &&
17180 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17182 Mul = Mul->getOperand(0);
17183 if (Mul->getOpcode() != ISD::MUL)
17185 SDValue ExtA = Mul->getOperand(0);
17186 SDValue ExtB = Mul->getOperand(1);
17187 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17189 A = ExtA->getOperand(0);
17190 B = ExtB->getOperand(0);
17206 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17207 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17209 Mask = N0->getOperand(0);
17210 SDValue Mul = N0->getOperand(1);
17211 if (Mul->getOpcode() == ExtendCode &&
17212 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17214 Mul = Mul->getOperand(0);
17215 if (Mul->getOpcode() != ISD::MUL)
17217 SDValue ExtA = Mul->getOperand(0);
17218 SDValue ExtB = Mul->getOperand(1);
17219 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17221 A = ExtA->getOperand(0);
17222 B = ExtB->getOperand(0);
17231 // Split illegal MVT::v16i8->i64 vector reductions into two legal v8i16->i64
17334 if (Op->getOpcode() == ISD::VSELECT)
17335 Op = Op->getOperand(1);
17336 if (Op->getOpcode() == ISD::ZERO_EXTEND &&
17337 Op->getOperand(0)->getOpcode() == ISD::MUL) {
17338 SDValue Mul = Op->getOperand(0);
17339 if (Mul->getOperand(0) == Mul->getOperand(1) &&
17340 Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
17341 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
17343 Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
17344 N0->getOperand(0), Ext, N0->getOperand(2));
17356 unsigned VecOp = N->getOperand(0).getValueType().isVector() ? 0 : 2;
17357 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp));
17358 if (!Shuf || !Shuf->getOperand(1).isUndef())
17362 ArrayRef<int> Mask = Shuf->getMask();
17372 if (N->getNumOperands() != VecOp + 1) {
17373 auto *Shuf2 = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp + 1));
17374 if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)
17379 for (SDValue Op : N->ops()) {
17385 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops);
17390 SDValue Op0 = N->getOperand(0);
17391 SDValue Op1 = N->getOperand(1);
17392 unsigned IsTop = N->getConstantOperandVal(2);
17394 // VMOVNT a undef -> a
17395 // VMOVNB a undef -> a
17396 // VMOVNB undef a -> a
17397 if (Op1->isUndef())
17399 if (Op0->isUndef() && !IsTop)
17404 if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
17405 Op1->getOpcode() == ARMISD::VQMOVNu) &&
17406 Op1->getConstantOperandVal(2) == 0)
17407 return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),
17408 Op0, Op1->getOperand(1), N->getOperand(2));
17413 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17430 SDValue Op0 = N->getOperand(0);
17431 unsigned IsTop = N->getConstantOperandVal(2);
17433 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17446 EVT VT = N->getValueType(0);
17447 SDValue LHS = N->getOperand(0);
17448 SDValue RHS = N->getOperand(1);
17452 // Turn VQDMULH(shuffle, shuffle) -> shuffle(VQDMULH)
17453 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
17457 SDValue NewBinOp = DCI.DAG.getNode(N->getOpcode(), DL, VT,
17460 return DCI.DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
17467 SDValue Op0 = N->getOperand(0);
17468 SDValue Op1 = N->getOperand(1);
17470 // Turn X << -C -> X >> C and vice versa. The negative shifts can come up from
17472 if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
17473 int ShiftAmt = C->getSExtValue();
17480 if (ShiftAmt >= -32 && ShiftAmt < 0) {
17482 N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
17483 SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
17484 DAG.getConstant(-ShiftAmt, DL, MVT::i32));
17493 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
17497 unsigned IntNo = N->getConstantOperandVal(0);
17505 // the build_vectors for 64-bit vector element shift counts are generally
17522 EVT VT = N->getOperand(1).getValueType();
17529 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
17533 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
17542 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
17548 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17553 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17565 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
17618 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17619 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
17623 EVT VT = N->getOperand(1).getValueType();
17627 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
17629 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
17636 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17637 N->getOperand(1), N->getOperand(2),
17660 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17662 if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
17677 unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17679 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
17687 bool Unsigned = N->getConstantOperandVal(2);
17689 return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
17696 bool Unsigned = N->getConstantOperandVal(2);
17702 for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
17704 Ops.push_back(N->getOperand(i));
17716 /// PerformShiftCombine - Checks for immediate versions of vector shifts and
17718 /// combining instead of DAG legalizing because the build_vectors for 64-bit
17725 EVT VT = N->getValueType(0);
17727 if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
17728 N->getOperand(0)->getOpcode() == ISD::AND &&
17729 N->getOperand(0)->hasOneUse()) {
17736 SDValue N0 = N->getOperand(0);
17737 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17740 uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
17741 ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17744 uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
17752 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
17756 DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
17765 if (ST->hasMVEIntegerOps())
17770 switch (N->getOpcode()) {
17774 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
17776 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
17783 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
17785 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
17787 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
17799 SDValue N0 = N->getOperand(0);
17803 if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
17804 LD->getExtensionType() != ISD::NON_EXTLOAD)
17806 EVT FromVT = LD->getValueType(0);
17807 EVT ToVT = N->getValueType(0);
17828 SDValue Ch = LD->getChain();
17829 SDValue BasePtr = LD->getBasePtr();
17830 Align Alignment = LD->getOriginalAlign();
17831 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
17832 AAMDNodes AAInfo = LD->getAAInfo();
17835 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
17851 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17877 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
17881 SDValue N0 = N->getOperand(0);
17883 // Check for sign- and zero-extensions of vector extract operations of 8- and
17884 // 16-bit vector elements. NEON and MVE support these directly. They are
17886 // to 32-bit types and it is messy to recognize the operations after that.
17887 if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
17891 EVT VT = N->getValueType(0);
17901 switch (N->getOpcode()) {
17915 if (ST->hasMVEIntegerOps())
17924 if (ST->hasMVEFloatOps())
17935 if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
17936 !Subtarget->isThumb2())
17972 /// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
17976 EVT VT = N->getValueType(0);
17977 SDValue N0 = N->getOperand(0);
17982 if (!ST->hasMVEIntegerOps())
17993 if (Min->getOpcode() != ISD::SMIN)
17995 if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
18000 SaturateC = APInt(32, (1 << 15) - 1, true);
18002 SaturateC = APInt(16, (1 << 7) - 1, true);
18005 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18008 if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
18030 N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
18038 if (Min->getOpcode() != ISD::UMIN)
18043 SaturateC = APInt(32, (1 << 16) - 1, true);
18045 SaturateC = APInt(16, (1 << 8) - 1, true);
18048 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18084 const APInt *CV = &C->getAPIntValue();
18085 return CV->isPowerOf2() ? CV : nullptr;
18102 SDValue Op0 = CMOV->getOperand(0);
18103 SDValue Op1 = CMOV->getOperand(1);
18104 auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
18105 SDValue CmpZ = CMOV->getOperand(4);
18108 if (!isNullConstant(CmpZ->getOperand(1)))
18111 assert(CmpZ->getOpcode() == ARMISD::CMPZ);
18112 SDValue And = CmpZ->getOperand(0);
18113 if (And->getOpcode() != ISD::AND)
18115 const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
18118 SDValue X = And->getOperand(0);
18128 if (Op1->getOpcode() != ISD::OR)
18131 ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
18134 SDValue Y = Op1->getOperand(0);
18140 APInt OrCI = OrC->getAPIntValue();
18141 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
18155 unsigned BitInX = AndC->logBase2();
18185 switch (N->getOpcode()) {
18191 if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
18200 if (Const->isZero())
18202 else if (Const->isOne())
18206 CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
18207 return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
18224 // The hwloop intrinsics that we're interested in are used for control-flow,
18226 // - test.start.loop.iterations will test whether its operand is zero. If it
18228 // - loop.decrement.reg also tests whether its operand is zero. If it is
18239 SDValue Chain = N->getOperand(0);
18242 if (N->getOpcode() == ISD::BRCOND) {
18244 Cond = N->getOperand(1);
18245 Dest = N->getOperand(2);
18247 assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
18248 CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18249 Cond = N->getOperand(2);
18250 Dest = N->getOperand(4);
18251 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
18252 if (!Const->isOne() && !Const->isZero())
18254 Imm = Const->getZExtValue();
18288 unsigned IntOp = Int->getConstantOperandVal(1);
18289 assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR)
18291 SDNode *Br = *N->use_begin();
18292 SDValue OtherTarget = Br->getOperand(1);
18296 SDValue NewBrOps[] = { Br->getOperand(0), Dest };
18346 /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
18349 SDValue Cmp = N->getOperand(4);
18354 EVT VT = N->getValueType(0);
18358 SDValue Chain = N->getOperand(0);
18359 SDValue BB = N->getOperand(1);
18360 SDValue ARMcc = N->getOperand(2);
18361 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
18364 // -> (brcond Chain BB CC CPSR Cmp)
18365 if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
18366 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
18367 LHS->getOperand(0)->hasOneUse() &&
18368 isNullConstant(LHS->getOperand(0)->getOperand(0)) &&
18369 isOneConstant(LHS->getOperand(0)->getOperand(1)) &&
18370 isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {
18372 ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
18373 LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
18379 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
18382 SDValue Cmp = N->getOperand(4);
18387 EVT VT = N->getValueType(0);
18391 SDValue FalseVal = N->getOperand(0);
18392 SDValue TrueVal = N->getOperand(1);
18393 SDValue ARMcc = N->getOperand(2);
18394 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
18397 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
18423 N->getOperand(3), Cmp);
18428 N->getOperand(3), NewCmp);
18432 // -> (cmov F T CC CPSR Cmp)
18433 if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
18434 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
18437 LHS->getOperand(2), LHS->getOperand(3),
18438 LHS->getOperand(4));
18445 // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) ->
18446 // if C1==EQ -> CMOV A, B, C2, $cpsr, D
18447 // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D
18448 if (N->getConstantOperandVal(2) == ARMCC::EQ ||
18449 N->getConstantOperandVal(2) == ARMCC::NE) {
18451 if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) {
18452 if (N->getConstantOperandVal(2) == ARMCC::NE)
18454 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
18455 N->getOperand(1),
18457 N->getOperand(3), C);
18464 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
18465 // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it
18467 // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
18472 // CMOV 0, 1, ==, (CMPZ x, y) ->
18476 // The USUBO_CARRY computes 0 - (x - y) and this will give a borrow when
18480 // x - y + (0 - (x - y)) + C == C
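// Worked example for the CLZ form above (illustrative): if x == y then
// SUB x, y is 0, CLZ of 0 is 32, and 32 >> 5 is 1; if x != y the SUB is
// nonzero, CLZ is in [0, 31], and the shift by 5 yields 0, exactly the
// 0/1 value the original CMOV selects.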
18492 (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
18494 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18500 N->getOperand(3), CPSRGlue.getValue(1));
18505 (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
18508 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18515 N->getOperand(3), CPSRGlue.getValue(1));
18522 // CMOV (SUBC x, y), z, !=, (SUBC x, y):1 ->
18529 // CMOV x, z, !=, (CMPZ x, 0) ->
18534 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
18540 unsigned ShiftAmount = TrueConst->logBase2();
18573 SDValue Src = N->getOperand(0);
18574 EVT DstVT = N->getValueType(0);
18576 // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
18577 if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
18588 // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
18598 // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD x
18610 EVT VT = N->getValueType(0);
18613 // MVETrunc(Undef, Undef) -> Undef
18614 if (all_of(N->ops(), [](SDValue Op) { return Op.isUndef(); }))
18617 // MVETrunc(MVETrunc a b, MVETrunc c, d) -> MVETrunc
18618 if (N->getNumOperands() == 2 &&
18619 N->getOperand(0).getOpcode() == ARMISD::MVETRUNC &&
18620 N->getOperand(1).getOpcode() == ARMISD::MVETRUNC)
18621 return DAG.getNode(ARMISD::MVETRUNC, DL, VT, N->getOperand(0).getOperand(0),
18622 N->getOperand(0).getOperand(1),
18623 N->getOperand(1).getOperand(0),
18624 N->getOperand(1).getOperand(1));
18626 // MVETrunc(shuffle, shuffle) -> VMOVN
18627 if (N->getNumOperands() == 2 &&
18628 N->getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
18629 N->getOperand(1).getOpcode() == ISD::VECTOR_SHUFFLE) {
18630 auto *S0 = cast<ShuffleVectorSDNode>(N->getOperand(0).getNode());
18631 auto *S1 = cast<ShuffleVectorSDNode>(N->getOperand(1).getNode());
18633 if (S0->getOperand(0) == S1->getOperand(0) &&
18634 S0->getOperand(1) == S1->getOperand(1)) {
18636 SmallVector<int, 8> Mask(S0->getMask());
18637 Mask.append(S1->getMask().begin(), S1->getMask().end());
18642 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18643 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18648 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18649 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18656 if (all_of(N->ops(), [](SDValue Op) {
18663 for (unsigned Op = 0; Op < N->getNumOperands(); Op++) {
18664 SDValue O = N->getOperand(Op);
18682 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18683 int NumIns = N->getNumOperands();
18687 if (N->getNumOperands() == 4)
18697 SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), DL, N->getOperand(I),
18711 SDValue N0 = N->getOperand(0);
18713 if (!LD || !LD->isSimple() || !N0.hasOneUse() || LD->isIndexed())
18716 EVT FromVT = LD->getMemoryVT();
18717 EVT ToVT = N->getValueType(0);
18732 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
18733 if (LD->getExtensionType() != ISD::NON_EXTLOAD &&
18734 LD->getExtensionType() != ISD::EXTLOAD &&
18735 LD->getExtensionType() != NewExtType)
18741 SDValue Ch = LD->getChain();
18742 SDValue BasePtr = LD->getBasePtr();
18743 Align Alignment = LD->getOriginalAlign();
18744 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
18745 AAMDNodes AAInfo = LD->getAAInfo();
18762 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18779 EVT VT = N->getValueType(0);
18781 assert(N->getNumValues() == 2 && "Expected MVEEXT with 2 elements");
18784 EVT ExtVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18788 return N->getOpcode() == ARMISD::MVESEXT
18794 // MVEEXT(VDUP) -> SIGN_EXTEND_INREG(VDUP)
18795 if (N->getOperand(0).getOpcode() == ARMISD::VDUP) {
18796 SDValue Ext = Extend(N->getOperand(0));
18800 // MVEEXT(shuffle) -> SIGN_EXTEND_INREG/ZERO_EXTEND_INREG
18801 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0))) {
18802 ArrayRef<int> Mask = SVN->getMask();
18804 assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());
18806 SDValue Op0 = SVN->getOperand(0);
18807 SDValue Op1 = SVN->getOperand(1);
18820 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18824 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18829 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18833 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18839 // MVEEXT(load) -> extload, extload
18840 if (N->getOperand(0)->getOpcode() == ISD::LOAD)
18850 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18851 int NumOuts = N->getNumValues();
18854 EVT LoadVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18856 if (N->getNumOperands() == 4)
18861 SDValue Chain = DAG.getStore(DAG.getEntryNode(), DL, N->getOperand(0),
18872 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL,
18882 switch (N->getOpcode()) {
18999 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19001 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
19006 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19008 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
19017 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19019 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19020 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19025 unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
19027 unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
19029 if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
19030 (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
19035 unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
19037 unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
19039 if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
19040 (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
19045 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19047 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19048 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19056 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19058 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19059 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19065 switch (N->getConstantOperandVal(1)) {
19116 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
19123 *Fast = Subtarget->hasV7Ops();
19129 // For any little-endian targets with neon, we can support unaligned ld/st
19131 // A big-endian target may also explicitly support unaligned accesses
19132 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
19139 if (!Subtarget->hasMVEIntegerOps())
19159 // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
19183 if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
19201 // Let the target-independent logic figure it out.
19205 // 64-bit integers are split into their high and low parts and held in two
19209 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
19211 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
19212 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
19239 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
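// (Illustrative: "ldrb r0, [r1]" and "ldrh r0, [r1]" already leave the upper
//  bits of r0 clear, so a following zero-extend costs nothing.)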
19258 return Subtarget->hasFullFP16();
19268 return Ext->getType()->getScalarSizeInBits() ==
19269 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
19286 if (!I->getType()->isVectorTy())
19289 if (Subtarget->hasNEON()) {
19290 switch (I->getOpcode()) {
19293 if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
19295 Ops.push_back(&I->getOperandUse(0));
19296 Ops.push_back(&I->getOperandUse(1));
19304 if (!Subtarget->hasMVEIntegerOps())
19308 if (!I->hasOneUse())
19310 auto *Sub = cast<Instruction>(*I->users().begin());
19311 return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
19314 if (match(I->getOperand(0), m_FNeg(m_Value())) ||
19315 match(I->getOperand(1), m_FNeg(m_Value())))
19321 switch (I->getOpcode()) {
19338 switch (II->getIntrinsicID()) {
19373 for (auto OpIdx : enumerate(I->operands())) {
19376 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
19380 if (Shuffle->getOpcode() == Instruction::BitCast)
19381 Shuffle = dyn_cast<Instruction>(Shuffle->getOperand(0));
19393 for (Use &U : Op->uses()) {
19399 Ops.push_back(&Shuffle->getOperandUse(0));
19401 Ops.push_back(&Op->getOperandUse(0));
19408 if (!Subtarget->hasMVEIntegerOps())
19410 Type *SVIType = SVI->getType();
19411 Type *ScalarType = SVIType->getScalarType();
19413 if (ScalarType->isFloatTy())
19414 return Type::getInt32Ty(SVIType->getContext());
19415 if (ScalarType->isHalfTy())
19416 return Type::getInt16Ty(SVIType->getContext());
19427 if (Ld->isExpandingLoad())
19431 if (Subtarget->hasMVEIntegerOps())
19438 if (ExtVal->use_empty() ||
19439 !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
19442 SDNode *U = *ExtVal->use_begin();
19443 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
19444 U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
19451 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19457 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
19464 /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
19469 /// ARM supports both fused and unfused multiply-add operations; we already
19475 /// patterns (and we don't have the non-fused floating point instruction).
19484 return Subtarget->hasMVEFloatOps();
19486 return Subtarget->useFPVFMx16();
19488 return Subtarget->useFPVFMx();
19490 return Subtarget->useFPVFMx64();
19519 if ((V & (Scale - 1)) != 0)
19528 if (VT.isVector() && Subtarget->hasNEON())
19530 if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
19531 !Subtarget->hasMVEFloatOps())
19537 V = -V;
19543 if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
19559 if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
19562 if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
19566 // + imm12 or - imm8
19575 /// isLegalAddressImmediate - Return true if the integer value can be used
19586 if (Subtarget->isThumb1Only())
19588 else if (Subtarget->isThumb2())
19593 V = - V;
19599 // +- imm12
19602 // +- imm8
19606 if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
19665 /// isLegalAddressingMode - Return true if the addressing mode represented
19689 if (Subtarget->isThumb1Only())
19692 if (Subtarget->isThumb2())
19701 if (Scale < 0) Scale = -Scale;
19708 // r +/- r
19709 if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
19729 /// isLegalICmpImmediate - Return true if the specified immediate is legal
19735 if (!Subtarget->isThumb())
19736 return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
19737 ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
19738 if (Subtarget->isThumb2())
19739 return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
19740 ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
19741 // Thumb1 doesn't have cmn, and only 8-bit immediates.
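// Example for the ARM/Thumb2 cases above (illustrative): comparing against
// -200 is still legal because the negated value fits a modified immediate, so
//   cmn r0, #200
// can be used instead of materialising -200 in a register first.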
19745 /// isLegalAddImmediate - Return true if the specified immediate is a legal add
19752 if (!Subtarget->isThumb())
19753 return ARM_AM::getSOImmVal(AbsImm) != -1;
19754 if (Subtarget->isThumb2())
19755 return ARM_AM::getT2SOImmVal(AbsImm) != -1;
19756 // Thumb1 only has 8-bit unsigned immediate.
19761 // (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
19774 const int64_t C0 = C0Node->getSExtValue();
19775 APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
19789 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19794 Base = Ptr->getOperand(0);
19795 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19796 int RHSC = (int)RHS->getZExtValue();
19797 if (RHSC < 0 && RHSC > -256) {
19798 assert(Ptr->getOpcode() == ISD::ADD);
19800 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19804 isInc = (Ptr->getOpcode() == ISD::ADD);
19805 Offset = Ptr->getOperand(1);
19809 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19810 int RHSC = (int)RHS->getZExtValue();
19811 if (RHSC < 0 && RHSC > -0x1000) {
19812 assert(Ptr->getOpcode() == ISD::ADD);
19814 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19815 Base = Ptr->getOperand(0);
19820 if (Ptr->getOpcode() == ISD::ADD) {
19823 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
19825 Base = Ptr->getOperand(1);
19826 Offset = Ptr->getOperand(0);
19828 Base = Ptr->getOperand(0);
19829 Offset = Ptr->getOperand(1);
19834 isInc = (Ptr->getOpcode() == ISD::ADD);
19835 Base = Ptr->getOperand(0);
19836 Offset = Ptr->getOperand(1);
19848 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19851 Base = Ptr->getOperand(0);
19852 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19853 int RHSC = (int)RHS->getZExtValue();
19854 if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
19855 assert(Ptr->getOpcode() == ISD::ADD);
19857 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19860 isInc = Ptr->getOpcode() == ISD::ADD;
19861 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19873 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19875 if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
19878 // We allow LE non-masked loads to change the type (for example use a vldrb.8
19883 ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
19884 int RHSC = (int)RHS->getZExtValue();
19887 if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
19888 assert(Ptr->getOpcode() == ISD::ADD);
19890 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19893 isInc = Ptr->getOpcode() == ISD::ADD;
19894 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19900 // Try to find a matching instruction based on s/zext, Alignment, Offset and
19902 Base = Ptr->getOperand(0);
19922 /// getPreIndexedAddressParts - returns true by value, base pointer and
19924 /// can be legally represented as pre-indexed load / store address.
19930 if (Subtarget->isThumb1Only())
19939 Ptr = LD->getBasePtr();
19940 VT = LD->getMemoryVT();
19941 Alignment = LD->getAlign();
19942 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19944 Ptr = ST->getBasePtr();
19945 VT = ST->getMemoryVT();
19946 Alignment = ST->getAlign();
19948 Ptr = LD->getBasePtr();
19949 VT = LD->getMemoryVT();
19950 Alignment = LD->getAlign();
19951 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19954 Ptr = ST->getBasePtr();
19955 VT = ST->getMemoryVT();
19956 Alignment = ST->getAlign();
19964 isLegal = Subtarget->hasMVEIntegerOps() &&
19967 Subtarget->isLittle(), Base, Offset, isInc, DAG);
19969 if (Subtarget->isThumb2())
19983 /// getPostIndexedAddressParts - returns true by value, base pointer and
19985 /// combined with a load / store to form a post-indexed load / store.
19997 VT = LD->getMemoryVT();
19998 Ptr = LD->getBasePtr();
19999 Alignment = LD->getAlign();
20000 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
20001 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
20003 VT = ST->getMemoryVT();
20004 Ptr = ST->getBasePtr();
20005 Alignment = ST->getAlign();
20006 isNonExt = !ST->isTruncatingStore();
20008 VT = LD->getMemoryVT();
20009 Ptr = LD->getBasePtr();
20010 Alignment = LD->getAlign();
20011 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
20012 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
20015 VT = ST->getMemoryVT();
20016 Ptr = ST->getBasePtr();
20017 Alignment = ST->getAlign();
20018 isNonExt = !ST->isTruncatingStore();
20023 if (Subtarget->isThumb1Only()) {
20024 // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
20025 // must be non-extending/truncating, i32, with an offset of 4.
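// For example (illustrative), such a +4 post-incremented i32 load can be
// selected as
//   ldm r0!, {r1}
// which loads from r0 and leaves r0 advanced by 4.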
20026 assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
20027 if (Op->getOpcode() != ISD::ADD || !isNonExt)
20029 auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
20030 if (!RHS || RHS->getZExtValue() != 4)
20035 Offset = Op->getOperand(1);
20036 Base = Op->getOperand(0);
20044 isLegal = Subtarget->hasMVEIntegerOps() &&
20046 Subtarget->isLittle(), Base, Offset,
20049 if (Subtarget->isThumb2())
20060 // Swap base ptr and offset to catch more post-index load / store when
20062 if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
20063 !Subtarget->isThumb2())
20066 // Post-indexed load / store update the base pointer.
20093 if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
20095 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
20112 static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
20117 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
20119 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
20143 assert(Pos->getAPIntValue().ult(NumSrcElts) &&
20145 unsigned Idx = Pos->getZExtValue();
20164 KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20172 KnownBits KnownOp0 = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20173 KnownBits KnownOp1 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
20178 // CSNEG: KnownOp0 or KnownOp1 * -1
20187 KnownOp1, KnownBits::makeConstant(APInt(32, -1)));
20220 unsigned Mask = C->getZExtValue();
20226 // If the mask is all zeros, let the target-independent code replace the
20231 // If the mask is all ones, erase the AND. (Currently, the target-independent
20237 auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
20240 auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
20262 // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
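// For instance (illustrative), masking with 0xFFFFFF00 (-256) on Thumb1 can
// be done as
//   movs r1, #255
//   bics r0, r1
// since BICS clears the bits that are set in r1.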
20264 if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
20271 // two-instruction sequence.
20289 if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
20290 isa<ConstantSDNode>(Op->getOperand(2))) {
20291 unsigned ShAmt = Op->getConstantOperandVal(2);
20293 << (32 - ShAmt)))
20297 TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
20315 //===----------------------------------------------------------------------===//
20317 //===----------------------------------------------------------------------===//
20321 if (!Subtarget->hasV6Ops())
20324 InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
20325 StringRef AsmStr = IA->getAsmString();
20339 IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
20340 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
20341 if (Ty && Ty->getBitWidth() == 32)
20358 if (!Subtarget->hasVFP2Base())
20362 if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
20370 /// getConstraintType - Given a constraint letter, return the type of
20411 Type *type = CallOperandVal->getType();
20418 if (type->isIntegerTy()) {
20419 if (Subtarget->isThumb())
20426 if (type->isFloatingPointTy())
20442 if (Subtarget->isThumb())
20446 if (Subtarget->isThumb())
20450 if (Subtarget->isThumb1Only())
20509 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
20531 int64_t CVal64 = C->getSExtValue();
20542 if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
20547 if (Subtarget->isThumb1Only()) {
20552 } else if (Subtarget->isThumb2()) {
20554 // data-processing instruction.
20555 if (ARM_AM::getT2SOImmVal(CVal) != -1)
20559 // data-processing instruction.
20560 if (ARM_AM::getSOImmVal(CVal) != -1)
20566 if (Subtarget->isThumb1Only()) {
20567 // This must be a constant between -255 and -1, for negated ADD
20571 if (CVal >= -255 && CVal <= -1)
20574 // This must be a constant between -4095 and 4095. It is not clear
20577 if (CVal >= -4095 && CVal <= 4095)
20583 if (Subtarget->isThumb1Only()) {
20584 // A 32-bit value where only one byte has a nonzero value. Exclude
20590 } else if (Subtarget->isThumb2()) {
20592 // value in a data-processing instruction. This can be used in GCC
20596 if (ARM_AM::getT2SOImmVal(~CVal) != -1)
20600 // value in a data-processing instruction. This can be used in GCC
20604 if (ARM_AM::getSOImmVal(~CVal) != -1)
20610 if (Subtarget->isThumb1Only()) {
20611 // This must be a constant between -7 and 7,
20612 // for 3-operand ADD/SUB immediate instructions.
20613 if (CVal >= -7 && CVal < 7)
20615 } else if (Subtarget->isThumb2()) {
20617 // data-processing instruction. This can be used in GCC with an "n"
20621 if (ARM_AM::getT2SOImmVal(-CVal) != -1)
20625 // data-processing instruction. This can be used in GCC with an "n"
20629 if (ARM_AM::getSOImmVal(-CVal) != -1)
20635 if (Subtarget->isThumb1Only()) {
20644 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
20650 if (Subtarget->isThumb1Only()) {
20658 if (Subtarget->isThumb1Only()) {
20659 // This must be a multiple of 4 between -508 and 508, for
20661 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
20679 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20680 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20682 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20683 N->getOpcode() == ISD::SREM;
20697 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20698 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20700 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20701 N->getOpcode() == ISD::SREM;
20704 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20705 EVT ArgVT = N->getOperand(i).getValueType();
20707 Entry.Node = N->getOperand(i);
20713 if (Subtarget->isTargetWindows() && Args.size() >= 2)
20719 assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
20720 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
20721 Subtarget->isTargetWindows()) &&
20722 "Register-based DivRem lowering only");
20723 unsigned Opcode = Op->getOpcode();
20727 EVT VT = Op->getValueType(0);
20737 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
20746 // rem = a - b * div
20749 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
20750 : Subtarget->hasDivideInARMMode();
20751 if (hasDivide && Op->getValueType(0).isSimple() &&
20752 Op->getSimpleValueType(0) == MVT::i32) {
20754 const SDValue Dividend = Op->getOperand(0);
20755 const SDValue Divisor = Op->getOperand(1);
20777 if (Subtarget->isTargetWindows())
20792 EVT VT = N->getValueType(0);
20794 if (VT == MVT::i64 && isa<ConstantSDNode>(N->getOperand(1))) {
20797 return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), N->getValueType(0),
20818 RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
20823 bool isSigned = N->getOpcode() == ISD::SREM;
20827 if (Subtarget->isTargetWindows())
20839 assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
20840 return ResNode->getOperand(1);
20845 assert(Subtarget->isTargetWindows() && "unsupported target platform");
20853 "no-stack-arg-probe")) {
20855 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
20862 DAG.getConstant(-(uint64_t)Align->value(), DL, MVT::i32));
20886 bool IsStrict = Op->isStrictFPOpcode();
20891 "Unexpected type for custom-lowering FP_EXTEND");
20893 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20896 assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
20899 // Converting from 32 -> 64 is valid if we have FP64.
20900 if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
20911 // Either we are converting from 16 -> 64, without FP16 and/or
20912 // FP.double-precision or without Armv8-fp. So we must do it in two
20914 // Or we are converting from 32 -> 64 without fp.double-precision or 16 -> 32
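// (Illustrative sketch: an f16 -> f64 extend is emitted as f16 -> f32
//  (VCVTB.F32.F16 or the __aeabi_h2f helper) followed by f32 -> f64
//  (VCVT.F64.F32 or __aeabi_f2d), picking an instruction or a libcall for
//  each step depending on the available FP features.)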
20921 bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
20935 "Unexpected type for custom-lowering FP_EXTEND");
20945 bool IsStrict = Op->isStrictFPOpcode();
20954 "Unexpected type for custom-lowering FP_ROUND");
20956 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20961 // Instruction from 32 -> 16 if hasFP16 is valid
20962 if (SrcSz == 32 && Subtarget->hasFP16())
20965 // Lib call from 32 -> 16 / 64 -> [32, 16]
20968 "Unexpected type for custom-lowering FP_ROUND");
20992 /// isFPImmLegal - Returns true if the target can instruction select the
20997 if (!Subtarget->hasVFP3Base())
20999 if (VT == MVT::f16 && Subtarget->hasFullFP16())
21000 return ARM_AM::getFP16Imm(Imm) != -1;
21001 if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
21002 ARM_AM::getFP32FP16Imm(Imm) != -1)
21005 return ARM_AM::getFP32Imm(Imm) != -1;
21006 if (VT == MVT::f64 && Subtarget->hasFP64())
21007 return ARM_AM::getFP64Imm(Imm) != -1;
21011 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
21033 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21036 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
21037 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
21049 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21050 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
21069 Type *ArgTy = I.getArgOperand(ArgI)->getType();
21070 if (!ArgTy->isVectorTy())
21074 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21077 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
21078 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
21091 Type *ArgTy = I.getArgOperand(ArgI)->getType();
21092 if (!ArgTy->isVectorTy())
21096 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21108 Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
21110 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21113 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21122 Type *VecTy = I.getArgOperand(1)->getType();
21124 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21127 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21145 Info.memVT = MVT::getVT(I.getType()->getContainedType(0));
21155 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
21166 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21175 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21184 MVT DataVT = MVT::getVT(I.getArgOperand(2)->getType());
21185 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
21247 assert(Ty->isIntegerTy());
21249 unsigned Bits = Ty->getPrimitiveSizeInBits();
21265 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21268 if (!Subtarget->hasDataBarrier()) {
21270 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
21272 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
21285 // Only a full system barrier exists in the M-class architectures.
21286 Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
21292 // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
21299 llvm_unreachable("Invalid fence: unordered/non-atomic");
21304 if (!Inst->hasAtomicStore())
21309 if (Subtarget->preferISHSTBarriers())
21324 llvm_unreachable("Invalid fence: unordered/not-atomic");
21336 // Loads and stores less than 64-bits are already atomic; ones above that
21343 if (Subtarget->isMClass())
21345 else if (Subtarget->isThumb())
21346 has64BitAtomicStore = Subtarget->hasV7Ops();
21348 has64BitAtomicStore = Subtarget->hasV6Ops();
21350 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
21355 // Loads and stores less than 64-bits are already atomic; ones above that
21361 // sections A8.8.72-74 LDRD)
21365 if (Subtarget->isMClass())
21367 else if (Subtarget->isThumb())
21368 has64BitAtomicLoad = Subtarget->hasV7Ops();
21370 has64BitAtomicLoad = Subtarget->hasV6Ops();
21372 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
21378 // and up to 64 bits on the non-M profiles
21381 if (AI->isFloatingPointOperation())
21384 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21386 if (Subtarget->isMClass())
21387 hasAtomicRMW = Subtarget->hasV8MBaselineOps();
21388 else if (Subtarget->isThumb())
21389 hasAtomicRMW = Subtarget->hasV7Ops();
21391 hasAtomicRMW = Subtarget->hasV6Ops();
21392 if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
21393 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21397 // can never succeed. So at -O0 lower this operation to a CAS loop.
21406 // bits, and up to 64 bits on the non-M profiles.
21409 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21413 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
21414 unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
21416 if (Subtarget->isMClass())
21417 HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
21418 else if (Subtarget->isThumb())
21419 HasAtomicCmpXchg = Subtarget->hasV7Ops();
21421 HasAtomicCmpXchg = Subtarget->hasV6Ops();
21423 HasAtomicCmpXchg && Size <= (Subtarget->isMClass() ? 32U : 64U))
21435 return !Subtarget->isROPI() && !Subtarget->isRWPI();
21439 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21451 F->addParamAttr(0, Attribute::AttrKind::InReg);
21456 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21463 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21471 if (!Subtarget->hasNEON())
21478 if (VectorTy->isFPOrFPVectorTy())
21486 assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
21487 unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedValue();
21498 return Subtarget->hasV6T2Ops();
21502 return Subtarget->hasV6T2Ops();
21507 if (!Subtarget->hasV7Ops())
21513 if (!Mask || Mask->getValue().getBitWidth() > 32u)
21515 auto MaskVal = unsigned(Mask->getValue().getZExtValue());
21516 return (Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal)
21517 : ARM_AM::getSOImmVal(MaskVal)) != -1;
21523 if (Subtarget->hasMinSize() && !Subtarget->isTargetWindows())
21532 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21535 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
21538 if (ValueTy->getPrimitiveSizeInBits() == 64) {
21547 if (!Subtarget->isLittle())
21555 Type *Tys[] = { Addr->getType() };
21560 CI->addParamAttr(
21561 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
21567 if (!Subtarget->hasV7Ops())
21569 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21576 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21582 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
21586 Type *Int32Ty = Type::getInt32Ty(M->getContext());
21590 if (!Subtarget->isLittle())
21596 Type *Tys[] = { Addr->getType() };
21601 Val, Strex->getFunctionType()->getParamType(0)),
21603 CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,
21604 Val->getType()));
21610 return Subtarget->isMClass();
21626 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
21628 if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
21634 if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
21636 if (Subtarget->hasMVEIntegerOps() && Factor == 3)
21640 if (VecTy->getNumElements() < 2)
21647 if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)
21652 if (Subtarget->hasNEON() && VecSize == 64)
21658 if (Subtarget->hasNEON())
21660 if (Subtarget->hasMVEIntegerOps())
21685 auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());
21686 Type *EltTy = VecTy->getElementType();
21688 const DataLayout &DL = LI->getDataLayout();
21689 Align Alignment = LI->getAlign();
21701 if (EltTy->isPointerTy())
21707 Value *BaseAddr = LI->getPointerOperand();
21710 // If we're going to generate more than one load, reset the sub-vector type
21712 VecTy = FixedVectorType::get(VecTy->getElementType(),
21713 VecTy->getNumElements() / NumLoads);
21719 if (Subtarget->hasNEON()) {
21720 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21726 Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
21730 Ops.push_back(Builder.getInt32(LI->getAlign().value()));
21738 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21741 Intrinsic::getDeclaration(LI->getModule(), LoadInts, Tys);
21749 // Holds sub-vectors extracted from the load intrinsic return values. The
21750 // sub-vectors are associated with the shufflevector instructions they will
21758 BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
21759 VecTy->getNumElements() * Factor);
21772 if (EltTy->isPointerTy())
21775 FixedVectorType::get(SV->getType()->getElementType(), VecTy));
21781 // Replace uses of the shufflevector instructions with the sub-vectors
21783 // associated with more than one sub-vector, those sub-vectors will be
21789 SVI->replaceAllUsesWith(WideVec);
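// Shape of the rewrite (illustrative, factor 2 on NEON, masks abbreviated):
//   %wide = load <8 x i16>, ptr %p
//   %even = shufflevector %wide, poison, <0,2,4,6>
//   %odd  = shufflevector %wide, poison, <1,3,5,7>
// becomes a single structured load, roughly
//   %vld2 = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0(ptr %p, i32 2)
// with each original shuffle replaced by the matching extractvalue; loads too
// wide for one vldN are split and reassembled with concatenating shuffles.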
21827 auto *VecTy = cast<FixedVectorType>(SVI->getType());
21828 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
21830 unsigned LaneLen = VecTy->getNumElements() / Factor;
21831 Type *EltTy = VecTy->getElementType();
21834 const DataLayout &DL = SI->getDataLayout();
21835 Align Alignment = SI->getAlign();
21845 Value *Op0 = SVI->getOperand(0);
21846 Value *Op1 = SVI->getOperand(1);
21851 if (EltTy->isPointerTy()) {
21856 FixedVectorType::get(IntTy, cast<FixedVectorType>(Op0->getType()));
21864 Value *BaseAddr = SI->getPointerOperand();
21868 // and sub-vector type to something legal.
21870 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
21875 auto Mask = SVI->getShuffleMask();
21879 if (Subtarget->hasNEON()) {
21883 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21887 SI->getModule(), StoreInts[Factor - 2], Tys);
21892 Ops.push_back(Builder.getInt32(SI->getAlign().value()));
21899 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21902 Intrinsic::getDeclaration(SI->getModule(), StoreInts, Tys);
21919 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
21935 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
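// Mirror of the load case (illustrative, factor 2 on NEON): a store of
//   shufflevector <4 x i16> %a, <4 x i16> %b, <0,4,1,5,2,6,3,7>
// is rewritten into one structured store, roughly
//   call void @llvm.arm.neon.vst2.p0.v4i16(ptr %p, <4 x i16> %a, <4 x i16> %b, i32 2)
// The StartMask arithmetic above recovers where each interleaved field starts
// even when parts of the shuffle mask are undef.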
21966 for (unsigned i = 0; i < ST->getNumElements(); ++i) {
21968 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
21974 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
21976 Members += SubMembers * AT->getNumElements();
21977 } else if (Ty->isFloatTy()) {
21982 } else if (Ty->isDoubleTy()) {
21994 return VT->getPrimitiveSizeInBits().getFixedValue() == 64;
21996 return VT->getPrimitiveSizeInBits().getFixedValue() == 128;
21998 switch (VT->getPrimitiveSizeInBits().getFixedValue()) {
22018 if (!ArgTy->isVectorTy())
22021 // Avoid over-aligning vector parameters. It would require realigning the
22026 /// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
22027 /// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
22039 LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
22041 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
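// Concrete AAPCS-VFP example (hedged): a C argument such as
//   struct Quad { float x, y, z, w; };
// is a homogeneous aggregate of four floats, so it must be passed either
// entirely in s0-s3 or entirely on the stack, never split across the two;
// the same consecutive-register requirement is extended here to [N x i32] and
// [N x i64] so front ends can avoid emitting explicit padding arguments.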
22049 return Subtarget->useSjLjEH() ? Register() : ARM::R0;
22056 return Subtarget->useSjLjEH() ? Register() : ARM::R1;
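// i.e. the EH runtime reports the exception pointer in r0 and the selector in
// r1, except under setjmp/longjmp exception handling, where no fixed register
// is advertised.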
22061 ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
22062 AFI->setIsSplitCSR(true);
22068 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
22069 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
22073 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
22074 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
22075 MachineBasicBlock::iterator MBBI = Entry->begin();
22085 Register NewVR = MRI->createVirtualRegister(RC);
22087 // FIXME: this currently does not emit CFI pseudo-instructions; it works
22088 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
22090 // CFI pseudo-instructions.
22091 assert(Entry->getParent()->getFunction().hasFnAttribute(
22094 Entry->addLiveIn(*I);
22095 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
22098 // Insert the copy-back instructions right before the terminator.
22100 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
22101 TII->get(TargetOpcode::COPY), *I)
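// In effect, for CXX_FAST_TLS functions the callee-saved registers are not
// spilled in the prologue at all: they are copied into virtual registers at
// entry and copied back immediately before every return, keeping the hot
// TLS-access path free of stack traffic.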
22112 return Subtarget->hasMVEIntegerOps();
22121 auto *ScalarTy = VTy->getScalarType();
22122 unsigned NumElements = VTy->getNumElements();
22124 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
22129 if (ScalarTy->isHalfTy() || ScalarTy->isFloatTy())
22130 return Subtarget->hasMVEFloatOps();
22135 return Subtarget->hasMVEIntegerOps() &&
22136 (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
22137 ScalarTy->isIntegerTy(32));
22145 FixedVectorType *Ty = cast<FixedVectorType>(InputA->getType());
22147 unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();
22152 int Stride = Ty->getNumElements() / 2;
22153 auto SplitSeq = llvm::seq<int>(0, Ty->getNumElements());
22175 ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());
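// Sketch of the splitting step (illustrative): a 256-bit input such as
// <16 x half> is cut into two 128-bit halves using Stride = 8, each half is
// lowered separately to the MVE complex operations (e.g. VCMLA/VCADD), and
// the results are stitched back together by a shufflevector whose JoinMask is
// just the identity sequence 0..15 over the concatenated halves.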