Lines Matching +full:fiq +full:- +full:based

1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
122 #define DEBUG_TYPE "arm-isel"
131 ARMInterworking("arm-interworking", cl::Hidden,
136 "arm-promote-constant", cl::Hidden,
141 "arm-promote-constant-max-size", cl::Hidden,
145 "arm-promote-constant-max-total", cl::Hidden,
150 MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
403 // MVE integer-only / float support. At the integer-only level, only FP data
404 // processing on the FP vector types is inhibited.
499 RegInfo = Subtarget->getRegisterInfo();
500 Itins = Subtarget->getInstrItineraryData();
505 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
506 !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
514 if (Subtarget->isTargetMachO()) {
516 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
517 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
523 // Single-precision floating-point arithmetic.
529 // Double-precision floating-point arithmetic.
535 // Single-precision comparisons.
544 // Double-precision comparisons.
553 // Floating-point to integer conversions.
565 // Integer to floating-point conversions.
585 if (Subtarget->isAAPCS_ABI() &&
586 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
587 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
594 // Double-precision floating-point arithmetic helper functions
601 // Double-precision floating-point comparison helper functions
611 // Single-precision floating-point arithmetic helper functions
618 // Single-precision floating-point comparison helper functions
628 // Floating-point to integer conversions.
645 // Integer to floating-point conversions.
707 if (Subtarget->isTargetWindows()) {
729 // Use divmod compiler-rt calls for iOS 5.0 and later.
730 if (Subtarget->isTargetMachO() &&
731 !(Subtarget->isTargetIOS() &&
732 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
737 // The half <-> float conversion functions are always soft-float on
738 // non-watchos platforms, but are needed for some targets which use a
739 // hard-float calling convention by default.
740 if (!Subtarget->isTargetWatchABI()) {
741 if (Subtarget->isAAPCS_ABI()) {
754 if (Subtarget->isTargetAEABI()) {
771 if (Subtarget->isThumb1Only())
776 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
777 Subtarget->hasFPRegs()) {
786 if (!Subtarget->hasVFP2Base())
788 if (!Subtarget->hasFP64())
792 if (Subtarget->hasFullFP16()) {
801 if (Subtarget->hasBF16()) {
804 if (!Subtarget->hasFullFP16())
826 if (Subtarget->hasMVEIntegerOps())
827 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
829 // Combine low-overhead loop intrinsics so that we can lower i1 types.
830 if (Subtarget->hasLOB()) {
834 if (Subtarget->hasNEON()) {
848 if (Subtarget->hasFullFP16()) {
853 if (Subtarget->hasBF16()) {
859 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
899 if (Subtarget->hasNEON()) {
939 // Custom handling for some quad-vector types to detect VMULL.
965 // types wider than 8-bits. However, custom lowering can leverage the
1004 if (!Subtarget->hasVFP4Base()) {
1031 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1039 if (Subtarget->hasMVEIntegerOps()) {
1044 if (Subtarget->hasMVEFloatOps()) {
1048 if (!Subtarget->hasFP64()) {
1049 // When targeting a floating-point unit with only single-precision
1050 // operations, f64 is legal for the few double-precision instructions which
1051 // are present. However, no double-precision operations other than moves,
1092 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1095 if (Subtarget->hasFullFP16()) {
1101 if (!Subtarget->hasFP16()) {
1106 computeRegisterProperties(Subtarget->getRegisterInfo());
1108 // ARM does not have floating-point extending loads.
1124 if (!Subtarget->isThumb1Only()) {
1137 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1149 if (Subtarget->hasDSP()) {
1159 if (Subtarget->hasBaseDSP()) {
1167 if (Subtarget->isThumb1Only()) {
1171 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1172 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1187 if (Subtarget->hasMVEIntegerOps())
1191 if (Subtarget->isThumb1Only()) {
1197 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1208 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1216 // implementation-specific ways of obtaining this information.
1217 if (Subtarget->hasPerfMon())
1221 if (!Subtarget->hasV6Ops())
1224 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1225 : Subtarget->hasDivideInARMMode();
1232 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1243 // Register based DivRem for AEABI (RTABI 4.2)
1244 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1245 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1246 Subtarget->isTargetWindows()) {
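// Sketch (not from this file): source code whose division and remainder the
// register-based DivRem lowering above folds into one runtime call on AEABI
// targets, where __aeabi_idivmod returns the quotient in r0 and the remainder
// in r1. The struct and function names below are illustrative only.
struct DivModResult { int Quot, Rem; };

DivModResult divmod(int Num, int Den) {
  // Both operations below are expected to become a single __aeabi_idivmod
  // call (or a hardware SDIV plus MLS when a divide instruction exists).
  return {Num / Den, Num % Den};
}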
1251 if (Subtarget->isTargetWindows()) {
1320 if (Subtarget->isTargetWindows())
1328 if (Subtarget->hasAnyDataBarrier() &&
1329 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1333 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1338 if (!Subtarget->hasAcquireRelease() ||
1347 if (Subtarget->hasDataBarrier())
1351 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1375 if (Subtarget->isTargetLinux() ||
1376 (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1380 // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1381 // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1388 } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1389 Subtarget->hasForced32BitAtomics()) {
1390 // Cortex-M cores (besides Cortex-M0) have 32-bit atomics.
1403 if (!Subtarget->hasV6Ops()) {
1409 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1410 !Subtarget->isThumb1Only()) {
1411 // Turn f64 -> i64 into VMOVRRD and i64 -> f64 into VMOVDRR.
1429 if (Subtarget->useSjLjEH())
1441 if (Subtarget->hasFullFP16()) {
1451 if (Subtarget->hasFullFP16())
1466 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1467 !Subtarget->isThumb1Only()) {
1474 if (!Subtarget->hasVFP4Base()) {
1480 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1481 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1482 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1487 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1488 if (!Subtarget->hasFP16()) {
1493 // Strict floating-point comparisons need custom lowering.
1509 // FP-ARMv8 implements a lot of rounding-like FP operations.
1510 if (Subtarget->hasFPARMv8Base()) {
1519 if (Subtarget->hasNEON()) {
1526 if (Subtarget->hasFP64()) {
1539 if (Subtarget->hasFullFP16()) {
1558 if (Subtarget->hasNEON()) {
1570 if (Subtarget->hasFullFP16()) {
1583 // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
1585 if (Subtarget->isTargetWindows()) {
1597 // We have target-specific dag combine patterns for the following nodes:
1598 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1602 if (Subtarget->hasMVEIntegerOps())
1605 if (Subtarget->hasV6Ops())
1607 if (Subtarget->isThumb1Only())
1610 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1611 Subtarget->isThumb2()) {
1617 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1618 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1623 // Temporary - rewrite interface to use type.
1626 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1628 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1635 // Prefer likely predicted branches to selects on out-of-order cores.
1636 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1638 setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1639 setPrefFunctionAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1641 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1645 return Subtarget->useSoftFloat();
1649 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1650 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1674 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1675 // coalescing by double-counting the SP regs. See the FIXME above.
1676 if (Subtarget->useNEONForSinglePrecisionFP())
1916 if ((Subtarget->hasMVEIntegerOps() &&
1919 (Subtarget->hasMVEFloatOps() &&
1925 /// getRegClassFor - Return the register class that should be used for the
1934 if (Subtarget->hasNEON()) {
1940 if (Subtarget->hasMVEIntegerOps()) {
1957 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1958 // cycle faster than 4-byte aligned LDM.
1960 (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
1972 unsigned NumVals = N->getNumValues();
1977 EVT VT = N->getValueType(i);
1984 if (!N->isMachineOpcode())
1989 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1990 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1994 if (!Itins->isEmpty() &&
1995 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2U)
2001 //===----------------------------------------------------------------------===//
2003 //===----------------------------------------------------------------------===//
2009 return Const->getZExtValue() == 16;
2017 return Const->getZExtValue() == 16;
2025 return Const->getZExtValue() == 16;
2029 // Check for a signed 16-bit value. We special case SRA because it makes it
2039 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
2056 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
2085 //===----------------------------------------------------------------------===//
2087 //===----------------------------------------------------------------------===//
2089 /// getEffectiveCallingConv - Get the effective calling convention, taking into
2113 if (!Subtarget->isAAPCS_ABI())
2115 else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
2123 if (!Subtarget->isAAPCS_ABI()) {
2124 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2127 } else if (Subtarget->hasVFP2Base() &&
2128 !Subtarget->isThumb1Only() && !isVarArg)
2145 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2176 if (Subtarget->hasFullFP16()) {
2189 if (Subtarget->hasFullFP16()) {
2201 /// LowerCallResult - Lower the result values of a call into the
2240 if (!Subtarget->isLittle())
2257 if (!Subtarget->isLittle())
2279 // had been copied to the LSBs of a 32-bit register.
2285 // On CMSE Non-secure Calls, call results (returned values) whose bitwidth
2286 // is less than 32 bits must be sign- or zero-extended after the call for
2337 unsigned id = Subtarget->isLittle() ? 0 : 1;
2341 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2353 DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2362 /// LowerCall - Lowering a call into a callseq_start <-
2363 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
2396 // Lower 'returns_twice' calls to a pseudo-instruction.
2397 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2398 !Subtarget->noBTIAtReturnTwice())
2399 GuardWithBTI = AFI->branchTargetEnforcement();
2401 // Determine whether this is a non-secure function call.
2402 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2406 if (!Subtarget->supportsTailCall())
2409 // For both the non-secure calls and the returns from a CMSE entry function,
2412 if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2418 // as BLXr has a 16-bit encoding.
2419 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2421 auto *BB = CLI.CB->getParent();
2422 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2423 count_if(GV->users(), [&BB](const User *U) {
2425 cast<Instruction>(U)->getParent() == BB;
2444 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2455 // arguments to begin at SP+0. Completely unused for non-tail calls.
2460 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2463 // popped size 16-byte aligned.
2470 SPDiff = NumReusableBytes - NumBytes;
2474 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2475 AFI->setArgRegsSaveSize(-SPDiff);
2534 // had been copied to the LSBs of a 32-bit register.
2621 offset = RegEnd - RegBegin;
2634 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2659 // Build a sequence of copy-to-reg nodes chained together with token chain
2676 GVal = G->getGlobal();
2677 bool isStub = !TM.shouldAssumeDSOLocal(GVal) && Subtarget->isTargetMachO();
2679 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2683 if (Subtarget->genLongCalls()) {
2684 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2685 "long-calls codegen is not position independent!");
2690 if (Subtarget->genExecuteOnly()) {
2691 if (Subtarget->useMovt())
2697 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2709 const char *Sym = S->getSymbol();
2711 if (Subtarget->genExecuteOnly()) {
2712 if (Subtarget->useMovt())
2718 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2733 bool isDef = GVal->isStrongDefinitionForLinker();
2736 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2738 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2739 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2748 } else if (Subtarget->isTargetCOFF()) {
2749 assert(Subtarget->isTargetWindows() &&
2752 if (GVal->hasDLLImportStorageClass())
2770 const char *Sym = S->getSymbol();
2771 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2772 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2793 "call to non-secure function would "
2796 DAG.getContext()->diagnose(Diag);
2801 "call to non-secure function would return value through pointer",
2803 DAG.getContext()->diagnose(Diag);
2809 if (Subtarget->isThumb()) {
2814 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2819 if (!isDirect && !Subtarget->hasV5TOps())
2821 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2823 !Subtarget->hasMinSize())
2830 // We don't usually want to end the call-sequence here because we would tidy
2831 // the frame up *after* the call, however in the ABI-changing tail-call case
2853 // Add a register mask operand representing the call-preserved registers.
2855 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2857 // For 'this' returns, use the R0-preserving mask if applicable
2858 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2864 Mask = ARI->getCallPreservedMask(MF, CallConv);
2867 Mask = ARI->getCallPreservedMask(MF, CallConv);
2890 // If we're guaranteeing tail-calls will be honoured, the callee must
2892 // we need to undo that after it returns to restore the status-quo.
2895 canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;
2908 /// HandleByVal - Every parameter *after* a byval parameter is passed
2917 unsigned Reg = State->AllocateReg(GPRArgRegs);
2922 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2924 Reg = State->AllocateReg(GPRArgRegs);
2929 unsigned Excess = 4 * (ARM::R4 - Reg);
2935 const unsigned NSAAOffset = State->getStackSize();
2937 while (State->AllocateReg(GPRArgRegs))
2945 // the end (first after last) register would be reg + param-size-in-regs,
2950 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2954 State->AllocateReg(GPRArgRegs);
2959 Size = std::max<int>(Size - Excess, 0);
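// A standalone model of the register/stack split above (illustrative only:
// FirstFreeReg counts r0..r3 as 0..3, and all names are hypothetical).
struct ByValSplit { unsigned RegBytes, StackBytes; };

ByValSplit splitByVal(unsigned Size, unsigned ByValAlign, unsigned FirstFreeReg) {
  unsigned AlignInRegs = ByValAlign / 4;
  unsigned RegsLeft = 4 - FirstFreeReg;        // registers FirstFreeReg..r3
  RegsLeft -= RegsLeft % AlignInRegs;          // waste regs to honour alignment
  unsigned Excess = 4 * RegsLeft;              // bytes that fit in registers
  unsigned RegBytes = Size < Excess ? Size : Excess;
  return {RegBytes, Size - RegBytes};          // remainder goes on the stack
}
// e.g. splitByVal(20, 8, /*r1*/ 1) == {8, 12}: r1 is wasted for alignment,
// r2-r3 carry 8 bytes, and the remaining 12 bytes are passed on the stack.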
2962 /// MatchingStackOffset - Return true if the given stack call argument is
2972 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2975 MachineInstr *Def = MRI->getVRegDef(VR);
2979 if (!TII->isLoadFromStackSlot(*Def, FI))
2992 SDValue Ptr = Ld->getBasePtr();
2996 FI = FINode->getIndex();
3006 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
3025 assert(Subtarget->supportsTailCall());
3028 // to the call take up r0-r3. The reason is that there are no legal registers
3035 if (Subtarget->isThumb1Only())
3038 if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true))
3045 // Exception-handling functions need a special set of instructions to indicate
3046 // a return to the hardware. Tail-calling another function would probably
3061 // Externally-defined functions with weak linkage should not be
3062 // tail-called on ARM when the OS does not support dynamic
3063 // pre-emption of symbols, as the AAELF spec requires normal calls
3066 // situation (as used for tail calls) is implementation-defined, so we
3069 const GlobalValue *GV = G->getGlobal();
3071 if (GV->hasExternalWeakLinkage() &&
3085 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3086 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3088 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3089 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3097 if (AFI_Caller->getArgRegsSaveSize())
3108 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3120 // register/stack-slot combinations. The types will not match
3169 // IRQ/FIQ: +4 "subs pc, lr, #4"
3177 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3184 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3198 // CCValAssign - represent the assignment of the return value to a location.
3201 // CCState - Info about the registers and stack slots.
3211 bool isLittleEndian = Subtarget->isLittle();
3215 AFI->setReturnRegsCount(RVLocs.size());
3218 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3225 DAG.getContext()->diagnose(Diag);
3238 if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3239 // Half-precision return values can be returned like this:
3272 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3312 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3332 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3334 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3351 // CPUs which aren't M-class use a special sequence to return from
3355 // M-class CPUs actually use a normal return sequence with a special
3356 // (hardware-provided) value in LR, so the normal code path works.
3358 !Subtarget->isMClass()) {
3359 if (Subtarget->isThumb1Only())
3364 ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE :
3370 if (N->getNumValues() != 1)
3372 if (!N->hasNUsesOfValue(1, 0))
3376 SDNode *Copy = *N->use_begin();
3377 if (Copy->getOpcode() == ISD::CopyToReg) {
3380 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3382 TCChain = Copy->getOperand(0);
3383 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3387 for (SDNode *U : VMov->uses()) {
3388 if (U->getOpcode() != ISD::CopyToReg)
3395 for (SDNode *U : VMov->uses()) {
3396 SDValue UseChain = U->getOperand(0);
3404 if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3410 } else if (Copy->getOpcode() == ISD::BITCAST) {
3412 if (!Copy->hasOneUse())
3414 Copy = *Copy->use_begin();
3415 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3419 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3421 TCChain = Copy->getOperand(0);
3427 for (const SDNode *U : Copy->uses()) {
3428 if (U->getOpcode() != ARMISD::RET_GLUE &&
3429 U->getOpcode() != ARMISD::INTRET_GLUE)
3442 if (!Subtarget->supportsTailCall())
3445 if (!CI->isTailCall())
3455 SDValue WriteValue = Op->getOperand(2);
3459 && "LowerWRITE_REGISTER called for non-i64 type argument.");
3463 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3481 // When generating execute-only code Constant Pools must be promoted to the
3483 // blocks, but this way we guarantee that execute-only behaves correctly with
3484 // position-independent addressing modes.
3485 if (Subtarget->genExecuteOnly()) {
3487 auto T = const_cast<Type*>(CP->getType());
3488 auto C = const_cast<Constant*>(CP->getConstVal());
3495 Twine(AFI->createPICLabelUId())
3502 // The 16-bit ADR instruction can only encode offsets that are multiples of 4,
3503 // so we need to align to at least 4 bytes when we don't have 32-bit ADR.
3504 Align CPAlign = CP->getAlign();
3505 if (Subtarget->isThumb1Only())
3507 if (CP->isMachineConstantPoolEntry())
3509 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);
3511 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);
3516 // If we don't have a 32-bit pc-relative branch instruction then the jump
3518 // execute-only it must be placed out-of-line.
3519 if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())
3531 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3533 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3537 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3538 ARMPCLabelIndex = AFI->createPICLabelUId();
3581 assert(Subtarget->isTargetDarwin() &&
3607 getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3609 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3625 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3668 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3684 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3687 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3689 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3724 const GlobalValue *GV = GA->getGlobal();
3735 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3737 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3739 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3778 if (Subtarget->isTargetDarwin())
3781 if (Subtarget->isTargetWindows())
3785 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3786 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3802 SmallVector<const User*,4> Worklist(V->users());
3806 append_range(Worklist, U->users());
3811 if (!I || I->getParent()->getParent() != F)
3830 // use-site. We know that if we inline a variable at one use site, we'll
3831 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3834 // the GV from fast-isel generated code.
3840 if (!GVar || !GVar->hasInitializer() ||
3841 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3842 !GVar->hasLocalLinkage())
3846 // from .data to .text. This is not allowed in position-independent code.
3847 auto *Init = GVar->getInitializer();
3848 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3849 Init->needsDynamicRelocation())
3859 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3861 unsigned RequiredPadding = 4 - (Size % 4);
3863 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3876 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3877 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3893 StringRef S = CDAInit->getAsString();
3897 while (RequiredPadding--)
3904 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3905 AFI->markGlobalAsPromotedToConstantPool(GVar);
3906 AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3907 PaddedSize - 4);
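// Worked numbers for the padding bookkeeping above (illustrative helper):
// a 10-byte initializer gets RequiredPadding = 4 - (10 % 4) = 2, so
// PaddedSize = 12; a size that is already a multiple of 4 yields
// RequiredPadding == 4, which is treated as "no padding needed".
unsigned paddedSize(unsigned Size) {
  unsigned RequiredPadding = 4 - (Size % 4);
  return RequiredPadding == 4 ? Size : Size + RequiredPadding;
}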
3915 if (!(GV = GA->getAliaseeObject()))
3918 return V->isConstant();
3924 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3939 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3943 if (GV->isDSOLocal() && !Subtarget->genExecuteOnly())
3949 GV, dl, PtrVT, 0, GV->isDSOLocal() ? 0 : ARMII::MO_GOT);
3951 if (!GV->isDSOLocal())
3956 } else if (Subtarget->isROPI() && IsRO) {
3957 // PC-relative.
3961 } else if (Subtarget->isRWPI() && !IsRO) {
3962 // SB-relative.
3964 if (Subtarget->useMovt()) {
3986 if (Subtarget->useMovt() || Subtarget->genExecuteOnly()) {
3987 if (Subtarget->useMovt())
4004 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4008 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4010 if (Subtarget->useMovt())
4021 if (Subtarget->isGVIndirectSymbol(GV))
4029 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
4030 assert(Subtarget->useMovt() &&
4032 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4036 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4038 if (GV->hasDLLImportStorageClass())
4095 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
4097 ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
4099 // Mark LR an implicit live-in.
4107 if (Subtarget->isThumb())
4176 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
4180 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
4258 return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
4261 return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
4273 if (!Subtarget->hasDataBarrier()) {
4275 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
4277 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4286 if (Subtarget->isMClass()) {
4287 // Only a full system barrier exists in the M-class architectures.
4289 } else if (Subtarget->preferISHSTBarriers() &&
4305 if (!(Subtarget->isThumb2() ||
4306 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4313 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4318 if (Subtarget->isThumb()) {
4337 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4338 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4352 if (AFI->isThumb1OnlyFunction())
4375 if (!Subtarget->isLittle())
4380 // The remaining GPRs hold either the beginning of variable-argument
4393 // Currently, two use-cases are possible:
4394 // Case #1. A non-var-args function, where we meet the first byval parameter.
4399 // "store-reg" instructions.
4400 // Case #2. A var-args function that doesn't contain byval parameters.
4417 ArgOffset = -4 * (ARM::R4 - RBegin);
4425 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4458 AFI->setVarArgsFrameIndex(FrameIndex);
4514 AFI->setArgRegsSaveSize(0);
4540 int lastInsIndex = -1;
4547 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4548 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4555 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4600 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4612 AFI->setPreservesR0();
4616 // If this is an 8 or 16-bit value, it is really passed promoted
4628 // had been copied to the LSBs of a 32-bit register.
4635 // less than 32 bits must be sign- or zero-extended in the callee for
4639 if (AFI->isCmseNSEntryFunction() && Arg.ArgVT.isScalarInteger() &&
4691 if (AFI->isCmseNSEntryFunction()) {
4695 DAG.getContext()->diagnose(Diag);
4707 AFI->setArgumentStackToRestore(StackArgSize);
4709 AFI->setArgumentStackSize(StackArgSize);
4711 if (CCInfo.getStackSize() > 0 && AFI->isCmseNSEntryFunction()) {
4715 DAG.getContext()->diagnose(Diag);
4721 /// isFloatingPointZero - Return true if this is +0.0.
4724 return CFP->getValueAPF().isPosZero();
4730 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4731 return CFP->getValueAPF().isPosZero();
4733 } else if (Op->getOpcode() == ISD::BITCAST &&
4734 Op->getValueType(0) == MVT::f64) {
4737 SDValue BitcastOp = Op->getOperand(0);
4738 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4739 isNullConstant(BitcastOp->getOperand(0)))
4751 unsigned C = RHSC->getZExtValue();
4758 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4760 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4765 if (C != 0 && isLegalICmpImmediate(C-1)) {
4767 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
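// Sketch of what the C-1 adjustment above buys (illustrative): 257 (0x101)
// is not a valid ARM modified-immediate, but 256 is, so a comparison like
// the one below can be emitted as "x > 256" (SETGE C becomes SETGT C-1)
// using a single CMP with an encodable immediate.
bool atLeast257(int x) { return x >= 257; }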
4788 // In ARM and Thumb-2, the compare instructions can shift their second
4815 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4816 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4821 uint64_t RHSV = RHSC->getZExtValue();
4836 // some tweaks to the heuristics for the previous and->shift transform.
4838 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4839 isa<ConstantSDNode>(RHS) && RHS->getAsZExtVal() == 0x80000000U &&
4889 assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4900 /// duplicateCmp - Glue values can have only one use, so this function
5069 // value. So compute 1 - C.
5082 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->isThumb1Only())
5092 switch (Op->getOpcode()) {
5108 switch (Op->getOpcode()) {
5128 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
5129 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
5143 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5158 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
5159 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
5168 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
5169 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
5193 // undefined bits before doing a full-word comparison with zero.
5255 if (!Subtarget->hasFP64() && VT == MVT::f64) {
5286 // See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
5306 // x < -k ? -k : (x > k ? k : x)
5307 // x < -k ? -k : (x < k ? x : k)
5308 // x > -k ? (x > k ? k : x) : -k
5309 // x < k ? (x < -k ? -k : x) : k
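// A plain C++ spelling of the first pattern above (illustrative): clamp x
// into [Lo, Hi]; when Lo and Hi are matching saturation bounds, the whole
// select tree can be emitted as a single SSAT/USAT.
int clampToRange(int x, int Lo, int Hi) {
  return x < Lo ? Lo : (x > Hi ? Hi : x);
}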
5324 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5334 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5346 // Check that the constant in the lower-bound check is
5347 // the opposite of the constant in the upper-bound check
5352 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5353 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5379 // - The conditions and values match up
5380 // - k is 0 or -1 (all ones)
5389 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5420 return !Subtarget->hasVFP2Base();
5422 return !Subtarget->hasFP64();
5424 return !Subtarget->hasFullFP16();
5433 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5438 // into more efficient bit operations, which is possible when k is 0 or -1
5439 // On ARM and Thumb-2 which have flexible operand 2 this will result in
5442 // Only allow this transformation on full-width (32-bit) operations
5459 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5465 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5467 unsigned TVal = CTVal->getZExtValue();
5468 unsigned FVal = CFVal->getZExtValue();
5496 // -(-a) == a, but (a+1)+1 != a).
5536 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5551 if (ARMcc->getAsZExtVal() == ARMCC::PL)
5563 if (Subtarget->hasFPARMv8Base() &&
5594 /// canChangeToInt - Given the fp compare operand, return true if it is suitable
5599 if (!N->hasOneUse())
5602 if (!N->getNumValues())
5605 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5607 // vmrs are very slow, e.g. cortex-a8.
5622 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5623 Ld->getPointerInfo(), Ld->getAlign(),
5624 Ld->getMemOperand()->getFlags());
5640 SDValue Ptr = Ld->getBasePtr();
5642 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5643 Ld->getAlign(), Ld->getMemOperand()->getFlags());
5648 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5649 Ld->getPointerInfo().getWithOffset(4),
5650 commonAlignment(Ld->getAlign(), 4),
5651 Ld->getMemOperand()->getFlags());
5658 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
5663 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5721 !Subtarget->isThumb1Only();
5726 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5736 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5750 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5772 !Subtarget->isThumb1Only();
5778 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5789 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5839 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5843 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5844 // Thumb2 and ARMv8-M use a two-level jump. That is, it jumps into the jump table
5851 if (isPositionIndependent() || Subtarget->isROPI()) {
5902 bool IsStrict = Op->isStrictFPOpcode();
5939 EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5945 Subtarget->hasFP64())
5948 Subtarget->hasFullFP16())
5951 Subtarget->hasMVEFloatOps())
5954 Subtarget->hasMVEFloatOps())
5962 unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
5966 DAG.getConstant((1 << BW) - 1, DL, VT));
5969 DAG.getConstant(-(1 << BW), DL, VT));
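// Worked numbers for the saturation constants above (illustrative): for a
// signed i16 result BW = 16 - 1 = 15, so the clamp bounds are
// (1 << 15) - 1 = 32767 and -(1 << 15) = -32768; for an unsigned i8 result
// BW = 8 and the upper bound is (1 << 8) - 1 = 255.
constexpr int satUpperBound(unsigned Bits, bool IsSigned) {
  return (1 << (Bits - IsSigned)) - 1;
}
static_assert(satUpperBound(16, true) == 32767, "signed i16 upper bound");
static_assert(satUpperBound(8, false) == 255, "unsigned i8 upper bound");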
6046 bool UseNEON = !InGPR && Subtarget->hasNEON();
6138 // Return LR, which contains the return address. Mark it an implicit live-in.
6155 while (Depth--)
6181 assert(N->getValueType(0) == MVT::i64
6182 && "ExpandREAD_REGISTER called for non-i64 type result.");
6186 N->getOperand(0),
6187 N->getOperand(1));
6203 SDValue Op = BC->getOperand(0);
6204 EVT DstVT = BC->getValueType(0);
6225 const APInt &APIntIndex = Index->getAPIntValue();
6232 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
6244 /// ExpandBITCAST - If the target supports VFP, this function is called to
6246 /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
6253 SDValue Op = N->getOperand(0);
6258 EVT DstVT = N->getValueType(0);
6274 // Turn i64->f64 into VMOVDRR.
6286 // Turn f64->i64 into VMOVRRD.
6304 /// getZeroVector - Returns a vector of specified type with all zero elements.
6319 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
6323 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6352 DAG.getConstant(VTBits - 1, dl, VT))
6363 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
6367 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6405 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
6428 SDValue Chain = Op->getOperand(0);
6429 SDValue RMValue = Op->getOperand(1);
6433 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
6434 // (((arg - 1) & 3) << 22).
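// Standalone check of the two mappings above (illustrative): FLT_ROUNDS uses
// 0 = toward zero, 1 = to nearest, 2 = upward, 3 = downward, while the FPSCR
// RMode field uses 0 = nearest, 1 = upward, 2 = downward, 3 = toward zero.
constexpr unsigned hwToFltRounds(unsigned HW) { return (HW + 1) & 3; } // 0->1, 1->2, 2->3, 3->0
constexpr unsigned fltRoundsToHw(unsigned FR) { return (FR - 1) & 3; } // 0->3, 1->0, 2->1, 3->2
static_assert(fltRoundsToHw(hwToFltRounds(2)) == 2, "mappings are inverses");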
6470 SDValue Chain = Op->getOperand(0);
6471 SDValue Mode = Op->getOperand(1);
6498 SDValue Chain = Op->getOperand(0);
6521 EVT VT = N->getValueType(0);
6522 if (VT.isVector() && ST->hasNEON()) {
6524 // Compute the least significant set bit: LSB = X & -X
6525 SDValue X = N->getOperand(0);
6532 // Compute with: cttz(x) = ctpop(lsb - 1)
6540 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
6541 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
6545 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
6550 // Compute with: cttz(x) = ctpop(lsb - 1)
6552 // Compute LSB - 1.
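// Scalar sanity check of the identities used above (illustrative): with
// lsb = x & -x, cttz(x) == popcount(lsb - 1), and for x != 0 also
// cttz(x) == (width - 1) - clz(lsb).
unsigned cttzViaPopcount(unsigned x) {
  unsigned lsb = x & -x;              // isolate the least significant set bit
  return __builtin_popcount(lsb - 1); // e.g. x = 0b101000: lsb = 0b1000 -> 3
}
unsigned cttzViaClz(unsigned x) {     // valid for x != 0
  unsigned lsb = x & -x;
  return 31 - __builtin_clz(lsb);
}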
6567 if (!ST->hasV6T2Ops())
6570 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
6576 EVT VT = N->getValueType(0);
6579 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
6586 SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
6607 /// getVShiftImm - Check if this is a valid build_vector for the immediate
6619 !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6627 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
6636 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6639 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
6653 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6654 Cnt = -Cnt;
6662 EVT VT = N->getValueType(0);
6675 if (N->getOpcode() == ISD::SHL) {
6676 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
6677 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
6679 return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
6680 N->getOperand(1));
6683 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
6686 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
6688 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6689 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
6695 EVT ShiftVT = N->getOperand(1).getValueType();
6697 ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
6699 (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6700 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
6705 EVT VT = N->getValueType(0);
6712 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
6713 N->getOpcode() == ISD::SHL) &&
6716 unsigned ShOpc = N->getOpcode();
6717 if (ST->hasMVEIntegerOps()) {
6718 SDValue ShAmt = N->getOperand(1);
6724 if ((!Con && ShAmt->getValueType(0).getSizeInBits() > 64) ||
6725 (Con && (Con->getAPIntValue() == 0 || Con->getAPIntValue().uge(32))))
6729 if (ShAmt->getValueType(0) != MVT::i32)
6747 DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6757 if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
6761 if (ST->isThumb1Only())
6764 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
6766 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6770 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE;
6790 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
6794 if (ST->hasNEON())
6797 assert(ST->hasMVEIntegerOps() &&
6805 if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
6813 // Special-case integer 64-bit equality comparisons. They aren't legal,
6831 // 64-bit comparisons are not legal in general.
6839 if (ST->hasMVEFloatOps()) {
6888 if (ST->hasMVEIntegerOps()) {
6905 if (ST->hasNEON() && Opc == ARMCC::EQ) {
6931 // comparison to a specialized compare-against-zero form.
6982 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
6990 /// isVMOVModifiedImm - Check if the specified splat value corresponds to a
7002 // immediate instructions other than VMOV do not support the 8-bit encoding
7004 // 32-bit version.
7012 // Any 1-byte value is OK. Op=0, Cmode=1110.
7020 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
7037 // NEON's 32-bit VMOV supports splat values where:
7090 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
7093 // and fall through here to test for a valid 64-bit splat. But, then the
7100 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
7117 unsigned Mask = (1 << BytesPerElem) - 1;
7122 NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
7146 const APFloat &FPVal = CFP->getValueAPF();
7148 // Prevent floating-point constants from using literal loads
7149 // when execute-only is enabled.
7150 if (ST->genExecuteOnly()) {
7151 // We shouldn't trigger this for v6m execute-only
7152 assert((!ST->isThumb1Only() || ST->hasV8MBaselineOps()) &&
7176 if (!ST->hasVFP3Base())
7180 // an SP-only FPU
7181 if (IsDouble && !Subtarget->hasFP64())
7187 if (ImmVal != -1) {
7188 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
7206 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
7316 Imm -= NumElts;
7341 // WhichResult gives the offset for each element in the mask based on which
7368 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
7385 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
7440 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7447 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
7476 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7514 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7521 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
7547 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7554 /// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
7585 // Look for <15, ..., 3, -1, 1, 0>.
7587 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7599 // Half-width truncation patterns (e.g. v4i32 -> v8i16):
7670 if (!ST->hasMVEFloatOps())
7723 if (!ST->hasMVEFloatOps())
7767 Val = N->getAsZExtVal();
7769 if (ST->isThumb1Only()) {
7773 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
7784 assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
7808 llvm::all_of(llvm::drop_begin(Op->ops()), [&FirstOp](const SDUse &U) {
7822 bool BitSet = V.isUndef() ? false : V->getAsZExtVal();
7843 if (!ST->hasMVEIntegerOps())
7877 switch (N->getOpcode()) {
7886 return N->getOperand(1).getNode() == Op;
7888 switch (N->getConstantOperandVal(0)) {
7905 return N->getOperand(2).getNode() == Op;
7922 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
7931 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7937 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
7939 all_of(BVN->uses(),
7949 if ((ST->hasNEON() && SplatBitSize <= 64) ||
7950 (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7966 VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
7975 if (ImmVal != -1) {
7983 if (ST->hasMVEIntegerOps() &&
8033 Value = ValueCounts.begin()->first;
8045 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
8055 // constant-index forms.
8057 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8058 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
8063 if (VT != Value->getOperand(0).getValueType()) {
8064 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
8072 Value->getOperand(0), Value->getOperand(1));
8132 if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
8133 // If we haven't found an efficient lowering, try splitting a 128-bit vector
8134 // into two 64-bit vectors; we might discover a better way to lower it.
8135 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
8149 // Vectors with 32- or 64-bit elements can be built by directly assigning
8153 // Do the expansion with floating-point types, since that is what the VFP
8166 // worse. For a vector with one or two non-undef values, that's
8239 Source->MinElt = std::min(Source->MinElt, EltNo);
8240 Source->MaxElt = std::max(Source->MaxElt, EltNo);
8292 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8302 Src.WindowBase = -NumSrcElts;
8320 Src.WindowBase = -Src.MinElt;
8343 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8351 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8365 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8366 ExtractBase += NumElts * (Src - Sources.begin());
8419 /// isShuffleMaskLegal - Targets can use this to indicate that they only
8440 if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
8455 else if (Subtarget->hasNEON() &&
8463 else if (Subtarget->hasMVEIntegerOps() &&
8467 else if (Subtarget->hasMVEIntegerOps() &&
8476 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8482 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8483 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8502 // vrev <4 x i16> -> VREV32
8505 // vrev <4 x i8> -> VREV16
8513 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
8519 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
8523 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
8527 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
8531 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
8547 if (V2.getNode()->isUndef())
8592 // of all ones or all zeroes and selecting the lanes based upon the real
8619 // Recast our new predicate-as-integer v16i8 vector into something
8628 ArrayRef<int> ShuffleMask = SVN->getMask();
8630 assert(ST->hasMVEIntegerOps() &&
8646 // to essentially promote the boolean predicate to an 8-bit integer, where
8650 // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
8651 // fields in a register into 8 other arbitrary 2-bit fields!
8697 // Detect which mov lane this would be from the first non-undef element.
8698 int MovIdx = -1;
8702 return -1;
8708 if (MovIdx == -1)
8709 return -1;
8715 return -1;
8723 if (Elt != -1) {
8724 SDValue Input = Op->getOperand(0);
8726 Input = Op->getOperand(1);
8727 Elt -= 4;
8746 Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8748 VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8770 // A One-Off Identity mask is one that is mostly an identity mask from a
8771 // single source but contains a single element out-of-place, either from a
8777 OffElement = -1;
8780 if (Mask[i] == -1)
8784 if (OffElement == -1)
8790 return NonUndef > 2 && OffElement != -1;
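// Example masks for the predicate above (illustrative, 4-element shuffles,
// where indices 4-7 select from the second source):
//   <0, 1, 2, 3>  plain identity - no out-of-place element, so not "one-off"
//   <0, 5, 2, 3>  identity except lane 1, which reads lane 1 of the second
//                 source; a single lane move fixes it up after a plain copy
//   <0, 1, 2, 0>  identity except lane 3, which re-reads lane 0 of the same
//                 source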
8822 if (ST->hasMVEIntegerOps() && EltSize == 1)
8825 // Convert shuffles that are directly supported on NEON to target-specific
8829 // FIXME: floating-point vectors should be canonicalized to integer vectors
8831 ArrayRef<int> ShuffleMask = SVN->getMask();
8834 if (SVN->isSplat()) {
8835 int Lane = SVN->getSplatIndex();
8837 if (Lane == -1) Lane = 0;
8863 if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
8877 if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
8889 if (ST->hasNEON()) {
8898 if (ST->hasMVEIntegerOps()) {
8913 // ->
8918 // native shuffles produce larger results: the two-result ops.
8922 // ->
8925 if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
8926 SDValue SubV1 = V1->getOperand(0);
8927 SDValue SubV2 = V1->getOperand(1);
8930 // We expect these to have been canonicalized to -1.
8940 "In-place shuffle of concat can only have one result!");
8949 if (ST->hasMVEIntegerOps() && EltSize <= 32) {
8973 // the PerfectShuffle-generated table to synthesize it from other shuffles.
8991 if (ST->hasNEON())
8994 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8995 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9004 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
9006 // Do the expansion with floating-point types, since that is what the VFP
9019 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
9030 if (ST->hasNEON() && VT == MVT::v8i8)
9034 if (ST->hasMVEIntegerOps())
9046 assert(ST->hasMVEIntegerOps() &&
9050 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9054 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
9072 if (Subtarget->hasMVEIntegerOps() &&
9080 // Reinterpret any such vector-element insertion as one with the
9109 assert(ST->hasMVEIntegerOps() &&
9113 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9132 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9150 assert(ST->hasMVEIntegerOps() &&
9213 SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
9227 EVT VT = Op->getValueType(0);
9228 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9232 // two 64-bit vectors are concatenated to a 128-bit vector.
9258 unsigned Index = V2->getAsZExtVal();
9262 assert(ST->hasMVEIntegerOps() &&
9306 assert(ST->hasMVEIntegerOps() && "Expected MVE!");
9307 EVT VT = N->getValueType(0);
9310 SDValue Op = N->getOperand(0);
9322 if (!Subtarget->hasMVEIntegerOps())
9325 EVT ToVT = N->getValueType(0);
9340 // instructions, but that doesn't extend to v8i32->v8i16 where the lanes need
9345 // - Wherever possible combine them into an instruction that makes them
9349 // - Lane Interleaving to transform blocks surrounded by ext/trunc. So
9354 // - Otherwise we have an option. By default we would expand the
9358 // - The other option is to use the fact that loads/stores can extend/truncate
9360 // becomes 3 back-to-back memory operations, but at least that is less than
9370 EVT FromVT = N->getOperand(0).getValueType();
9382 if (!Subtarget->hasMVEIntegerOps())
9387 EVT ToVT = N->getValueType(0);
9390 SDValue Op = N->getOperand(0);
9401 N->getOpcode() == ISD::SIGN_EXTEND ? ARMISD::MVESEXT : ARMISD::MVEZEXT;
9406 Ext = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext);
9407 Ext1 = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext1);
9413 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
9414 /// element has been zero/sign-extended, depending on the isSigned parameter,
9419 EVT VT = N->getValueType(0);
9420 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
9421 SDNode *BVN = N->getOperand(0).getNode();
9422 if (BVN->getValueType(0) != MVT::v4i32 ||
9423 BVN->getOpcode() != ISD::BUILD_VECTOR)
9426 unsigned HiElt = 1 - LoElt;
9427 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
9428 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
9429 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
9430 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
9434 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
9435 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
9438 if (Hi0->isZero() && Hi1->isZero())
9444 if (N->getOpcode() != ISD::BUILD_VECTOR)
9447 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9448 SDNode *Elt = N->getOperand(i).getNode();
9453 if (!isIntN(HalfSize, C->getSExtValue()))
9456 if (!isUIntN(HalfSize, C->getZExtValue()))
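// Concrete case for the sign-extension check above (illustrative): viewed as
// v4i32 on a little-endian target, the v2i64 constant <-5, 7> is the
// BUILD_VECTOR <-5, -1, 7, 0>; each high word equals the low word shifted
// right arithmetically by 32, so the vector counts as sign-extended.
static_assert((-5LL >> 32) == -1LL && (7LL >> 32) == 0LL,
              "high words match an arithmetic >> 32 of the low words");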
9467 /// isSignExtended - Check if a node is a vector value that is sign-extended
9468 /// or a constant BUILD_VECTOR with sign-extended elements.
9470 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
9477 /// isZeroExtended - Check if a node is a vector value that is zero-extended (or
9478 /// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
9480 if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
9505 /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
9506 /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
9513 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
9514 // 64-bits we need to insert a new extension so that it will be 64-bits.
9525 /// SkipLoadExtensionForVMULL - return a load of the original vector size that
9531 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
9534 if (ExtendedTy == LD->getMemoryVT())
9535 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
9536 LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
9537 LD->getMemOperand()->getFlags());
9542 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
9543 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
9544 LD->getMemoryVT(), LD->getAlign(),
9545 LD->getMemOperand()->getFlags());
9548 /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
9555 if (N->getOpcode() == ISD::SIGN_EXTEND ||
9556 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
9557 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
9558 N->getOperand(0)->getValueType(0),
9559 N->getValueType(0),
9560 N->getOpcode());
9570 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
9578 if (N->getOpcode() == ISD::BITCAST) {
9579 SDNode *BVN = N->getOperand(0).getNode();
9580 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
9581 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
9585 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
9588 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
9589 EVT VT = N->getValueType(0);
9596 const APInt &CInt = N->getConstantOperandAPInt(i);
9605 unsigned Opcode = N->getOpcode();
9607 SDNode *N0 = N->getOperand(0).getNode();
9608 SDNode *N1 = N->getOperand(1).getNode();
9609 return N0->hasOneUse() && N1->hasOneUse() &&
9616 unsigned Opcode = N->getOpcode();
9618 SDNode *N0 = N->getOperand(0).getNode();
9619 SDNode *N1 = N->getOperand(1).getNode();
9620 return N0->hasOneUse() && N1->hasOneUse() &&
9627 // Multiplications are only custom-lowered for 128-bit vectors so that
9631 "unexpected type for custom-lowering ISD::MUL");
9684 // isel lowering to take advantage of no-stall back to back vmul + vmla.
9691 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
9692 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
9694 return DAG.getNode(N0->getOpcode(), DL, VT,
9703 // TODO: Should this propagate fast-math-flags?
9734 // TODO: Should this propagate fast-math-flags?
9775 "unexpected type for custom-lowering ISD::SDIV");
9809 // TODO: Should this propagate fast-math-flags?
9812 "unexpected type for custom-lowering ISD::UDIV");
9886 EVT VT = N->getValueType(0);
9919 // by ISD::USUBO_CARRY, so compute 1 - C.
9925 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
9929 assert(Subtarget->isTargetDarwin());
9947 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
9958 Entry.Ty = PointerType::getUnqual(RetTy->getContext());
10049 if (N->getOpcode() != ISD::SDIV)
10059 if (N->getOperand(0).getValueType().isVector())
10073 // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
10097 SDValue Op = N->getOperand(1);
10098 if (N->getValueType(0) == MVT::i32)
10130 EVT MemVT = LD->getMemoryVT();
10135 assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
10136 "Expected a non-extending load");
10137 assert(LD->isUnindexed() && "Expected an unindexed load");
10151 ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
10153 LD->getMemOperand());
10158 DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32));
10169 EVT MemVT = LD->getMemoryVT();
10170 assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
10172 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10173 !Subtarget->isThumb1Only() && LD->isVolatile() &&
10174 LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10178 {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
10188 EVT MemVT = ST->getMemoryVT();
10192 assert(MemVT == ST->getValue().getValueType());
10193 assert(!ST->isTruncatingStore() && "Expected a non-extending store");
10194 assert(ST->isUnindexed() && "Expected an unindexed store");
10199 SDValue Build = ST->getValue();
10204 ? MemVT.getVectorNumElements() - I - 1
10219 ST->getChain(), dl, GRP, ST->getBasePtr(),
10221 ST->getMemOperand());
10227 EVT MemVT = ST->getMemoryVT();
10228 assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
10230 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10231 !Subtarget->isThumb1Only() && ST->isVolatile() &&
10232 ST->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10237 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10241 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10246 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
10247 MemVT, ST->getMemOperand());
10248 } else if (Subtarget->hasMVEIntegerOps() &&
10259 (N->getOpcode() == ARMISD::VMOVIMM &&
10260 isNullConstant(N->getOperand(0))));
10266 SDValue Mask = N->getMask();
10267 SDValue PassThru = N->getPassThru();
10278 VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
10279 N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
10280 N->getExtensionType(), N->isExpandingLoad());
10284 isZeroVector(PassThru->getOperand(0));
10292 if (!ST->hasMVEIntegerOps())
10297 switch (Op->getOpcode()) {
10309 SDValue Op0 = Op->getOperand(0);
10339 SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10340 SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
10341 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
10347 Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10351 if (EltVT != Op->getValueType(0))
10352 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);
10358 if (!ST->hasMVEFloatOps())
10365 if (!ST->hasNEON())
10369 SDValue Op0 = Op->getOperand(0);
10374 switch (Op->getOpcode()) {
10399 // Split 128-bit vectors, since vpmin/max takes 2 64-bit vectors.
10420 switch (Op->getOpcode()) {
10438 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering()))
10452 // Under Power Management extensions, the cycle-count is:
10454 SDValue Ops[] = { N->getOperand(0), // Chain
10488 assert(N->getValueType(0) == MVT::i64 &&
10490 SDValue Ops[] = {N->getOperand(1),
10491 createGPRPairNode(DAG, N->getOperand(2)),
10492 createGPRPairNode(DAG, N->getOperand(3)),
10493 N->getOperand(0)};
10498 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
10519 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
10633 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10637 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10681 if (Subtarget->isTargetWindows())
10698 unsigned IntNo = N->getConstantOperandVal(0);
10713 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(3), dl, MVT::i32, MVT::i32);
10717 N->getOperand(1), N->getOperand(2),
10723 /// ReplaceNodeResults - Replace the results of node with an illegal result
10729 switch (N->getOpcode()) {
10765 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
10766 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
10792 //===----------------------------------------------------------------------===//
10794 //===----------------------------------------------------------------------===//
10796 /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
10802 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
10804 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10806 MachineFunction *MF = MBB->getParent();
10807 MachineRegisterInfo *MRI = &MF->getRegInfo();
10808 MachineConstantPool *MCP = MF->getConstantPool();
10809 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
10810 const Function &F = MF->getFunction();
10812 bool isThumb = Subtarget->isThumb();
10813 bool isThumb2 = Subtarget->isThumb2();
10815 unsigned PCLabelId = AFI->createPICLabelUId();
10819 unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
10826 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
10830 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
10840 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10841 BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
10846 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10847 BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
10852 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10853 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
10856 BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
10870 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10871 BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
10875 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10876 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
10880 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10881 BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
10885 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10886 BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
10891 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10892 BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
10895 BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
10906 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10907 BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
10912 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10913 BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
10917 BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
10928 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10930 MachineFunction *MF = MBB->getParent();
10931 MachineRegisterInfo *MRI = &MF->getRegInfo();
10932 MachineFrameInfo &MFI = MF->getFrameInfo();
10935 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10953 if (!MF->hasCallSiteLandingPad(Sym)) continue;
10955 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
10972 InvokeBBs.insert(MBB->pred_begin(), MBB->pred_end());
10981 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
10982 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
10987 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
10988 DispatchBB->setIsEHPad();
10990 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
10992 if (Subtarget->isThumb())
10995 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
10997 BuildMI(TrapBB, dl, TII->get(trap_opcode));
10998 DispatchBB->addSuccessor(TrapBB);
11000 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
11001 DispatchBB->addSuccessor(DispContBB);
11004 MF->insert(MF->end(), DispatchBB);
11005 MF->insert(MF->end(), DispContBB);
11006 MF->insert(MF->end(), TrapBB);
11012 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
11017 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
11020 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
11030 if (Subtarget->isThumb2()) {
11031 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11032 BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
11039 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
11044 Register VReg1 = MRI->createVirtualRegister(TRC);
11045 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
11051 VReg2 = MRI->createVirtualRegister(TRC);
11052 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
11058 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
11064 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
11069 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11070 BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
11074 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11075 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
11082 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
11086 } else if (Subtarget->isThumb()) {
11087 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11088 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
11095 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
11100 MachineConstantPool *ConstantPool = MF->getConstantPool();
11101 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11105 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11106 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11108 Register VReg1 = MRI->createVirtualRegister(TRC);
11109 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
11113 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
11119 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
11124 Register NewVReg2 = MRI->createVirtualRegister(TRC);
11125 BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
11131 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11132 BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
11136 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11137 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
11144 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11147 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11148 BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
11156 NewVReg6 = MRI->createVirtualRegister(TRC);
11157 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
11164 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
11168 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11169 BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
11176 BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
11180 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
11181 Register VReg1 = MRI->createVirtualRegister(TRC);
11182 BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
11188 VReg2 = MRI->createVirtualRegister(TRC);
11189 BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
11195 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11200 MachineConstantPool *ConstantPool = MF->getConstantPool();
11201 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11205 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11206 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11208 Register VReg1 = MRI->createVirtualRegister(TRC);
11209 BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
11214 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11220 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
11225 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11226 BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
11231 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11232 BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
11237 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11239 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11240 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
11248 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
11253 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
11263 DispContBB->addSuccessor(CurMBB);
11273 SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
11276 if (SMBB->isEHPad()) {
11277 BB->removeSuccessor(SMBB);
11282 BB->addSuccessor(DispatchBB, BranchProbability::getZero());
11283 BB->normalizeSuccProbs();
11285 // Find the invoke call and mark all of the callee-saved registers as
11290 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
11291 if (!II->isCall()) continue;
11295 OI = II->operands_begin(), OE = II->operands_end();
11297 if (!OI->isReg()) continue;
11298 DefRegs[OI->getReg()] = true;
11305 if (Subtarget->isThumb2() &&
11309 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
11311 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
11321 // Mark all former landing pads as non-landing pads. The dispatch is the only
11324 MBBLPad->setIsEHPad(false);
11332 for (MachineBasicBlock *S : MBB->successors())
11376 /// Emit a post-increment load operation with given size. The instructions
11385 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11392 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11396 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11402 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11408 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11417 /// Emit a post-increment store operation with given size. The instructions
11426 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11433 BuildMI(*BB, Pos, dl, TII->get(StOpc))
11438 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11444 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11450 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11463 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
11465 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11466 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11467 MachineFunction::iterator It = ++BB->getIterator();
11475 MachineFunction *MF = BB->getParent();
11476 MachineRegisterInfo &MRI = MF->getRegInfo();
11481 bool IsThumb1 = Subtarget->isThumb1Only();
11482 bool IsThumb2 = Subtarget->isThumb2();
11483 bool IsThumb = Subtarget->isThumb();
11491 if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
11492 Subtarget->hasNEON()) {
11512 unsigned LoopSize = SizeVal - BytesLeft;
11514 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11553 // movw varEnd, # --> with thumb2
11555 // ldrcp varEnd, idx --> without thumb2
11556 // fallthrough --> loopMBB
11565 // fallthrough --> exitMBB
11567 // epilogue to handle left-over bytes
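// Worked example (assuming UnitSize == 16 when NEON is usable): a 70-byte
// copy gives BytesLeft = 70 % 16 = 6 and LoopSize = 64, so the loop runs
// four 16-byte iterations and the epilogue copies the remaining 6 bytes
// with narrower post-incremented loads and stores.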
11570 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11571 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11572 MF->insert(It, loopMBB);
11573 MF->insert(It, exitMBB);
11576 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
11577 loopMBB->setCallFrameSize(CallFrameSize);
11578 exitMBB->setCallFrameSize(CallFrameSize);
11581 exitMBB->splice(exitMBB->begin(), BB,
11582 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11583 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11587 if (Subtarget->useMovt()) {
11588 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi32imm : ARM::MOVi32imm),
11591 } else if (Subtarget->genExecuteOnly()) {
11592 assert(IsThumb && "Non-thumb expected to have used movt");
11593 BuildMI(BB, dl, TII->get(ARM::tMOVi32imm), varEnd).addImm(LoopSize);
11595 MachineConstantPool *ConstantPool = MF->getConstantPool();
11596 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11600 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11601 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11603 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
11607 BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
11613 BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
11620 BB->addSuccessor(loopMBB);
11635 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
11638 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
11641 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
11648 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
11650 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
11655 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
11662 BuildMI(*BB, BB->end(), dl,
11663 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11668 MIB->getOperand(5).setReg(ARM::CPSR);
11669 MIB->getOperand(5).setIsDef(true);
11671 BuildMI(*BB, BB->end(), dl,
11672 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11676 BB->addSuccessor(loopMBB);
11677 BB->addSuccessor(exitMBB);
11681 auto StartOfExit = exitMBB->begin();
11707 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
11710 assert(Subtarget->isTargetWindows() &&
11712 assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
11720 // thumb-2 environment, so there is no interworking required. As a result, we
11728 // branches for Thumb), we can generate the long-call version via
11729 // -mcmodel=large, alleviating the need for the trampoline which may clobber
11749 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
11754 BuildMI(*MBB, MI, DL, TII.get(gettBLXrOpcode(*MBB->getParent())))
11782 MachineFunction *MF = MBB->getParent();
11783 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11785 MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
11786 MF->insert(++MBB->getIterator(), ContBB);
11787 ContBB->splice(ContBB->begin(), MBB,
11788 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11789 ContBB->transferSuccessorsAndUpdatePHIs(MBB);
11790 MBB->addSuccessor(ContBB);
11792 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
11793 BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
11794 MF->push_back(TrapBB);
11795 MBB->addSuccessor(TrapBB);
11797 BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
11801 BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
11820 for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
11825 break; // Should have kill-flag - update below.
11830 if (miI == BB->end()) {
11831 for (MachineBasicBlock *Succ : BB->successors())
11832 if (Succ->isLiveIn(ARM::CPSR))
11838 SelectItr->addRegisterKilled(ARM::CPSR, TRI);
11851 BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)
11858 BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)
11865 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
11868 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopStart))
11872 BuildMI(TpEntry, Dl, TII->get(ARM::t2B))
11896 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), SrcPhiReg)
11906 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), DestPhiReg)
11916 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), LoopCounterPhiReg)
11925 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), PredCounterPhiReg)
11933 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
11939 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
11949 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VLDRBU8_post))
11960 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VSTRBU8_post))
11971 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
11975 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopEnd))
11979 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2B))
11987 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11989 bool isThumb2 = Subtarget->isThumb2();
11996 // Thumb1 post-indexed loads are really just single-register LDMs.
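// For illustration: a post-incremented word load of r0 from [r1] is emitted
// as "ldm r1!, {r0}", which loads r0 from [r1] and then advances r1 by 4,
// exactly what a post-indexed LDR would have done.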
11999 BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
12028 // |-----------------|
12031 // | TP loop Body MBB<--|
12037 MachineFunction *MF = BB->getParent();
12038 MachineFunctionProperties &Properties = MF->getProperties();
12039 MachineRegisterInfo &MRI = MF->getRegInfo();
12047 MachineBasicBlock *TpLoopBody = MF->CreateMachineBasicBlock();
12050 MF->push_back(TpLoopBody);
12062 TpExit = BB->splitAt(MI, false);
12064 assert(BB->canFallThrough() && "Exit Block must be Fallthrough of the "
12066 TpExit = BB->getFallThrough();
12067 BuildMI(BB, dl, TII->get(ARM::t2B))
12070 TpExit = BB->splitAt(MI, false);
12086 TpEntry->addSuccessor(TpLoopBody);
12087 TpLoopBody->addSuccessor(TpLoopBody);
12088 TpLoopBody->addSuccessor(TpExit);
12091 TpLoopBody->moveAfter(TpEntry);
12092 TpExit->moveAfter(TpLoopBody);
12102 // The Thumb2 pre-indexed stores have the same MI operands; they just
12106 MI.setDesc(TII->get(ARM::t2STR_PRE));
12109 MI.setDesc(TII->get(ARM::t2STRB_PRE));
12112 MI.setDesc(TII->get(ARM::t2STRH_PRE));
12124 Offset = -Offset;
12127 BuildMI(*BB, MI, dl, TII->get(NewOpc))
12148 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
12157 // diamond control-flow pattern. The incoming instruction knows the
12160 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12161 MachineFunction::iterator It = ++BB->getIterator();
12168 // fallthrough --> copy0MBB
12170 MachineFunction *F = BB->getParent();
12171 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12172 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12173 F->insert(It, copy0MBB);
12174 F->insert(It, sinkMBB);
12177 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12178 copy0MBB->setCallFrameSize(CallFrameSize);
12179 sinkMBB->setCallFrameSize(CallFrameSize);
12182 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
12185 copy0MBB->addLiveIn(ARM::CPSR);
12186 sinkMBB->addLiveIn(ARM::CPSR);
12190 sinkMBB->splice(sinkMBB->begin(), BB,
12191 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12192 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12194 BB->addSuccessor(copy0MBB);
12195 BB->addSuccessor(sinkMBB);
12197 BuildMI(BB, dl, TII->get(ARM::tBcc))
12207 // Update machine-CFG edges
12208 BB->addSuccessor(sinkMBB);
12214 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
12227 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
12236 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12240 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12246 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12250 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12260 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
12263 BuildMI(BB, dl, TII->get(ARM::t2B))
12267 BuildMI(BB, dl, TII->get(ARM::B)).addMBB(exitMBB);
12287 // diamond control-flow pattern. The incoming instruction knows the
12298 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12299 MachineFunction::iterator BBI = ++BB->getIterator();
12300 MachineFunction *Fn = BB->getParent();
12301 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12302 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12303 Fn->insert(BBI, RSBBB);
12304 Fn->insert(BBI, SinkBB);
12309 bool isThumb2 = Subtarget->isThumb2();
12310 MachineRegisterInfo &MRI = Fn->getRegInfo();
12317 SinkBB->splice(SinkBB->begin(), BB,
12318 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12319 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
12321 BB->addSuccessor(RSBBB);
12322 BB->addSuccessor(SinkBB);
12325 RSBBB->addSuccessor(SinkBB);
12328 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12335 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
12340 // by if-conversion pass
12341 BuildMI(*RSBBB, RSBBB->begin(), dl,
12342 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
12350 BuildMI(*SinkBB, SinkBB->begin(), dl,
12351 TII->get(ARM::PHI), ABSDstReg)
12372 /// when it is expanded into LDM/STM. This is done as a post-isel lowering
12376 bool isThumb1 = Subtarget->isThumb1Only();
12379 MachineFunction *MF = MI.getParent()->getParent();
12380 MachineRegisterInfo &MRI = MF->getRegInfo();
12384 if (!Node->hasAnyUseOfValue(0)) {
12387 if (!Node->hasAnyUseOfValue(1)) {
12412 // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
12418 const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
12419 MCID = &TII->get(NewOpc);
12421 assert(MCID->getNumOperands() ==
12422 MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
12432 if (Subtarget->isThumb1Only()) {
12433 for (unsigned c = MCID->getNumOperands() - 4; c--;) {
12439 for (unsigned i = MI.getNumOperands(); i--;) {
12442 int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
12443 if (DefIdx != -1)
12452 ccOutIdx = MCID->getNumOperands() - 1;
12454 ccOutIdx = MCID->getNumOperands() - 1;
12458 if (!MI.hasOptionalDef() || !MCID->operands()[ccOutIdx].isOptionalDef()) {
12466 for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
12481 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
12486 if (!Subtarget->isThumb1Only())
12497 //===----------------------------------------------------------------------===//
12499 //===----------------------------------------------------------------------===//
12513 // (select cc -1, y) [AllOnes=1]
12514 // (select cc y, -1) [AllOnes=1]
12522 switch (N->getOpcode()) {
12525 CC = N->getOperand(0);
12526 SDValue N1 = N->getOperand(1);
12527 SDValue N2 = N->getOperand(2);
12547 EVT VT = N->getValueType(0);
12548 CC = N->getOperand(0);
12556 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12568 // (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
12569 // (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
12570 // (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
12571 // (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
12572 // (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
12579 // (add (zext cc), x) -> (select cc (add x, 1), x)
12580 // (add (sext cc), x) -> (select cc (add x, -1), x)
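// This is sound because zext/sext of the i1 condition is 0/1 or 0/-1, so the
// add only ever adjusts x by that constant; the select form keeps the
// adjustment conditional and typically avoids materialising the boolean in a
// register first.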
12595 EVT VT = N->getValueType(0);
12605 SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
12619 SDValue N0 = N->getOperand(0);
12620 SDValue N1 = N->getOperand(1);
12621 if (N0.getNode()->hasOneUse())
12624 if (N1.getNode()->hasOneUse())
12632 if (N->getOpcode() == ARMISD::VUZP)
12636 if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
12650 // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
12651 if (!N->getValueType(0).is64BitVector())
12659 EVT VT = N->getValueType(0);
12664 Ops.push_back(Unzip->getOperand(0));
12665 Ops.push_back(Unzip->getOperand(1));
12698 EVT VT = N->getValueType(0);
12728 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
12734 EVT VT = N->getValueType(0);
12745 if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12747 SDValue Vec = N0->getOperand(0)->getOperand(0);
12754 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
12755 if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
12756 && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
12758 SDValue ExtVec0 = N0->getOperand(i);
12759 SDValue ExtVec1 = N1->getOperand(i);
12762 if (V != ExtVec0->getOperand(0).getNode() ||
12763 V != ExtVec1->getOperand(0).getNode())
12767 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
12768 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
12771 if (!C0 || !C1 || C0->getZExtValue() != nextIndex
12772 || C1->getZExtValue() != nextIndex+1)
12821 if (V->getOpcode() == ISD::UMUL_LOHI ||
12822 V->getOpcode() == ISD::SMUL_LOHI)
12830 if (!Subtarget->hasBaseDSP())
12833 // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
12834 // accumulates the product into a 64-bit value. The 16-bit values will
12835 // be sign-extended somehow or SRA'd into 32-bit values.
12837 SDValue Mul = AddcNode->getOperand(0);
12838 SDValue Lo = AddcNode->getOperand(1);
12840 Lo = AddcNode->getOperand(0);
12841 Mul = AddcNode->getOperand(1);
12846 SDValue SRA = AddeNode->getOperand(0);
12847 SDValue Hi = AddeNode->getOperand(1);
12849 SRA = AddeNode->getOperand(1);
12850 Hi = AddeNode->getOperand(0);
12855 if (Const->getZExtValue() != 31)
12883 Op0 = Mul->getOperand(0).getOperand(0);
12884 Op1 = Mul->getOperand(1).getOperand(0);
12916 // loAdd -> ADDC |
12919 // ADDE <- hiAdd
12927 assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
12928 AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
12931 assert(AddeSubeNode->getNumOperands() == 3 &&
12932 AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
12936 SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
12937 if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12938 AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
12939 (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
12940 AddcSubcNode->getOpcode() != ARMISD::SUBC))
12943 SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
12944 SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
12950 assert(AddcSubcNode->getNumValues() == 2 &&
12951 AddcSubcNode->getValueType(0) == MVT::i32 &&
12955 // maybe a SMLAL which multiplies two 16-bit values.
12956 if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12957 AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
12958 AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
12959 AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
12960 AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
12964 SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
12965 SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
12982 unsigned Opc = MULOp->getOpcode();
13016 if (AddcSubcNode == HiAddSub->getNode() ||
13017 AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
13025 Ops.push_back(LoMul->getOperand(0));
13026 Ops.push_back(LoMul->getOperand(1));
13032 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
13033 FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
13034 LowAddSub->getNode()->getOpcode() == ISD::Constant &&
13035 static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
13038 if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
13047 } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
13079 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13083 SDNode* AddcNode = AddeNode->getOperand(2).getNode();
13084 if (AddcNode->getOpcode() != ARMISD::ADDC)
13090 if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
13091 UmlalNode = AddcNode->getOperand(0).getNode();
13092 AddHi = AddcNode->getOperand(1);
13093 } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
13094 UmlalNode = AddcNode->getOperand(1).getNode();
13095 AddHi = AddcNode->getOperand(0);
13102 if (!isNullConstant(UmlalNode->getOperand(3)))
13105 if ((isNullConstant(AddeNode->getOperand(0)) &&
13106 AddeNode->getOperand(1).getNode() == UmlalNode) ||
13107 (AddeNode->getOperand(0).getNode() == UmlalNode &&
13108 isNullConstant(AddeNode->getOperand(1)))) {
13110 SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
13111 UmlalNode->getOperand(2), AddHi };
13127 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13132 SDNode* AddcNode = N->getOperand(2).getNode();
13133 SDNode* AddeNode = N->getOperand(3).getNode();
13134 if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
13135 (AddeNode->getOpcode() == ARMISD::ADDE) &&
13136 isNullConstant(AddeNode->getOperand(0)) &&
13137 isNullConstant(AddeNode->getOperand(1)) &&
13138 (AddeNode->getOperand(2).getNode() == AddcNode))
13141 {N->getOperand(0), N->getOperand(1),
13142 AddcNode->getOperand(0), AddcNode->getOperand(1)});
13152 if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
13153 // (SUBC (ADDE 0, 0, C), 1) -> C
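// (ADDE 0, 0, C) materialises the incoming carry as 0 or 1; subtracting 1
// from that value borrows exactly when it is 0, so the carry produced by the
// SUBC equals C and the original flag can simply be forwarded.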
13154 SDValue LHS = N->getOperand(0);
13155 SDValue RHS = N->getOperand(1);
13156 if (LHS->getOpcode() == ARMISD::ADDE &&
13157 isNullConstant(LHS->getOperand(0)) &&
13158 isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
13159 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
13163 if (Subtarget->isThumb1Only()) {
13164 SDValue RHS = N->getOperand(1);
13166 int32_t imm = C->getSExtValue();
13169 RHS = DAG.getConstant(-imm, DL, MVT::i32);
13170 unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
13172 return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
13183 if (Subtarget->isThumb1Only()) {
13185 SDValue RHS = N->getOperand(1);
13187 int64_t imm = C->getSExtValue();
13191 // The with-carry-in form matches bitwise not instead of the negation.
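// The identity being used: x + imm + carry == x - ~imm - (1 - carry), i.e.
// an ADC of imm behaves like an SBC of ~imm, so the immediate is inverted
// here rather than negated.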
13196 unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
13198 return DAG.getNode(Opcode, DL, N->getVTList(),
13199 N->getOperand(0), RHS, N->getOperand(2));
13202 } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
13211 if (!Subtarget->hasMVEIntegerOps())
13222 if (N->getOpcode() == ISD::SELECT &&
13223 N->getOperand(0)->getOpcode() == ISD::SETCC) {
13224 SetCC = N->getOperand(0);
13225 LHS = SetCC->getOperand(0);
13226 RHS = SetCC->getOperand(1);
13227 CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
13228 TrueVal = N->getOperand(1);
13229 FalseVal = N->getOperand(2);
13230 } else if (N->getOpcode() == ISD::SELECT_CC) {
13231 LHS = N->getOperand(0);
13232 RHS = N->getOperand(1);
13233 CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
13234 TrueVal = N->getOperand(2);
13235 FalseVal = N->getOperand(3);
13241 if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
13242 FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
13247 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
13248 FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
13253 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
13254 FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
13259 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
13260 FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
13269 switch (TrueVal->getOpcode()) {
13279 EVT VectorType = FalseVal->getOperand(0).getValueType();
13291 EVT LeftType = LHS->getValueType(0);
13292 EVT RightType = RHS->getValueType(0);
13304 DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
13320 EVT VT = N->getValueType(0);
13327 if (N->getOpcode() == ISD::SMIN) {
13328 Shft = N->getOperand(0);
13329 Clamp = isConstOrConstSplat(N->getOperand(1));
13330 } else if (N->getOpcode() == ISD::VSELECT) {
13332 SDValue Cmp = N->getOperand(0);
13334 cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
13335 Cmp.getOperand(0) != N->getOperand(1) ||
13336 Cmp.getOperand(1) != N->getOperand(2))
13338 Shft = N->getOperand(1);
13339 Clamp = isConstOrConstSplat(N->getOperand(2));
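// The accepted clamp values below (0x7F, 0x7FFF, 0x7FFFFFFF) are the signed
// maxima of i8, i16 and i32, which appears to be how the narrower width
// being saturated to is recognised.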
13348 switch (Clamp->getSExtValue()) {
13349 case (1 << 7) - 1:
13353 case (1 << 15) - 1:
13357 case (1ULL << 31) - 1:
13368 if (!N1 || N1->getSExtValue() != ShftAmt)
13430 if (!Subtarget->hasMVEIntegerOps())
13438 // We need to re-implement this optimization here as the implementation in the
13439 // Target-Independent DAGCombiner does not handle the kind of constant we make
13440 // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
13445 if (N->getOperand(0).getOpcode() != ISD::XOR)
13447 SDValue XOR = N->getOperand(0);
13453 isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
13455 if (!Const || !Const->isOne())
13459 SDValue Cond = XOR->getOperand(0);
13460 SDValue LHS = N->getOperand(1);
13461 SDValue RHS = N->getOperand(2);
13462 EVT Type = N->getValueType(0);
13466 // Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n
13470 SDValue Op0 = N->getOperand(0);
13471 SDValue Op1 = N->getOperand(1);
13472 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13473 EVT VT = N->getValueType(0);
13475 if (!Subtarget->hasMVEIntegerOps() ||
13525 /// PerformADDECombine - Target-specific dag combine transform from
13532 if (Subtarget->isThumb1Only())
13541 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
13559 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
13560 if (N0.getNode()->hasOneUse())
13567 EVT VT = N->getValueType(0);
13568 SDValue N0 = N->getOperand(0);
13569 SDValue N1 = N->getOperand(1);
13585 // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->
13590 !isa<ConstantSDNode>(N0) && N1->hasOneUse()) {
13594 // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
13597 N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) {
13611 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),
13612 N1.getOperand(1 - N1RedOp));
13644 if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||
13645 !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||
13646 Load1->isIndexed())
13657 return -1;
13664 if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) {
13713 if (!Subtarget->hasMVEIntegerOps())
13719 EVT VT = N->getValueType(0);
13720 SDValue N0 = N->getOperand(0);
13721 SDValue N1 = N->getOperand(1);
13737 if (NB->getOpcode() != ISD::BUILD_PAIR)
13739 SDValue VecRed = NB->getOperand(0);
13740 if ((VecRed->getOpcode() != Opcode && VecRed->getOpcode() != OpcodeA) ||
13742 NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
13745 if (VecRed->getOpcode() == OpcodeA) {
13746 // add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)
13755 unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;
13757 Ops.push_back(VecRed->getOperand(I));
13802 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
13803 N->getOpcode() == ISD::SRL) &&
13809 if (N->getOpcode() != ISD::SHL)
13812 if (Subtarget->isThumb1Only()) {
13816 if (N->getOpcode() != ISD::SHL)
13818 SDValue N1 = N->getOperand(0);
13819 if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
13820 N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
13822 if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
13823 if (Const->getAPIntValue().ult(256))
13825 if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
13826 Const->getAPIntValue().sgt(-256))
13832 // Turn off commute-with-shift transform after legalization, so it doesn't
13841 assert(N->getOpcode() == ISD::XOR &&
13842 (N->getOperand(0).getOpcode() == ISD::SHL ||
13843 N->getOperand(0).getOpcode() == ISD::SRL) &&
13847 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
13848 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
13851 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13852 unsigned ShiftAmt = ShiftC->getZExtValue();
13853 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
13854 if (N->getOperand(0).getOpcode() == ISD::SHL)
13855 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
13856 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
13865 assert(((N->getOpcode() == ISD::SHL &&
13866 N->getOperand(0).getOpcode() == ISD::SRL) ||
13867 (N->getOpcode() == ISD::SRL &&
13868 N->getOperand(0).getOpcode() == ISD::SHL)) &&
13869 "Expected shift-shift mask");
13871 if (!Subtarget->isThumb1Only())
13882 return Subtarget->hasMVEIntegerOps() && isTypeLegal(VT);
13886 if (!Subtarget->hasNEON()) {
13887 if (Subtarget->isThumb1Only())
13901 return Subtarget->hasVFP2Base();
13903 return Subtarget->hasVFP2Base();
13905 return Subtarget->hasFP64();
13908 return Subtarget->hasMVEFloatOps();
13922 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13923 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
13933 // - if c1 and c2 are small enough that they don't require mov imms.
13934 // - the user(s) of the node can perform an shl
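// For instance (shl (add x, 4), 2) becomes (add (shl x, 2), 16), since
// (x + 4) << 2 == (x << 2) + 16.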
13936 // No shifted operands for 16-bit instructions.
13937 if (ST->isThumb() && ST->isThumb1Only())
13941 for (auto *U : N->uses()) {
13942 switch(U->getOpcode()) {
13955 if (isa<ConstantSDNode>(U->getOperand(0)) ||
13956 isa<ConstantSDNode>(U->getOperand(1)))
13960 if (U->getOperand(0).getOpcode() == ISD::SHL ||
13961 U->getOperand(1).getOpcode() == ISD::SHL)
13967 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
13968 N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
13971 if (N->getOperand(0).getOpcode() != ISD::SHL)
13974 SDValue SHL = N->getOperand(0);
13976 auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
13981 APInt C2Int = C2->getAPIntValue();
13982 APInt C1Int = C1ShlC2->getAPIntValue();
13989 APInt Mask = APInt::getHighBitsSet(C2Width, C2Width - C2Value);
13996 // The immediates are encoded as an 8-bit value that can be rotated.
13999 return Imm.getBitWidth() - Zeros > 8;
14008 SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
14014 SHL.dump(); N->dump());
14020 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
14025 SDValue N0 = N->getOperand(0);
14026 SDValue N1 = N->getOperand(1);
14043 // Combine (sub 0, (csinc X, Y, CC)) -> (csinv -X, Y, CC)
14044 // providing -X is as cheap as X (currently, just a constant).
14046 if (N->getValueType(0) != MVT::i32 || !isNullConstant(N->getOperand(0)))
14048 SDValue CSINC = N->getOperand(1);
14057 DAG.getNode(ISD::SUB, SDLoc(N), MVT::i32, N->getOperand(0),
14063 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
14068 SDValue N0 = N->getOperand(0);
14069 SDValue N1 = N->getOperand(1);
14071 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
14072 if (N1.getNode()->hasOneUse())
14079 if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
14082 // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
14085 SDValue VDup = N->getOperand(1);
14086 if (VDup->getOpcode() != ARMISD::VDUP)
14089 SDValue VMov = N->getOperand(0);
14090 if (VMov->getOpcode() == ISD::BITCAST)
14091 VMov = VMov->getOperand(0);
14093 if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
14099 VDup->getOperand(0));
14100 return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
14121 if (!Subtarget->hasVMLxForwarding())
14125 SDValue N0 = N->getOperand(0);
14126 SDValue N1 = N->getOperand(1);
14140 EVT VT = N->getValueType(0);
14142 SDValue N00 = N0->getOperand(0);
14143 SDValue N01 = N0->getOperand(1);
14151 EVT VT = N->getValueType(0);
14155 SDValue N0 = N->getOperand(0);
14156 SDValue N1 = N->getOperand(1);
14159 if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)
14161 EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();
14163 return Op->getOperand(0);
14168 // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.
14172 if (!Subtarget->isLittle())
14176 if (And->getOpcode() == ISD::BITCAST)
14177 And = And->getOperand(0);
14178 if (And->getOpcode() != ISD::AND)
14180 SDValue Mask = And->getOperand(1);
14181 if (Mask->getOpcode() == ISD::BITCAST)
14182 Mask = Mask->getOperand(0);
14184 if (Mask->getOpcode() != ISD::BUILD_VECTOR ||
14187 if (isAllOnesConstant(Mask->getOperand(0)) &&
14188 isNullConstant(Mask->getOperand(1)) &&
14189 isAllOnesConstant(Mask->getOperand(2)) &&
14190 isNullConstant(Mask->getOperand(3)))
14191 return And->getOperand(0);
14219 EVT VT = N->getValueType(0);
14220 if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
14223 if (Subtarget->isThumb1Only())
14234 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14238 int64_t MulAmt = C->getSExtValue();
14241 ShiftAmt = ShiftAmt & (32 - 1);
14242 SDValue V = N->getOperand(0);
14249 if (llvm::has_single_bit<uint32_t>(MulAmt - 1)) {
14255 DAG.getConstant(Log2_32(MulAmt - 1), DL,
14258 // (mul x, 2^N - 1) => (sub (shl x, N), x)
14268 uint64_t MulAmtAbs = -MulAmt;
14270 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
14277 } else if (llvm::has_single_bit<uint32_t>(MulAmtAbs - 1)) {
14278 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
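// Concrete instances of the patterns handled here, with N == 3:
//   mul x, 9  -> add (shl x, 3), x        mul x, 7  -> sub (shl x, 3), x
//   mul x, -7 -> sub x, (shl x, 3)        mul x, -9 -> neg (add (shl x, 3), x)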
14283 DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
14303 // Allow DAGCombine to pattern-match before we touch the canonical form.
14307 if (N->getValueType(0) != MVT::i32)
14310 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14314 uint32_t C1 = (uint32_t)N1C->getZExtValue();
14319 SDNode *N0 = N->getOperand(0).getNode();
14320 if (!N0->hasOneUse())
14323 if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
14326 bool LeftShift = N0->getOpcode() == ISD::SHL;
14328 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14332 uint32_t C2 = (uint32_t)N01C->getZExtValue();
14338 C1 &= (-1U << C2);
14340 C1 &= (-1U >> C2);
14354 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14355 DAG.getConstant(C3 - C2, DL, MVT::i32));
14365 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14366 DAG.getConstant(C3 - C2, DL, MVT::i32));
14378 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14391 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14403 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i32, N0->getOperand(0),
14415 // Attempt to use immediate-form VBIC
14416 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14418 EVT VT = N->getValueType(0);
14428 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14429 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14438 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
14445 if (!Subtarget->isThumb1Only()) {
14446 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
14454 if (Subtarget->isThumb1Only())
14465 if (!Subtarget->hasV6Ops() ||
14466 (Subtarget->isThumb() &&
14467 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
14470 SDValue SRL = OR->getOperand(0);
14471 SDValue SHL = OR->getOperand(1);
14474 SRL = OR->getOperand(1);
14475 SHL = OR->getOperand(0);
14493 // For SMUL[B|T] smul_lohi will take a 32-bit and a 16-bit argument.
14494 // For SMUWB the 16-bit value will be sign-extended somehow.
14497 SDValue OpS16 = SMULLOHI->getOperand(0);
14498 SDValue OpS32 = SMULLOHI->getOperand(1);
14503 OpS32 = SMULLOHI->getOperand(0);
14512 OpS16 = OpS16->getOperand(0);
14526 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
14529 EVT VT = N->getValueType(0);
14530 SDValue N0 = N->getOperand(0);
14531 SDValue N1 = N->getOperand(1);
14556 unsigned Mask = MaskC->getZExtValue();
14563 unsigned Val = N1C->getZExtValue();
14584 unsigned Mask2 = N11C->getZExtValue();
14592 if (Subtarget->hasDSP() &&
14609 if (Subtarget->hasDSP() &&
14625 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
14631 unsigned ShAmtC = ShAmt->getAsZExtVal();
14666 if (N->getOpcode() == ARMISD::VCMP)
14667 return (ARMCC::CondCodes)N->getConstantOperandVal(2);
14668 else if (N->getOpcode() == ARMISD::VCMPZ)
14669 return (ARMCC::CondCodes)N->getConstantOperandVal(1);
14676 return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
14681 // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
14683 EVT VT = N->getValueType(0);
14685 SDValue N0 = N->getOperand(0);
14686 SDValue N1 = N->getOperand(1);
14689 if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
14704 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR
14708 // Attempt to use immediate-form VORR
14709 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14711 EVT VT = N->getValueType(0);
14717 if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
14724 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14725 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14734 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
14741 if (!Subtarget->isThumb1Only()) {
14742 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
14749 SDValue N0 = N->getOperand(0);
14750 SDValue N1 = N->getOperand(1);
14753 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
14767 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
14768 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
14770 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
14772 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
14783 N0->getOperand(1),
14784 N0->getOperand(0),
14785 N1->getOperand(0));
14808 EVT VT = N->getValueType(0);
14814 if (!Subtarget->isThumb1Only()) {
14815 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
14823 if (Subtarget->hasMVEIntegerOps()) {
14825 SDValue N0 = N->getOperand(0);
14826 SDValue N1 = N->getOperand(1);
14827 const TargetLowering *TLI = Subtarget->getTargetLowering();
14828 if (TLI->isConstTrueVal(N1) &&
14829 (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
14835 Ops.push_back(N0->getOperand(0));
14836 if (N0->getOpcode() == ARMISD::VCMP)
14837 Ops.push_back(N0->getOperand(1));
14839 return DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
14847 // ParseBFI - Given a BFI instruction in N, extract the "from" value (Rn) and return it,
14851 assert(N->getOpcode() == ARMISD::BFI);
14853 SDValue From = N->getOperand(1);
14854 ToMask = ~N->getConstantOperandAPInt(2);
14859 if (From->getOpcode() == ISD::SRL &&
14860 isa<ConstantSDNode>(From->getOperand(1))) {
14861 APInt Shift = From->getConstantOperandAPInt(1);
14864 From = From->getOperand(0);
14875 unsigned FirstActiveBitInB = B.getBitWidth() - B.countl_zero() - 1;
14876 return LastActiveBitInA - 1 == FirstActiveBitInB;
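// Assuming LastActiveBitInA is the index of A's lowest set bit, this checks
// that A begins exactly one bit above where B ends, i.e. the two masks meet
// with no gap and no overlap.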
14883 SDValue To = N->getOperand(0);
14911 SDValue N0 = N->getOperand(0);
14912 SDValue N1 = N->getOperand(1);
14915 // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
14920 unsigned InvMask = N->getConstantOperandVal(2);
14922 unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
14926 unsigned Mask = (1u << Width) - 1;
14927 unsigned Mask2 = N11C->getZExtValue();
14929 return DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
14930 N->getOperand(0), N1.getOperand(0), N->getOperand(2));
14949 EVT VT = N->getValueType(0);
14963 if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
14964 APInt ToMask1 = ~N->getConstantOperandAPInt(2);
14971 EVT VT = N->getValueType(0);
14974 N->getOperand(1), N->getOperand(2));
14986 if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
14988 SDValue CSInc = Cmp->getOperand(0);
14995 CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
15000 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
15005 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
15010 isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
15033 // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->
15034 // if C1==EQ -> CSXYZ A, B, C2, D
15035 // if C1==NE -> CSXYZ A, B, NOT(C2), D
15037 if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
15038 if (N->getConstantOperandVal(2) == ARMCC::EQ)
15039 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15040 N->getOperand(1),
15042 if (N->getConstantOperandVal(2) == ARMCC::NE)
15044 N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15045 N->getOperand(1),
15051 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for
15056 // vmovrrd(vmovdrr x, y) -> x,y
15057 SDValue InDouble = N->getOperand(0);
15058 if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
15061 // vmovrrd(load f64) -> (load i32), (load i32)
15063 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
15064 InNode->getValueType(0) == MVT::f64 &&
15065 InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
15066 !cast<LoadSDNode>(InNode)->isVolatile()) {
15067 // TODO: Should this be done for non-FrameIndex operands?
15072 SDValue BasePtr = LD->getBasePtr();
15074 DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
15075 LD->getAlign(), LD->getMemOperand()->getFlags());
15080 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
15081 LD->getPointerInfo().getWithOffset(4),
15082 commonAlignment(LD->getAlign(), 4),
15083 LD->getMemOperand()->getFlags());
15092 // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d
15093 // VMOVRRD(extract(insert_vector(insert_vector(.., a, l1), b, l2))) -> a,b
15116 if (!Subtarget->isLittle() && BVSwap)
15134 if (!Subtarget->isLittle() && BVSwap)
15143 /// PerformVMOVDRRCombine - Target-specific dag combine xforms for
15146 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
15147 SDValue Op0 = N->getOperand(0);
15148 SDValue Op1 = N->getOperand(1);
15157 N->getValueType(0), Op0.getOperand(0));
15163 SDValue Op0 = N->getOperand(0);
15165 // VMOVhr (VMOVrh (X)) -> X
15166 if (Op0->getOpcode() == ARMISD::VMOVrh)
15167 return Op0->getOperand(0);
15169 // FullFP16: half values are passed in S-registers, and we don't
15177 if (Op0->getOpcode() == ISD::BITCAST) {
15178 SDValue Copy = Op0->getOperand(0);
15180 Copy->getOpcode() == ISD::CopyFromReg) {
15181 bool HasGlue = Copy->getNumOperands() == 3;
15182 SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
15183 HasGlue ? Copy->getOperand(2) : SDValue()};
15184 EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};
15201 // fold (VMOVhr (load x)) -> (load (f16*)x)
15203 if (LN0->hasOneUse() && LN0->isUnindexed() &&
15204 LN0->getMemoryVT() == MVT::i16) {
15206 DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
15207 LN0->getBasePtr(), LN0->getMemOperand());
15224 SDValue N0 = N->getOperand(0);
15225 EVT VT = N->getValueType(0);
15227 // fold (VMOVrh (fpconst x)) -> const x
15229 APFloat V = C->getValueAPF();
15233 // fold (VMOVrh (load x)) -> (zextload (i16*)x)
15238 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),
15239 LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());
15245 // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)
15246 if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15247 isa<ConstantSDNode>(N0->getOperand(1)))
15248 return DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),
15249 N0->getOperand(1));
15254 /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
15255 /// are normal, non-volatile loads. If so, it is profitable to bitcast an
15259 unsigned NumElts = N->getValueType(0).getVectorNumElements();
15261 SDNode *Elt = N->getOperand(i).getNode();
15262 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
15268 /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
15273 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
15278 if (N->getNumOperands() == 2)
15284 EVT VT = N->getValueType(0);
15291 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
15301 /// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
15315 // 2. The size of its operands is 32 bits (64-bit operands are not legal).
15316 EVT VT = N->getValueType(0);
15320 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
15327 SDNode *Use = *N->use_begin();
15328 if (Use->getOpcode() != ISD::BITCAST ||
15329 Use->getValueType(0).isFloatingPoint())
15341 SDValue Elt = N->getOperand(Idx);
15342 if (Elt->getOpcode() == ISD::BITCAST) {
15344 if (Elt->getOperand(0).getValueType() == MVT::i32)
15349 --NumOfRelevantElts;
15352 // Check if more than half of the elements require a non-free bitcast.
15367 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
15372 SDValue V = N->getOperand(Idx);
15376 V->getOperand(0).getValueType() == MVT::i32)
15395 EVT VT = N->getValueType(0);
15396 SDValue Op = N->getOperand(0);
15400 if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
15402 if (Op->getOperand(0).getValueType() == VT)
15403 return Op->getOperand(0);
15404 return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15407 // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
15411 DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15429 EVT VT = N->getValueType(0);
15430 SDValue Op = N->getOperand(0);
15434 if (ST->isLittle())
15437 // VECTOR_REG_CAST undef -> undef
15442 if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
15444 if (Op->getOperand(0).getValueType() == VT)
15445 return Op->getOperand(0);
15446 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
15454 if (!Subtarget->hasMVEIntegerOps())
15457 EVT VT = N->getValueType(0);
15458 SDValue Op0 = N->getOperand(0);
15459 SDValue Op1 = N->getOperand(1);
15460 ARMCC::CondCodes Cond = (ARMCC::CondCodes)N->getConstantOperandVal(2);
15463 // vcmp X, 0, cc -> vcmpz X, cc
15465 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, N->getOperand(2));
15469 // vcmp 0, X, cc -> vcmpz X, reversed(cc)
15473 // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
15474 if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
15482 /// PerformInsertEltCombine - Target-specific dag combine xforms for
15488 EVT VT = N->getValueType(0);
15489 SDNode *Elt = N->getOperand(1).getNode();
15491 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
15498 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
15499 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
15504 Vec, V, N->getOperand(2));
15510 // extract(x, n); extract(x, n+1) -> VMOVRRD(extract v2f64 x, n/2)
15511 // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD(extract x, n/2)
15514 EVT VT = N->getValueType(0);
15529 if (Ext->use_size() == 1 &&
15530 (Ext->use_begin()->getOpcode() == ISD::SINT_TO_FP ||
15531 Ext->use_begin()->getOpcode() == ISD::UINT_TO_FP))
15542 auto OtherIt = find_if(Op0->uses(), [&](SDNode *V) {
15543 return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15544 isa<ConstantSDNode>(V->getOperand(1)) &&
15545 V->getConstantOperandVal(1) == Lane + 1 &&
15546 V->getOperand(0).getResNo() == ResNo;
15548 if (OtherIt == Op0->uses().end())
15555 if (OtherExt->use_size() != 1 ||
15556 OtherExt->use_begin()->getOpcode() != ISD::BITCAST ||
15557 OtherExt->use_begin()->getValueType(0) != MVT::i32)
15559 OtherExt = SDValue(*OtherExt->use_begin(), 0);
15577 SDValue Op0 = N->getOperand(0);
15578 EVT VT = N->getValueType(0);
15581 // extract (vdup x) -> x
15582 if (Op0->getOpcode() == ARMISD::VDUP) {
15583 SDValue X = Op0->getOperand(0);
15591 while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
15592 X = X->getOperand(0);
15597 // extract ARM_BUILD_VECTOR -> x
15598 if (Op0->getOpcode() == ARMISD::BUILD_VECTOR &&
15599 isa<ConstantSDNode>(N->getOperand(1)) &&
15600 N->getConstantOperandVal(1) < Op0.getNumOperands()) {
15601 return Op0.getOperand(N->getConstantOperandVal(1));
15604 // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
15606 isa<ConstantSDNode>(N->getOperand(1)) &&
15611 unsigned Offset = N->getConstantOperandVal(1);
15614 return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
15617 // extract x, n; extract x, n+1 -> VMOVRRD x
15621 // extract (MVETrunc(x)) -> extract x
15622 if (Op0->getOpcode() == ARMISD::MVETRUNC) {
15623 unsigned Idx = N->getConstantOperandVal(1);
15625 Idx / Op0->getOperand(0).getValueType().getVectorNumElements();
15627 Idx % Op0->getOperand(0).getValueType().getVectorNumElements();
15636 SDValue Op = N->getOperand(0);
15637 EVT VT = N->getValueType(0);
15639 // sext_inreg(VGETLANEu) -> VGETLANEs
15641 cast<VTSDNode>(N->getOperand(1))->getVT() ==
15651 SDValue Vec = N->getOperand(0);
15652 SDValue SubVec = N->getOperand(1);
15653 uint64_t IdxVal = N->getConstantOperandVal(2);
15673 // Fold insert_subvector -> concat_vectors
15674 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
15675 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
15690 // shuffle(MVETrunc(x, y)) -> VMOVN(x, y)
15693 SDValue Trunc = N->getOperand(0);
15695 if (Trunc.getOpcode() != ARMISD::MVETRUNC || !N->getOperand(1).isUndef())
15699 if (isVMOVNTruncMask(N->getMask(), VT, false))
15705 else if (isVMOVNTruncMask(N->getMask(), VT, true))
15714 /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
15725 // targets, but for NEON it is better to concatenate two double-register
15726 // size vector operands into a single quad-register size vector. Do that
15728 // shuffle(concat(v1, undef), concat(v2, undef)) ->
15730 SDValue Op0 = N->getOperand(0);
15731 SDValue Op1 = N->getOperand(1);
15743 EVT VT = N->getValueType(0);
15757 int MaskElt = SVN->getMaskElt(n);
15758 int NewElt = -1;
15762 NewElt = HalfElts + MaskElt - NumElts;
15805 unsigned IntNo = N->getConstantOperandVal(1);
15926 switch (N->getOpcode()) {
15962 VecTy = N->getValueType(0);
15964 VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
15968 VecTy = N->getOperand(1).getValueType();
15980 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
15981 // separate instructions that make it harder to use a non-constant update.
15992 Align Alignment = MemN->getAlign();
15994 // If this is a less-than-standard-aligned load/store, change the type to
15998 // There are 3 ways to get to this base-update combine:
15999 // - intrinsics: they are assumed to be properly aligned (to the standard
16001 // - ARMISD::VLDx nodes: they are only generated from the aforementioned
16003 // - generic load/store instructions: the alignment is specified as an
16007 // generate non-standard-aligned ARMISD::VLDx nodes.
16011 assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
16039 Ops.push_back(N->getOperand(0)); // incoming chain
16040 Ops.push_back(N->getOperand(Target.AddrOpIdx));
16045 Ops.push_back(StN->getValue());
16050 hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
16052 Ops.push_back(N->getOperand(i));
16058 // If this is a non-standard-aligned STORE, the penultimate operand is the
16060 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
16061 SDValue &StVal = Ops[Ops.size() - 2];
16067 MemN->getMemOperand());
16074 // If this is a non-standard-aligned LOAD, the first result is the loaded
16076 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
16088 // If (opcode ptr inc) is an ADD-like instruction, return the
16099 return CInc->getZExtValue();
16103 return CInc->getZExtValue();
16113 switch (N->getOpcode()) {
16116 if (isa<ConstantSDNode>(N->getOperand(1))) {
16117 *Ptr = N->getOperand(0);
16118 *CInc = N->getOperand(1);
16124 if (isa<ConstantSDNode>(N->getOperand(2))) {
16125 *Ptr = N->getOperand(1);
16126 *CInc = N->getOperand(2);
16151 /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
16158 const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
16159 N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
16160 const bool isStore = N->getOpcode() == ISD::STORE;
16164 SDValue Addr = N->getOperand(AddrOpIdx);
16169 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16170 UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
16173 User->getNumOperands() != 2)
16176 SDValue Inc = User->getOperand(UI.getOperandNo() == 1 ? 0 : 1);
16178 getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
16180 if (ConstInc || User->getOpcode() == ISD::ADD)
16190 getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
16191 for (SDNode::use_iterator UI = Base->use_begin(), UE = Base->use_end();
16196 User->getNumOperands() != 2)
16199 SDValue UserInc = User->getOperand(UI.getOperandNo() == 0 ? 1 : 0);
16201 getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
16206 unsigned NewConstInc = UserOffset - Offset;
16220 --NumValidUpd;
16231 // Try to fold with other users. Non-constant updates are considered
16259 SDValue Addr = N->getOperand(2);
16264 // to post-inc the last of them.
16265 unsigned IntNo = N->getConstantOperandVal(1);
16266 if (IntNo == Intrinsic::arm_mve_vst2q && N->getConstantOperandVal(5) != 1)
16268 if (IntNo == Intrinsic::arm_mve_vst4q && N->getConstantOperandVal(7) != 3)
16272 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16273 UE = Addr.getNode()->use_end();
16276 if (User->getOpcode() != ISD::ADD ||
16322 VecTy = N->getValueType(0);
16324 VecTy = N->getOperand(3).getValueType();
16330 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
16332 if (!CInc || CInc->getZExtValue() != NumBytes)
16348 Ops.push_back(N->getOperand(0)); // incoming chain
16349 Ops.push_back(N->getOperand(2)); // ptr
16352 for (unsigned i = 3; i < N->getNumOperands(); ++i)
16353 Ops.push_back(N->getOperand(i));
16356 MemN->getMemOperand());
16373 /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
16374 /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
16375 /// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
16379 EVT VT = N->getValueType(0);
16380 // vldN-dup instructions only support 64-bit vectors for N > 1.
16384 // Check if the VDUPLANE operand is a vldN-dup intrinsic.
16385 SDNode *VLD = N->getOperand(0).getNode();
16386 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
16390 unsigned IntNo = VLD->getConstantOperandVal(1);
16404 // First check that all the vldN-lane uses are VDUPLANEs and that the lane
16406 unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3);
16407 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16413 if (User->getOpcode() != ARMISD::VDUPLANE ||
16414 VLDLaneNo != User->getConstantOperandVal(1))
16418 // Create the vldN-dup node.
16425 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
16428 Ops, VLDMemInt->getMemoryVT(),
16429 VLDMemInt->getMemOperand());
16432 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16442 // Now the vldN-lane intrinsic is dead except for its chain result.
16453 /// PerformVDUPLANECombine - Target-specific dag combine xforms for
16458 SDValue Op = N->getOperand(0);
16459 EVT VT = N->getValueType(0);
16462 if (Subtarget->hasMVEIntegerOps()) {
16468 N->getOperand(0), N->getOperand(1));
16472 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
16473 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
16486 // The canonical VMOV for a zero vector uses a 32-bit element size.
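// (Illustrative: the all-zero vector is emitted as "vmov.i32 qd, #0" rather
//  than with an 8-bit or 16-bit element size.)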
16497 /// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
16500 SDValue Op = N->getOperand(0);
16503 if (Subtarget->hasMVEIntegerOps()) {
16504 // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will
16507 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16510 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16514 if (!Subtarget->hasNEON())
16517 // Match VDUP(LOAD) -> VLD1DUP.
16521 if (LD && Op.hasOneUse() && LD->isUnindexed() &&
16522 LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
16523 SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
16524 DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};
16525 SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
16528 LD->getMemoryVT(), LD->getMemOperand());
16539 EVT VT = N->getValueType(0);
16542 if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&
16554 SDValue StVal = St->getValue();
16556 if (!St->isTruncatingStore() || !VT.isVector())
16559 EVT StVT = St->getMemoryVT();
16584 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
16586 ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
16608 // Bitcast the original vector into a vector of store-size units
16617 SDValue BasePtr = St->getBasePtr();
16625 DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
16626 St->getAlign(), St->getMemOperand()->getFlags());
16639 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16641 SDValue Trunc = St->getValue();
16642 if (Trunc->getOpcode() != ISD::FP_ROUND)
16644 EVT FromVT = Trunc->getOperand(0).getValueType();
16667 ArrayRef<int> M = SVN->getMask();
16669 if (SVN->getOperand(1).isUndef())
16692 SDValue Ch = St->getChain();
16693 SDValue BasePtr = St->getBasePtr();
16694 Align Alignment = St->getOriginalAlign();
16695 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16696 AAMDNodes AAInfo = St->getAAInfo();
16720 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16732 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16734 SDValue Trunc = St->getValue();
16735 if (Trunc->getOpcode() != ARMISD::MVETRUNC)
16737 EVT FromVT = Trunc->getOperand(0).getValueType();
16743 SDValue Ch = St->getChain();
16744 SDValue BasePtr = St->getBasePtr();
16745 Align Alignment = St->getOriginalAlign();
16746 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16747 AAMDNodes AAInfo = St->getAAInfo();
16761 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16771 // use of more integer post-inc stores not available with vstr.
16773 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16775 SDValue Extract = St->getValue();
16779 if (VT != MVT::f16 || Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16791 SDValue Ch = St->getChain();
16792 SDValue BasePtr = St->getBasePtr();
16793 Align Alignment = St->getOriginalAlign();
16794 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16795 AAMDNodes AAInfo = St->getAAInfo();
16798 St->getPointerInfo(), NewToVT, Alignment,
16804 /// PerformSTORECombine - Target-specific dag combine xforms for
16810 if (St->isVolatile())
16812 SDValue StVal = St->getValue();
16815 if (Subtarget->hasNEON())
16819 if (Subtarget->hasMVEFloatOps())
16823 if (Subtarget->hasMVEIntegerOps()) {
16836 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
16837 StVal.getNode()->hasOneUse()) {
16841 SDValue BasePtr = St->getBasePtr();
16843 St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
16844 BasePtr, St->getPointerInfo(), St->getOriginalAlign(),
16845 St->getMemOperand()->getFlags());
16850 StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
16851 OffsetPtr, St->getPointerInfo().getWithOffset(4),
16852 St->getOriginalAlign(),
16853 St->getMemOperand()->getFlags());
16857 StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16875 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
16876 St->getPointerInfo(), St->getAlign(),
16877 St->getMemOperand()->getFlags(), St->getAAInfo());
16881 if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
16888 /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
16889 /// can replace combinations of VMUL and VCVT (floating-point to integer)
16899 if (!Subtarget->hasNEON())
16902 SDValue Op = N->getOperand(0);
16907 SDValue ConstVec = Op->getOperand(1);
16913 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
16926 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16927 if (C == -1 || C == 0 || C > 32)
16931 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
16936 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
16940 FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
16947 if (!Subtarget->hasMVEFloatOps())
16950 // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)
16953 SDValue Op0 = N->getOperand(0);
16954 SDValue Op1 = N->getOperand(1);
16955 EVT VT = N->getValueType(0);
16958 // The identity element for a fadd is -0.0 or +0.0 when the nsz flag is set,
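// (Reasoning sketch: in the lanes where the vselect picks -0.0 the fadd
//  computes x + (-0.0) == x, which is exactly the "x" the rewritten vselect
//  returns, so the two forms agree lane by lane.)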
16978 SDNodeFlags FaddFlags = N->getFlags();
16989 SDValue LHS = N->getOperand(0);
16990 SDValue RHS = N->getOperand(1);
16991 EVT VT = N->getValueType(0);
16994 if (!N->getFlags().hasAllowReassociation())
16997 // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
17006 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(2), B, N->getFlags()),
17008 VCMLA->setFlags(A->getFlags());
17028 /// PerformVMulVCTPCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
17029 /// can replace combinations of VCVT (integer to floating-point) and VMUL
17039 if (!Subtarget->hasNEON())
17042 SDValue Op = N->getOperand(0);
17043 unsigned OpOpcode = Op.getNode()->getOpcode();
17044 if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
17048 SDValue ConstVec = N->getOperand(1);
17052 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
17067 if (!CN || !CN->getValueAPF().getExactInverse(&Recip))
17078 if (C == -1 || C == 0 || C > 32)
17097 if (!ST->hasMVEIntegerOps())
17100 assert(N->getOpcode() == ISD::VECREDUCE_ADD);
17101 EVT ResVT = N->getValueType(0);
17102 SDValue N0 = N->getOperand(0);
17145 if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
17147 SDValue A = N0->getOperand(0);
17154 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17155 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17157 Mask = N0->getOperand(0);
17158 SDValue Ext = N0->getOperand(1);
17159 if (Ext->getOpcode() != ExtendCode)
17161 SDValue A = Ext->getOperand(0);
17179 if (Mul->getOpcode() == ExtendCode &&
17180 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17182 Mul = Mul->getOperand(0);
17183 if (Mul->getOpcode() != ISD::MUL)
17185 SDValue ExtA = Mul->getOperand(0);
17186 SDValue ExtB = Mul->getOperand(1);
17187 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17189 A = ExtA->getOperand(0);
17190 B = ExtB->getOperand(0);
17206 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17207 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17209 Mask = N0->getOperand(0);
17210 SDValue Mul = N0->getOperand(1);
17211 if (Mul->getOpcode() == ExtendCode &&
17212 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17214 Mul = Mul->getOperand(0);
17215 if (Mul->getOpcode() != ISD::MUL)
17217 SDValue ExtA = Mul->getOperand(0);
17218 SDValue ExtB = Mul->getOperand(1);
17219 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17221 A = ExtA->getOperand(0);
17222 B = ExtB->getOperand(0);
17231 // Split illegal MVT::v16i8->i64 vector reductions into two legal v8i16->i64
17334 if (Op->getOpcode() == ISD::VSELECT)
17335 Op = Op->getOperand(1);
17336 if (Op->getOpcode() == ISD::ZERO_EXTEND &&
17337 Op->getOperand(0)->getOpcode() == ISD::MUL) {
17338 SDValue Mul = Op->getOperand(0);
17339 if (Mul->getOperand(0) == Mul->getOperand(1) &&
17340 Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
17341 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
17343 Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
17344 N0->getOperand(0), Ext, N0->getOperand(2));
17356 unsigned VecOp = N->getOperand(0).getValueType().isVector() ? 0 : 2;
17357 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp));
17358 if (!Shuf || !Shuf->getOperand(1).isUndef())
17362 ArrayRef<int> Mask = Shuf->getMask();
17372 if (N->getNumOperands() != VecOp + 1) {
17373 auto *Shuf2 = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp + 1));
17374 if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)
17379 for (SDValue Op : N->ops()) {
17385 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops);
17390 SDValue Op0 = N->getOperand(0);
17391 SDValue Op1 = N->getOperand(1);
17392 unsigned IsTop = N->getConstantOperandVal(2);
17394 // VMOVNT a undef -> a
17395 // VMOVNB a undef -> a
17396 // VMOVNB undef a -> a
17397 if (Op1->isUndef())
17399 if (Op0->isUndef() && !IsTop)
17404 if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
17405 Op1->getOpcode() == ARMISD::VQMOVNu) &&
17406 Op1->getConstantOperandVal(2) == 0)
17407 return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),
17408 Op0, Op1->getOperand(1), N->getOperand(2));
17413 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17430 SDValue Op0 = N->getOperand(0);
17431 unsigned IsTop = N->getConstantOperandVal(2);
17433 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17446 EVT VT = N->getValueType(0);
17447 SDValue LHS = N->getOperand(0);
17448 SDValue RHS = N->getOperand(1);
17452 // Turn VQDMULH(shuffle, shuffle) -> shuffle(VQDMULH)
17453 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
17457 SDValue NewBinOp = DCI.DAG.getNode(N->getOpcode(), DL, VT,
17460 return DCI.DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
17467 SDValue Op0 = N->getOperand(0);
17468 SDValue Op1 = N->getOperand(1);
17470 // Turn X << -C -> X >> C and vice versa. The negative shifts can come up from
17472 if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
17473 int ShiftAmt = C->getSExtValue();
17480 if (ShiftAmt >= -32 && ShiftAmt < 0) {
17482 N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
17483 SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
17484 DAG.getConstant(-ShiftAmt, DL, MVT::i32));
17493 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
17497 unsigned IntNo = N->getConstantOperandVal(0);
17505 // the build_vectors for 64-bit vector element shift counts are generally
17522 EVT VT = N->getOperand(1).getValueType();
17529 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
17533 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
17542 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
17548 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17553 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17565 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
17618 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17619 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
17623 EVT VT = N->getOperand(1).getValueType();
17627 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
17629 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
17636 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17637 N->getOperand(1), N->getOperand(2),
17660 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17662 if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
17677 unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17679 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
17687 bool Unsigned = N->getConstantOperandVal(2);
17689 return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
17696 bool Unsigned = N->getConstantOperandVal(2);
17702 for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
17704 Ops.push_back(N->getOperand(i));
17716 /// PerformShiftCombine - Checks for immediate versions of vector shifts and
17718 /// combining instead of DAG legalizing because the build_vectors for 64-bit
17725 EVT VT = N->getValueType(0);
17727 if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
17728 N->getOperand(0)->getOpcode() == ISD::AND &&
17729 N->getOperand(0)->hasOneUse()) {
17736 SDValue N0 = N->getOperand(0);
17737 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17740 uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
17741 ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17744 uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
17752 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
17756 DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
17765 if (ST->hasMVEIntegerOps())
17770 switch (N->getOpcode()) {
17774 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
17776 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
17783 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
17785 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
17787 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
17799 SDValue N0 = N->getOperand(0);
17803 if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
17804 LD->getExtensionType() != ISD::NON_EXTLOAD)
17806 EVT FromVT = LD->getValueType(0);
17807 EVT ToVT = N->getValueType(0);
17828 SDValue Ch = LD->getChain();
17829 SDValue BasePtr = LD->getBasePtr();
17830 Align Alignment = LD->getOriginalAlign();
17831 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
17832 AAMDNodes AAInfo = LD->getAAInfo();
17835 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
17851 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17877 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
17881 SDValue N0 = N->getOperand(0);
17883 // Check for sign- and zero-extensions of vector extract operations of 8- and
17884 // 16-bit vector elements. NEON and MVE support these directly. They are
17886 // to 32-bit types and it is messy to recognize the operations after that.
17887 if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
17891 EVT VT = N->getValueType(0);
17901 switch (N->getOpcode()) {
17915 if (ST->hasMVEIntegerOps())
17924 if (ST->hasMVEFloatOps())
17935 if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
17936 !Subtarget->isThumb2())
17972 /// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
17976 EVT VT = N->getValueType(0);
17977 SDValue N0 = N->getOperand(0);
17982 if (!ST->hasMVEIntegerOps())
17993 if (Min->getOpcode() != ISD::SMIN)
17995 if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
18000 SaturateC = APInt(32, (1 << 15) - 1, true);
18002 SaturateC = APInt(16, (1 << 7) - 1, true);
18005 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18008 if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
18030 N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
18038 if (Min->getOpcode() != ISD::UMIN)
18043 SaturateC = APInt(32, (1 << 16) - 1, true);
18045 SaturateC = APInt(16, (1 << 8) - 1, true);
18048 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18084 const APInt *CV = &C->getAPIntValue();
18085 return CV->isPowerOf2() ? CV : nullptr;
18102 SDValue Op0 = CMOV->getOperand(0);
18103 SDValue Op1 = CMOV->getOperand(1);
18104 auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
18105 SDValue CmpZ = CMOV->getOperand(4);
18108 if (!isNullConstant(CmpZ->getOperand(1)))
18111 assert(CmpZ->getOpcode() == ARMISD::CMPZ);
18112 SDValue And = CmpZ->getOperand(0);
18113 if (And->getOpcode() != ISD::AND)
18115 const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
18118 SDValue X = And->getOperand(0);
18128 if (Op1->getOpcode() != ISD::OR)
18131 ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
18134 SDValue Y = Op1->getOperand(0);
18140 APInt OrCI = OrC->getAPIntValue();
18141 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
18155 unsigned BitInX = AndC->logBase2();
18185 switch (N->getOpcode()) {
18191 if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
18200 if (Const->isZero())
18202 else if (Const->isOne())
18206 CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
18207 return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
18224 // The hwloop intrinsics that we're interested in are used for control-flow,
18226 // - test.start.loop.iterations will test whether its operand is zero. If it
18228 // - loop.decrement.reg also tests whether its operand is zero. If it is
18239 SDValue Chain = N->getOperand(0);
18242 if (N->getOpcode() == ISD::BRCOND) {
18244 Cond = N->getOperand(1);
18245 Dest = N->getOperand(2);
18247 assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
18248 CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18249 Cond = N->getOperand(2);
18250 Dest = N->getOperand(4);
18251 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
18252 if (!Const->isOne() && !Const->isZero())
18254 Imm = Const->getZExtValue();
18288 unsigned IntOp = Int->getConstantOperandVal(1);
18289 assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR)
18291 SDNode *Br = *N->use_begin();
18292 SDValue OtherTarget = Br->getOperand(1);
18296 SDValue NewBrOps[] = { Br->getOperand(0), Dest };
18346 /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
18349 SDValue Cmp = N->getOperand(4);
18354 EVT VT = N->getValueType(0);
18358 SDValue Chain = N->getOperand(0);
18359 SDValue BB = N->getOperand(1);
18360 SDValue ARMcc = N->getOperand(2);
18361 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
18364 // -> (brcond Chain BB CC CPSR Cmp)
18365 if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
18366 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
18367 LHS->getOperand(0)->hasOneUse() &&
18368 isNullConstant(LHS->getOperand(0)->getOperand(0)) &&
18369 isOneConstant(LHS->getOperand(0)->getOperand(1)) &&
18370 isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {
18372 ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
18373 LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
18379 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
18382 SDValue Cmp = N->getOperand(4);
18387 EVT VT = N->getValueType(0);
18391 SDValue FalseVal = N->getOperand(0);
18392 SDValue TrueVal = N->getOperand(1);
18393 SDValue ARMcc = N->getOperand(2);
18394 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
18397 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
18423 N->getOperand(3), Cmp);
18428 N->getOperand(3), NewCmp);
18432 // -> (cmov F T CC CPSR Cmp)
18433 if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
18434 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
18437 LHS->getOperand(2), LHS->getOperand(3),
18438 LHS->getOperand(4));
18445 // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) ->
18446 // if C1==EQ -> CMOV A, B, C2, $cpsr, D
18447 // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D
18448 if (N->getConstantOperandVal(2) == ARMCC::EQ ||
18449 N->getConstantOperandVal(2) == ARMCC::NE) {
18451 if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) {
18452 if (N->getConstantOperandVal(2) == ARMCC::NE)
18454 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
18455 N->getOperand(1),
18457 N->getOperand(3), C);
18464 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
18465 // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it
18467 // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
18472 // CMOV 0, 1, ==, (CMPZ x, y) ->
18476 // The USUBO_CARRY computes 0 - (x - y) and this will give a borrow when
18480 // x - y + (0 - (x - y)) + C == C
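// Worked example for the CLZ form above (illustrative): if x == y then
// SUB x, y is 0, CLZ of 0 is 32, and 32 >> 5 is 1; if x != y the SUB is
// nonzero, CLZ is in [0, 31], and the shift by 5 yields 0, exactly the
// 0/1 value the original CMOV selects.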
18492 (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
18494 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18500 N->getOperand(3), CPSRGlue.getValue(1));
18505 (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
18508 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18515 N->getOperand(3), CPSRGlue.getValue(1));
18522 // CMOV (SUBC x, y), z, !=, (SUBC x, y):1 ->
18529 // CMOV x, z, !=, (CMPZ x, 0) ->
18534 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
18540 unsigned ShiftAmount = TrueConst->logBase2();
18573 SDValue Src = N->getOperand(0);
18574 EVT DstVT = N->getValueType(0);
18576 // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
18577 if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
18588 // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
18598 // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD x
18610 EVT VT = N->getValueType(0);
18613 // MVETrunc(Undef, Undef) -> Undef
18614 if (all_of(N->ops(), [](SDValue Op) { return Op.isUndef(); }))
18617 // MVETrunc(MVETrunc a b, MVETrunc c, d) -> MVETrunc
18618 if (N->getNumOperands() == 2 &&
18619 N->getOperand(0).getOpcode() == ARMISD::MVETRUNC &&
18620 N->getOperand(1).getOpcode() == ARMISD::MVETRUNC)
18621 return DAG.getNode(ARMISD::MVETRUNC, DL, VT, N->getOperand(0).getOperand(0),
18622 N->getOperand(0).getOperand(1),
18623 N->getOperand(1).getOperand(0),
18624 N->getOperand(1).getOperand(1));
18626 // MVETrunc(shuffle, shuffle) -> VMOVN
18627 if (N->getNumOperands() == 2 &&
18628 N->getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
18629 N->getOperand(1).getOpcode() == ISD::VECTOR_SHUFFLE) {
18630 auto *S0 = cast<ShuffleVectorSDNode>(N->getOperand(0).getNode());
18631 auto *S1 = cast<ShuffleVectorSDNode>(N->getOperand(1).getNode());
18633 if (S0->getOperand(0) == S1->getOperand(0) &&
18634 S0->getOperand(1) == S1->getOperand(1)) {
18636 SmallVector<int, 8> Mask(S0->getMask());
18637 Mask.append(S1->getMask().begin(), S1->getMask().end());
18642 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18643 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18648 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18649 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18656 if (all_of(N->ops(), [](SDValue Op) {
18663 for (unsigned Op = 0; Op < N->getNumOperands(); Op++) {
18664 SDValue O = N->getOperand(Op);
18682 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18683 int NumIns = N->getNumOperands();
18687 if (N->getNumOperands() == 4)
18697 SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), DL, N->getOperand(I),
18711 SDValue N0 = N->getOperand(0);
18713 if (!LD || !LD->isSimple() || !N0.hasOneUse() || LD->isIndexed())
18716 EVT FromVT = LD->getMemoryVT();
18717 EVT ToVT = N->getValueType(0);
18732 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
18733 if (LD->getExtensionType() != ISD::NON_EXTLOAD &&
18734 LD->getExtensionType() != ISD::EXTLOAD &&
18735 LD->getExtensionType() != NewExtType)
18741 SDValue Ch = LD->getChain();
18742 SDValue BasePtr = LD->getBasePtr();
18743 Align Alignment = LD->getOriginalAlign();
18744 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
18745 AAMDNodes AAInfo = LD->getAAInfo();
18762 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18779 EVT VT = N->getValueType(0);
18781 assert(N->getNumValues() == 2 && "Expected MVEEXT with 2 elements");
18784 EVT ExtVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18788 return N->getOpcode() == ARMISD::MVESEXT
18794 // MVEEXT(VDUP) -> SIGN_EXTEND_INREG(VDUP)
18795 if (N->getOperand(0).getOpcode() == ARMISD::VDUP) {
18796 SDValue Ext = Extend(N->getOperand(0));
18800 // MVEEXT(shuffle) -> SIGN_EXTEND_INREG/ZERO_EXTEND_INREG
18801 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0))) {
18802 ArrayRef<int> Mask = SVN->getMask();
18804 assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());
18806 SDValue Op0 = SVN->getOperand(0);
18807 SDValue Op1 = SVN->getOperand(1);
18820 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18824 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18829 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18833 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18839 // MVEEXT(load) -> extload, extload
18840 if (N->getOperand(0)->getOpcode() == ISD::LOAD)
18850 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18851 int NumOuts = N->getNumValues();
18854 EVT LoadVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18856 if (N->getNumOperands() == 4)
18861 SDValue Chain = DAG.getStore(DAG.getEntryNode(), DL, N->getOperand(0),
18872 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL,
18882 switch (N->getOpcode()) {
18999 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19001 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
19006 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19008 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
19017 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19019 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19020 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19025 unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
19027 unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
19029 if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
19030 (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
19035 unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
19037 unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
19039 if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
19040 (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
19045 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19047 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19048 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19056 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19058 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19059 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19065 switch (N->getConstantOperandVal(1)) {
19116 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
19123 *Fast = Subtarget->hasV7Ops();
19129 // For any little-endian targets with neon, we can support unaligned ld/st
19131 // A big-endian target may also explicitly support unaligned accesses
19132 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
19139 if (!Subtarget->hasMVEIntegerOps())
19159 // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
19183 if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
19201 // Let the target-independent logic figure it out.
19205 // 64-bit integers are split into their high and low parts and held in two
19209 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
19211 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
19212 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
19239 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
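// (Illustrative: "ldrb r0, [r1]" and "ldrh r0, [r1]" already leave the upper
//  bits of r0 clear, so a following zero-extend costs nothing.)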
19258 return Subtarget->hasFullFP16();
19268 return Ext->getType()->getScalarSizeInBits() ==
19269 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
19286 if (!I->getType()->isVectorTy())
19289 if (Subtarget->hasNEON()) {
19290 switch (I->getOpcode()) {
19293 if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
19295 Ops.push_back(&I->getOperandUse(0));
19296 Ops.push_back(&I->getOperandUse(1));
19304 if (!Subtarget->hasMVEIntegerOps())
19308 if (!I->hasOneUse())
19310 auto *Sub = cast<Instruction>(*I->users().begin());
19311 return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
19314 if (match(I->getOperand(0), m_FNeg(m_Value())) ||
19315 match(I->getOperand(1), m_FNeg(m_Value())))
19321 switch (I->getOpcode()) {
19338 switch (II->getIntrinsicID()) {
19373 for (auto OpIdx : enumerate(I->operands())) {
19376 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
19380 if (Shuffle->getOpcode() == Instruction::BitCast)
19381 Shuffle = dyn_cast<Instruction>(Shuffle->getOperand(0));
19393 for (Use &U : Op->uses()) {
19399 Ops.push_back(&Shuffle->getOperandUse(0));
19401 Ops.push_back(&Op->getOperandUse(0));
19408 if (!Subtarget->hasMVEIntegerOps())
19410 Type *SVIType = SVI->getType();
19411 Type *ScalarType = SVIType->getScalarType();
19413 if (ScalarType->isFloatTy())
19414 return Type::getInt32Ty(SVIType->getContext());
19415 if (ScalarType->isHalfTy())
19416 return Type::getInt16Ty(SVIType->getContext());
19427 if (Ld->isExpandingLoad())
19431 if (Subtarget->hasMVEIntegerOps())
19438 if (ExtVal->use_empty() ||
19439 !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
19442 SDNode *U = *ExtVal->use_begin();
19443 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
19444 U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
19451 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19457 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
19464 /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
19469 /// ARM supports both fused and unfused multiply-add operations; we already
19475 /// patterns (and we don't have the non-fused floating point instruction).
19484 return Subtarget->hasMVEFloatOps();
19486 return Subtarget->useFPVFMx16();
19488 return Subtarget->useFPVFMx();
19490 return Subtarget->useFPVFMx64();
19519 if ((V & (Scale - 1)) != 0)
19528 if (VT.isVector() && Subtarget->hasNEON())
19530 if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
19531 !Subtarget->hasMVEFloatOps())
19537 V = -V;
19543 if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
19559 if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
19562 if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
19566 // + imm12 or - imm8
19575 /// isLegalAddressImmediate - Return true if the integer value can be used
19586 if (Subtarget->isThumb1Only())
19588 else if (Subtarget->isThumb2())
19593 V = - V;
19599 // +- imm12
19602 // +- imm8
19606 if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
19665 /// isLegalAddressingMode - Return true if the addressing mode represented
19689 if (Subtarget->isThumb1Only())
19692 if (Subtarget->isThumb2())
19701 if (Scale < 0) Scale = -Scale;
19708 // r +/- r
19709 if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
19729 /// isLegalICmpImmediate - Return true if the specified immediate is legal
19735 if (!Subtarget->isThumb())
19736 return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
19737 ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
19738 if (Subtarget->isThumb2())
19739 return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
19740 ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
19741 // Thumb1 doesn't have cmn, and only 8-bit immediates.
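// Example for the ARM/Thumb2 cases above (illustrative): comparing against
// -200 is still legal because the negated value fits a modified immediate, so
//   cmn r0, #200
// can be used instead of materialising -200 in a register first.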
19745 /// isLegalAddImmediate - Return true if the specified immediate is a legal add
19752 if (!Subtarget->isThumb())
19753 return ARM_AM::getSOImmVal(AbsImm) != -1;
19754 if (Subtarget->isThumb2())
19755 return ARM_AM::getT2SOImmVal(AbsImm) != -1;
19756 // Thumb1 only has 8-bit unsigned immediate.
19761 // (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
19774 const int64_t C0 = C0Node->getSExtValue();
19775 APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
19789 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19794 Base = Ptr->getOperand(0);
19795 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19796 int RHSC = (int)RHS->getZExtValue();
19797 if (RHSC < 0 && RHSC > -256) {
19798 assert(Ptr->getOpcode() == ISD::ADD);
19800 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19804 isInc = (Ptr->getOpcode() == ISD::ADD);
19805 Offset = Ptr->getOperand(1);
19809 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19810 int RHSC = (int)RHS->getZExtValue();
19811 if (RHSC < 0 && RHSC > -0x1000) {
19812 assert(Ptr->getOpcode() == ISD::ADD);
19814 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19815 Base = Ptr->getOperand(0);
19820 if (Ptr->getOpcode() == ISD::ADD) {
19823 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
19825 Base = Ptr->getOperand(1);
19826 Offset = Ptr->getOperand(0);
19828 Base = Ptr->getOperand(0);
19829 Offset = Ptr->getOperand(1);
19834 isInc = (Ptr->getOpcode() == ISD::ADD);
19835 Base = Ptr->getOperand(0);
19836 Offset = Ptr->getOperand(1);
19848 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19851 Base = Ptr->getOperand(0);
19852 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19853 int RHSC = (int)RHS->getZExtValue();
19854 if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
19855 assert(Ptr->getOpcode() == ISD::ADD);
19857 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19860 isInc = Ptr->getOpcode() == ISD::ADD;
19861 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19873 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19875 if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
19878 // We allow LE non-masked loads to change the type (for example use a vldrb.8
19883 ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
19884 int RHSC = (int)RHS->getZExtValue();
19887 if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
19888 assert(Ptr->getOpcode() == ISD::ADD);
19890 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19893 isInc = Ptr->getOpcode() == ISD::ADD;
19894 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19900 // Try to find a matching instruction based on s/zext, Alignment, Offset and
19902 Base = Ptr->getOperand(0);
19922 /// getPreIndexedAddressParts - returns true by value, base pointer and
19924 /// can be legally represented as pre-indexed load / store address.
19930 if (Subtarget->isThumb1Only())
19939 Ptr = LD->getBasePtr();
19940 VT = LD->getMemoryVT();
19941 Alignment = LD->getAlign();
19942 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19944 Ptr = ST->getBasePtr();
19945 VT = ST->getMemoryVT();
19946 Alignment = ST->getAlign();
19948 Ptr = LD->getBasePtr();
19949 VT = LD->getMemoryVT();
19950 Alignment = LD->getAlign();
19951 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19954 Ptr = ST->getBasePtr();
19955 VT = ST->getMemoryVT();
19956 Alignment = ST->getAlign();
19964 isLegal = Subtarget->hasMVEIntegerOps() &&
19967 Subtarget->isLittle(), Base, Offset, isInc, DAG);
19969 if (Subtarget->isThumb2())
19983 /// getPostIndexedAddressParts - returns true by value, base pointer and
19985 /// combined with a load / store to form a post-indexed load / store.
19997 VT = LD->getMemoryVT();
19998 Ptr = LD->getBasePtr();
19999 Alignment = LD->getAlign();
20000 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
20001 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
20003 VT = ST->getMemoryVT();
20004 Ptr = ST->getBasePtr();
20005 Alignment = ST->getAlign();
20006 isNonExt = !ST->isTruncatingStore();
20008 VT = LD->getMemoryVT();
20009 Ptr = LD->getBasePtr();
20010 Alignment = LD->getAlign();
20011 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
20012 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
20015 VT = ST->getMemoryVT();
20016 Ptr = ST->getBasePtr();
20017 Alignment = ST->getAlign();
20018 isNonExt = !ST->isTruncatingStore();
20023 if (Subtarget->isThumb1Only()) {
20024 // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
20025 // must be non-extending/truncating, i32, with an offset of 4.
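// For example (illustrative), such a +4 post-incremented i32 load can be
// selected as
//   ldm r0!, {r1}
// which loads from r0 and leaves r0 advanced by 4.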
20026 assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
20027 if (Op->getOpcode() != ISD::ADD || !isNonExt)
20029 auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
20030 if (!RHS || RHS->getZExtValue() != 4)
20035 Offset = Op->getOperand(1);
20036 Base = Op->getOperand(0);
20044 isLegal = Subtarget->hasMVEIntegerOps() &&
20046 Subtarget->isLittle(), Base, Offset,
20049 if (Subtarget->isThumb2())
20060 // Swap base ptr and offset to catch more post-index load / store when
20062 if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
20063 !Subtarget->isThumb2())
20066 // Post-indexed load / store update the base pointer.
20093 if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
20095 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
20112 static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
20117 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
20119 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
20143 assert(Pos->getAPIntValue().ult(NumSrcElts) &&
20145 unsigned Idx = Pos->getZExtValue();
20164 KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20172 KnownBits KnownOp0 = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20173 KnownBits KnownOp1 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
20178 // CSNEG: KnownOp0 or KnownOp1 * -1
20187 KnownOp1, KnownBits::makeConstant(APInt(32, -1)));
20220 unsigned Mask = C->getZExtValue();
20226 // If the mask is all zeros, let the target-independent code replace the
20231 // If the mask is all ones, erase the AND. (Currently, the target-independent
20237 auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
20240 auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
20262 // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
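// For instance (illustrative), masking with 0xFFFFFF00 (-256) on Thumb1 can
// be done as
//   movs r1, #255
//   bics r0, r1
// since BICS clears the bits that are set in r1.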
20264 if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
20271 // two-instruction sequence.
20289 if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
20290 isa<ConstantSDNode>(Op->getOperand(2))) {
20291 unsigned ShAmt = Op->getConstantOperandVal(2);
20293 << (32 - ShAmt)))
20297 TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
20315 //===----------------------------------------------------------------------===//
20317 //===----------------------------------------------------------------------===//
20321 if (!Subtarget->hasV6Ops())
20324 InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
20325 StringRef AsmStr = IA->getAsmString();
20339 IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
20340 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
20341 if (Ty && Ty->getBitWidth() == 32)
20358 if (!Subtarget->hasVFP2Base())
20362 if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
20370 /// getConstraintType - Given a constraint letter, return the type of
20411 Type *type = CallOperandVal->getType();
20418 if (type->isIntegerTy()) {
20419 if (Subtarget->isThumb())
20426 if (type->isFloatingPointTy())
20442 if (Subtarget->isThumb())
20446 if (Subtarget->isThumb())
20450 if (Subtarget->isThumb1Only())
20509 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
20531 int64_t CVal64 = C->getSExtValue();
20542 if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
20547 if (Subtarget->isThumb1Only()) {
20552 } else if (Subtarget->isThumb2()) {
20554 // data-processing instruction.
20555 if (ARM_AM::getT2SOImmVal(CVal) != -1)
20559 // data-processing instruction.
20560 if (ARM_AM::getSOImmVal(CVal) != -1)
20566 if (Subtarget->isThumb1Only()) {
20567 // This must be a constant between -255 and -1, for negated ADD
20571 if (CVal >= -255 && CVal <= -1)
20574 // This must be a constant between -4095 and 4095. It is not clear
20577 if (CVal >= -4095 && CVal <= 4095)
20583 if (Subtarget->isThumb1Only()) {
20584 // A 32-bit value where only one byte has a nonzero value. Exclude
20590 } else if (Subtarget->isThumb2()) {
20592 // value in a data-processing instruction. This can be used in GCC
20596 if (ARM_AM::getT2SOImmVal(~CVal) != -1)
20600 // value in a data-processing instruction. This can be used in GCC
20604 if (ARM_AM::getSOImmVal(~CVal) != -1)
20610 if (Subtarget->isThumb1Only()) {
20611 // This must be a constant between -7 and 7,
20612 // for 3-operand ADD/SUB immediate instructions.
20613 if (CVal >= -7 && CVal < 7)
20615 } else if (Subtarget->isThumb2()) {
20617 // data-processing instruction. This can be used in GCC with an "n"
20621 if (ARM_AM::getT2SOImmVal(-CVal) != -1)
20625 // data-processing instruction. This can be used in GCC with an "n"
20629 if (ARM_AM::getSOImmVal(-CVal) != -1)
20635 if (Subtarget->isThumb1Only()) {
20644 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
20650 if (Subtarget->isThumb1Only()) {
20658 if (Subtarget->isThumb1Only()) {
20659 // This must be a multiple of 4 between -508 and 508, for
20661 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
20679 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20680 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20682 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20683 N->getOpcode() == ISD::SREM;
20697 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20698 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20700 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20701 N->getOpcode() == ISD::SREM;
20704 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20705 EVT ArgVT = N->getOperand(i).getValueType();
20707 Entry.Node = N->getOperand(i);
20713 if (Subtarget->isTargetWindows() && Args.size() >= 2)
20719 assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
20720 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
20721 Subtarget->isTargetWindows()) &&
20722 "Register-based DivRem lowering only");
20723 unsigned Opcode = Op->getOpcode();
20727 EVT VT = Op->getValueType(0);
20737 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
20746 // rem = a - b * div
20749 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
20750 : Subtarget->hasDivideInARMMode();
20751 if (hasDivide && Op->getValueType(0).isSimple() &&
20752 Op->getSimpleValueType(0) == MVT::i32) {
20754 const SDValue Dividend = Op->getOperand(0);
20755 const SDValue Divisor = Op->getOperand(1);
20777 if (Subtarget->isTargetWindows())
20792 EVT VT = N->getValueType(0);
20794 if (VT == MVT::i64 && isa<ConstantSDNode>(N->getOperand(1))) {
20797 return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), N->getValueType(0),
20818 RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
20823 bool isSigned = N->getOpcode() == ISD::SREM;
20827 if (Subtarget->isTargetWindows())
20839 assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
20840 return ResNode->getOperand(1);
20845 assert(Subtarget->isTargetWindows() && "unsupported target platform");
20853 "no-stack-arg-probe")) {
20855 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
20862 DAG.getConstant(-(uint64_t)Align->value(), DL, MVT::i32));
20886 bool IsStrict = Op->isStrictFPOpcode();
20891 "Unexpected type for custom-lowering FP_EXTEND");
20893 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20896 assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
20899 // Converting from 32 -> 64 is valid if we have FP64.
20900 if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
20911 // Either we are converting from 16 -> 64, without FP16 and/or
20912 // FP.double-precision or without Armv8-fp. So we must do it in two
20914 // Or we are converting from 32 -> 64 without fp.double-precision or 16 -> 32
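// (Illustrative sketch: an f16 -> f64 extend is emitted as f16 -> f32
//  (VCVTB.F32.F16 or the __aeabi_h2f helper) followed by f32 -> f64
//  (VCVT.F64.F32 or __aeabi_f2d), picking an instruction or a libcall for
//  each step depending on the available FP features.)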
20921 bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
20935 "Unexpected type for custom-lowering FP_EXTEND");
20945 bool IsStrict = Op->isStrictFPOpcode();
20954 "Unexpected type for custom-lowering FP_ROUND");
20956 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20961 // Instruction from 32 -> 16 if hasFP16 is valid
20962 if (SrcSz == 32 && Subtarget->hasFP16())
20965 // Lib call from 32 -> 16 / 64 -> [32, 16]
20968 "Unexpected type for custom-lowering FP_ROUND");
20992 /// isFPImmLegal - Returns true if the target can instruction select the
20997 if (!Subtarget->hasVFP3Base())
20999 if (VT == MVT::f16 && Subtarget->hasFullFP16())
21000 return ARM_AM::getFP16Imm(Imm) != -1;
21001 if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
21002 ARM_AM::getFP32FP16Imm(Imm) != -1)
21005 return ARM_AM::getFP32Imm(Imm) != -1;
21006 if (VT == MVT::f64 && Subtarget->hasFP64())
21007 return ARM_AM::getFP64Imm(Imm) != -1;
21011 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
21033 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21036 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
21037 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
21049 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21050 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
21069 Type *ArgTy = I.getArgOperand(ArgI)->getType();
21070 if (!ArgTy->isVectorTy())
21074 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21077 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
21078 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
21091 Type *ArgTy = I.getArgOperand(ArgI)->getType();
21092 if (!ArgTy->isVectorTy())
21096 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21108 Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
21110 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21113 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21122 Type *VecTy = I.getArgOperand(1)->getType();
21124 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21127 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21145 Info.memVT = MVT::getVT(I.getType()->getContainedType(0));
21155 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
21166 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21175 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21184 MVT DataVT = MVT::getVT(I.getArgOperand(2)->getType());
21185 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
21247 assert(Ty->isIntegerTy());
21249 unsigned Bits = Ty->getPrimitiveSizeInBits();
21265 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21268 if (!Subtarget->hasDataBarrier()) {
21270 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
21272 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
21285 // Only a full system barrier exists in the M-class architectures.
21286 Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
21292 // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
21299 llvm_unreachable("Invalid fence: unordered/non-atomic");
21304 if (!Inst->hasAtomicStore())
21309 if (Subtarget->preferISHSTBarriers())
21324 llvm_unreachable("Invalid fence: unordered/not-atomic");
21336 // Loads and stores less than 64-bits are already atomic; ones above that
21343 if (Subtarget->isMClass())
21345 else if (Subtarget->isThumb())
21346 has64BitAtomicStore = Subtarget->hasV7Ops();
21348 has64BitAtomicStore = Subtarget->hasV6Ops();
21350 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
21355 // Loads and stores less than 64-bits are already atomic; ones above that
21361 // sections A8.8.72-74 LDRD)
21365 if (Subtarget->isMClass())
21367 else if (Subtarget->isThumb())
21368 has64BitAtomicLoad = Subtarget->hasV7Ops();
21370 has64BitAtomicLoad = Subtarget->hasV6Ops();
21372 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
21378 // and up to 64 bits on the non-M profiles
21381 if (AI->isFloatingPointOperation())
21384 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21386 if (Subtarget->isMClass())
21387 hasAtomicRMW = Subtarget->hasV8MBaselineOps();
21388 else if (Subtarget->isThumb())
21389 hasAtomicRMW = Subtarget->hasV7Ops();
21391 hasAtomicRMW = Subtarget->hasV6Ops();
21392 if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
21393 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21397 // can never succeed. So at -O0 lower this operation to a CAS loop.
21406 // bits, and up to 64 bits on the non-M profiles.
21409 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21413 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
21414 unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
21416 if (Subtarget->isMClass())
21417 HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
21418 else if (Subtarget->isThumb())
21419 HasAtomicCmpXchg = Subtarget->hasV7Ops();
21421 HasAtomicCmpXchg = Subtarget->hasV6Ops();
21423 HasAtomicCmpXchg && Size <= (Subtarget->isMClass() ? 32U : 64U))
21435 return !Subtarget->isROPI() && !Subtarget->isRWPI();
21439 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21451 F->addParamAttr(0, Attribute::AttrKind::InReg);
21456 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21463 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21471 if (!Subtarget->hasNEON())
21478 if (VectorTy->isFPOrFPVectorTy())
21486 assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
21487 unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedValue();
21498 return Subtarget->hasV6T2Ops();
21502 return Subtarget->hasV6T2Ops();
21507 if (!Subtarget->hasV7Ops())
21513 if (!Mask || Mask->getValue().getBitWidth() > 32u)
21515 auto MaskVal = unsigned(Mask->getValue().getZExtValue());
21516 return (Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal)
21517 : ARM_AM::getSOImmVal(MaskVal)) != -1;
21523 if (Subtarget->hasMinSize() && !Subtarget->isTargetWindows())
21532 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21535 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
21538 if (ValueTy->getPrimitiveSizeInBits() == 64) {
21547 if (!Subtarget->isLittle())
21555 Type *Tys[] = { Addr->getType() };
21560 CI->addParamAttr(
21561 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
21567 if (!Subtarget->hasV7Ops())
21569 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21576 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21582 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
21586 Type *Int32Ty = Type::getInt32Ty(M->getContext());
21590 if (!Subtarget->isLittle())
21596 Type *Tys[] = { Addr->getType() };
21601 Val, Strex->getFunctionType()->getParamType(0)),
21603 CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,
21604 Val->getType()));
21610 return Subtarget->isMClass();
21626 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
21628 if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
21634 if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
21636 if (Subtarget->hasMVEIntegerOps() && Factor == 3)
21640 if (VecTy->getNumElements() < 2)
21647 if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)
21652 if (Subtarget->hasNEON() && VecSize == 64)
21658 if (Subtarget->hasNEON())
21660 if (Subtarget->hasMVEIntegerOps())
21685 auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());
21686 Type *EltTy = VecTy->getElementType();
21688 const DataLayout &DL = LI->getDataLayout();
21689 Align Alignment = LI->getAlign();
21701 if (EltTy->isPointerTy())
21707 Value *BaseAddr = LI->getPointerOperand();
21710 // If we're going to generate more than one load, reset the sub-vector type
21712 VecTy = FixedVectorType::get(VecTy->getElementType(),
21713 VecTy->getNumElements() / NumLoads);
21719 if (Subtarget->hasNEON()) {
21720 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21726 Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
21730 Ops.push_back(Builder.getInt32(LI->getAlign().value()));
21738 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21741 Intrinsic::getDeclaration(LI->getModule(), LoadInts, Tys);
21749 // Holds sub-vectors extracted from the load intrinsic return values. The
21750 // sub-vectors are associated with the shufflevector instructions they will
21758 BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
21759 VecTy->getNumElements() * Factor);
21772 if (EltTy->isPointerTy())
21775 FixedVectorType::get(SV->getType()->getElementType(), VecTy));
21781 // Replace uses of the shufflevector instructions with the sub-vectors
21783 // associated with more than one sub-vector, those sub-vectors will be
21789 SVI->replaceAllUsesWith(WideVec);
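// Shape of the rewrite (illustrative, factor 2 on NEON, masks abbreviated):
//   %wide = load <8 x i16>, ptr %p
//   %even = shufflevector %wide, poison, <0,2,4,6>
//   %odd  = shufflevector %wide, poison, <1,3,5,7>
// becomes a single structured load, roughly
//   %vld2 = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0(ptr %p, i32 2)
// with each original shuffle replaced by the matching extractvalue; loads too
// wide for one vldN are split and reassembled with concatenating shuffles.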
21827 auto *VecTy = cast<FixedVectorType>(SVI->getType());
21828 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
21830 unsigned LaneLen = VecTy->getNumElements() / Factor;
21831 Type *EltTy = VecTy->getElementType();
21834 const DataLayout &DL = SI->getDataLayout();
21835 Align Alignment = SI->getAlign();
21845 Value *Op0 = SVI->getOperand(0);
21846 Value *Op1 = SVI->getOperand(1);
21851 if (EltTy->isPointerTy()) {
21856 FixedVectorType::get(IntTy, cast<FixedVectorType>(Op0->getType()));
21864 Value *BaseAddr = SI->getPointerOperand();
21868 // and sub-vector type to something legal.
21870 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
21875 auto Mask = SVI->getShuffleMask();
21879 if (Subtarget->hasNEON()) {
21883 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21887 SI->getModule(), StoreInts[Factor - 2], Tys);
21892 Ops.push_back(Builder.getInt32(SI->getAlign().value()));
21899 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21902 Intrinsic::getDeclaration(SI->getModule(), StoreInts, Tys);
21919 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
21935 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
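// Mirror of the load case (illustrative, factor 2 on NEON): a store of
//   shufflevector <4 x i16> %a, <4 x i16> %b, <0,4,1,5,2,6,3,7>
// is rewritten into one structured store, roughly
//   call void @llvm.arm.neon.vst2.p0.v4i16(ptr %p, <4 x i16> %a, <4 x i16> %b, i32 2)
// The StartMask arithmetic above recovers where each interleaved field starts
// even when parts of the shuffle mask are undef.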
21966 for (unsigned i = 0; i < ST->getNumElements(); ++i) {
21968 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
21974 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
21976 Members += SubMembers * AT->getNumElements();
21977 } else if (Ty->isFloatTy()) {
21982 } else if (Ty->isDoubleTy()) {
21994 return VT->getPrimitiveSizeInBits().getFixedValue() == 64;
21996 return VT->getPrimitiveSizeInBits().getFixedValue() == 128;
21998 switch (VT->getPrimitiveSizeInBits().getFixedValue()) {
22018 if (!ArgTy->isVectorTy())
22021 // Avoid over-aligning vector parameters. It would require realigning the
22026 /// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
22027 /// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
22039 LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
22041 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
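// Concrete AAPCS-VFP example (hedged): a C argument such as
//   struct Quad { float x, y, z, w; };
// is a homogeneous aggregate of four floats, so it must be passed either
// entirely in s0-s3 or entirely on the stack, never split across the two;
// the same consecutive-register requirement is extended here to [N x i32] and
// [N x i64] so front ends can avoid emitting explicit padding arguments.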
22049 return Subtarget->useSjLjEH() ? Register() : ARM::R0;
22056 return Subtarget->useSjLjEH() ? Register() : ARM::R1;
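// i.e. the EH runtime reports the exception pointer in r0 and the selector in
// r1, except under setjmp/longjmp exception handling, where no fixed register
// is advertised.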
22061 ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
22062 AFI->setIsSplitCSR(true);
22068 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
22069 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
22073 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
22074 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
22075 MachineBasicBlock::iterator MBBI = Entry->begin();
22085 Register NewVR = MRI->createVirtualRegister(RC);
22087 // FIXME: this currently does not emit CFI pseudo-instructions; it works
22088 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
22090 // CFI pseudo-instructions.
22091 assert(Entry->getParent()->getFunction().hasFnAttribute(
22094 Entry->addLiveIn(*I);
22095 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
22098 // Insert the copy-back instructions right before the terminator.
22100 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
22101 TII->get(TargetOpcode::COPY), *I)
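// In effect, for CXX_FAST_TLS functions the callee-saved registers are not
// spilled in the prologue at all: they are copied into virtual registers at
// entry and copied back immediately before every return, keeping the hot
// TLS-access path free of stack traffic.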
22112 return Subtarget->hasMVEIntegerOps();
22121 auto *ScalarTy = VTy->getScalarType();
22122 unsigned NumElements = VTy->getNumElements();
22124 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
22129 if (ScalarTy->isHalfTy() || ScalarTy->isFloatTy())
22130 return Subtarget->hasMVEFloatOps();
22135 return Subtarget->hasMVEIntegerOps() &&
22136 (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
22137 ScalarTy->isIntegerTy(32));
22145 FixedVectorType *Ty = cast<FixedVectorType>(InputA->getType());
22147 unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();
22152 int Stride = Ty->getNumElements() / 2;
22153 auto SplitSeq = llvm::seq<int>(0, Ty->getNumElements());
22175 ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());
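// Sketch of the splitting step (illustrative): a 256-bit input such as
// <16 x half> is cut into two 128-bit halves using Stride = 8, each half is
// lowered separately to the MVE complex operations (e.g. VCMLA/VCADD), and
// the results are stitched back together by a shufflevector whose JoinMask is
// just the identity sequence 0..15 over the concatenated halves.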