Lines matching defs:AArch64
1 //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
119 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
124 cl::desc("Enable AArch64 logical imm instruction "
134 cl::desc("Combine extends of AArch64 masked "
160 static const MCPhysReg GPRArgRegs[] = {AArch64::X0, AArch64::X1, AArch64::X2,
161 AArch64::X3, AArch64::X4, AArch64::X5,
162 AArch64::X6, AArch64::X7};
163 static const MCPhysReg FPRArgRegs[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
164 AArch64::Q3, AArch64::Q4, AArch64::Q5,
165 AArch64::Q6, AArch64::Q7};
167 ArrayRef<MCPhysReg> llvm::AArch64::getGPRArgRegs() { return GPRArgRegs; }
169 ArrayRef<MCPhysReg> llvm::AArch64::getFPRArgRegs() { return FPRArgRegs; }
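These two arrays are the AAPCS64 argument registers: the first eight integer arguments go in x0-x7 and the first eight floating-point/vector arguments in v0-v7 (named q0-q7 here so the full 128-bit registers are used). A hedged illustration of the assignment, not taken from this file:

    // Hypothetical signature for illustration only.
    double f(long a, long b, double x, double y);
    // AAPCS64: a -> x0, b -> x1, x -> d0 (low 64 bits of v0/q0), y -> d1;
    // the double result comes back in d0.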
237 VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock;
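For orientation, AArch64::SVEBitsPerBlock is the 128-bit SVE granule; this check fires when a scalable type's known-minimum size fills exactly one block per vscale increment. A minimal sketch under that assumption:

    constexpr unsigned SVEBitsPerBlock = 128; // assumed value of the constant
    // <vscale x 4 x i32>: 4 lanes x 32 bits == 128 bits, so it matches.
    static_assert(4 * 32 == SVEBitsPerBlock, "one full block per vscale step");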
369 AddrDisc = DAG->getRegister(AArch64::NoRegister, MVT::i64);
379 // AArch64 doesn't have comparisons that set GPRs, nor setcc instructions, so
387 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
388 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
391 addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass);
397 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
398 addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
399 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
400 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
401 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
405 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
406 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
429 addRegisterClass(MVT::nxv1i1, &AArch64::PPRRegClass);
430 addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
431 addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
432 addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
433 addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
436 addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
437 addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
438 addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
439 addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
441 addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
442 addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
443 addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
444 addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
445 addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
446 addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
448 addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
449 addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
450 addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
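The split above mirrors the SVE register file: every nxv*i1 predicate type maps to the P registers (PPR, one predicate bit per vector byte), while data vectors of all element types map to the Z registers (ZPR), each filling one 128-bit block per vscale increment. A small consistency check, as a sketch:

    static_assert(16 * 8 == 128 && 8 * 16 == 128 && 4 * 32 == 128 &&
                      2 * 64 == 128,
                  "nxv16i8/nxv8i16/nxv4i32/nxv2i64 each fill one SVE block");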
455 addRegisterClass(VT, &AArch64::ZPRRegClass);
459 addRegisterClass(VT, &AArch64::ZPRRegClass);
464 addRegisterClass(MVT::aarch64svcount, &AArch64::PPRRegClass);
630 // AArch64 lacks both left-rotate and popcount instructions.
638 // AArch64 doesn't have i32 MULH{S|U}.
642 // AArch64 doesn't have {U|S}MUL_LOHI.
874 // AArch64 has implementations of many rounding-like FP operations.
1037 // AArch64 does not have floating-point extending loads, i1 sign-extending
1163 setStackPointerRegisterToSaveRestore(AArch64::SP);
1230 // AArch64 doesn't have direct vector->f32 conversion instructions for
1256 // when AArch64 doesn't have fullfp16 support, promote the input
2227 addRegisterClass(VT, &AArch64::FPR64RegClass);
2233 addRegisterClass(VT, &AArch64::FPR128RegClass);
2389 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
2392 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
2395 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
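The Wri/Xri forms only accept AArch64 "bitmask" logical immediates (a rotated, replicated run of ones), so a rewrite like this needs an encodability guard. A hedged sketch (the helper name is illustrative) using LLVM's check from MCTargetDesc/AArch64AddressingModes.h:

    #include "MCTargetDesc/AArch64AddressingModes.h"
    #include <cstdint>

    // True if Imm can fold into ANDWri/ORRWri/EORWri (Size == 32) or the
    // Xri forms (Size == 64). E.g. 0xff encodes; 0x1234 does not and must
    // be materialized into a register first.
    static bool canUseLogicalImm(uint64_t Imm, unsigned Size) {
      return AArch64_AM::isLogicalImmediate(Imm, Size);
    }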
2643 return AArch64::createFastISel(funcInfo, libInfo);
3023 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
3024 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
3032 TrueBB->addLiveIn(AArch64::NZCV);
3033 EndBB->addLiveIn(AArch64::NZCV);
3036 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
3092 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::LDR_ZA));
3094 MIB.addReg(AArch64::ZA, RegState::Define);
3128 bool HasTile = BaseReg != AArch64::ZA;
3158 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::ZERO_M));
3164 MIB.addDef(AArch64::ZAD0 + I, RegState::ImplicitDefine);
3181 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRXui))
3186 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRHHui))
3187 .addReg(AArch64::WZR)
3190 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRWui))
3191 .addReg(AArch64::WZR)
3222 Register SP = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3224 .addReg(AArch64::SP);
3229 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::MSUBXrrr), Dest)
3234 AArch64::SP)
3260 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::SUBXrx64), AArch64::SP)
3261 .addReg(AArch64::SP)
3265 .addReg(AArch64::SP);
3286 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::BL))
3288 .addReg(AArch64::X0, RegState::ImplicitDefine)
3294 .addReg(AArch64::X0);
3298 .addReg(AArch64::XZR);
3306 int SMEOrigInstr = AArch64::getSMEPseudoMap(MI.getOpcode());
3310 TII->get(MI.getOpcode()).TSFlags & AArch64::SMEMatrixTypeMask;
3312 case (AArch64::SMEMatrixArray):
3313 return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB);
3314 case (AArch64::SMEMatrixTileB):
3315 return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB);
3316 case (AArch64::SMEMatrixTileH):
3317 return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB);
3318 case (AArch64::SMEMatrixTileS):
3319 return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0, MI, BB);
3320 case (AArch64::SMEMatrixTileD):
3321 return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0, MI, BB);
3322 case (AArch64::SMEMatrixTileQ):
3323 return EmitZAInstr(SMEOrigInstr, AArch64::ZAQ0, MI, BB);
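The bases chosen here follow the SME tile layout: ZA divides into as many tiles as the element size in bytes (one ZAB tile, two ZAH, four ZAS, eight ZAD, sixteen ZAQ), and the pseudo's tile-index operand is added to the per-size base. A minimal sketch of that arithmetic, with a hypothetical helper name:

    // E.g. BaseReg == AArch64::ZAS0 and TileIdx == 3 selects ZAS3.
    static unsigned getTileReg(unsigned BaseReg, unsigned TileIdx) {
      return BaseReg + TileIdx;
    }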
3333 case AArch64::InitTPIDR2Obj:
3335 case AArch64::AllocateZABuffer:
3337 case AArch64::AllocateSMESaveBuffer:
3339 case AArch64::GetSMESaveSize:
3341 case AArch64::F128CSEL:
3351 AArch64::LR, /*isDef*/ true,
3363 case AArch64::CATCHRET:
3366 case AArch64::PROBED_STACKALLOC_DYN:
3369 case AArch64::LD1_MXIPXX_H_PSEUDO_B:
3370 return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
3371 case AArch64::LD1_MXIPXX_H_PSEUDO_H:
3372 return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0, MI, BB);
3373 case AArch64::LD1_MXIPXX_H_PSEUDO_S:
3374 return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0, MI, BB);
3375 case AArch64::LD1_MXIPXX_H_PSEUDO_D:
3376 return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0, MI, BB);
3377 case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
3378 return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0, MI, BB);
3379 case AArch64::LD1_MXIPXX_V_PSEUDO_B:
3380 return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0, MI, BB);
3381 case AArch64::LD1_MXIPXX_V_PSEUDO_H:
3382 return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0, MI, BB);
3383 case AArch64::LD1_MXIPXX_V_PSEUDO_S:
3384 return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0, MI, BB);
3385 case AArch64::LD1_MXIPXX_V_PSEUDO_D:
3386 return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0, MI, BB);
3387 case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
3388 return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
3389 case AArch64::LDR_ZA_PSEUDO:
3391 case AArch64::LDR_TX_PSEUDO:
3392 return EmitZTInstr(MI, BB, AArch64::LDR_TX, /*Op0IsDef=*/true);
3393 case AArch64::STR_TX_PSEUDO:
3394 return EmitZTInstr(MI, BB, AArch64::STR_TX, /*Op0IsDef=*/false);
3395 case AArch64::ZERO_M_PSEUDO:
3397 case AArch64::ZERO_T_PSEUDO:
3398 return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true);
3399 case AArch64::MOVT_TIZ_PSEUDO:
3400 return EmitZTInstr(MI, BB, AArch64::MOVT_TIZ, /*Op0IsDef=*/true);
3405 // AArch64 Lowering private implementation.
3438 /// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
3467 /// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
3528 /// Convert a DAG fp condition code to an AArch64 CC.
3557 /// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
4084 // LHS one, the extreme case being when RHS is an immediate. However, AArch64
4243 // Emit the AArch64 operation with overflow check.
4682 SDValue Result = DAG.getTargetExtractSubreg(AArch64::hsub, dl, VT, Narrow);
4822 // AArch64 FP-to-int conversions saturate to the destination element size, so
4926 // AArch64 FP-to-int conversions saturate to the destination register size, so
5321 return DAG.getTargetExtractSubreg(AArch64::hsub, DL, OpVT, Op);
5455 DAG.getConstant(AArch64::RoundingBitsPos, DL, MVT::i32));
5467 const int RMMask = ~(AArch64::Rounding::rmMask << AArch64::RoundingBitsPos);
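The mask above clears FPCR.RMode before the new rounding mode is inserted. A worked sketch of the arithmetic, assuming RoundingBitsPos == 22 (RMode occupies FPCR bits [23:22]) and rmMask == 0b11:

    #include <cstdint>

    constexpr unsigned RoundingBitsPos = 22; // assumed field position
    constexpr uint64_t rmMask = 0b11;
    constexpr uint64_t RMMask = ~(rmMask << RoundingBitsPos);
    static_assert(~RMMask == 0xC00000u, "mask selects exactly bits 23:22");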
5527 DAG.getConstant(AArch64::ReservedFPControlBits, DL, MVT::i64));
5998 SDValue MS = SDI.EmitMOPS(AArch64::MOPSMemorySetTaggingPseudo, DAG, DL,
6031 report_fatal_error("Unexpected type for AArch64 NEON intrinsic");
6973 assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
6974 Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
7922 RC = &AArch64::GPR32RegClass;
7924 RC = &AArch64::GPR64RegClass;
7926 RC = &AArch64::FPR16RegClass;
7928 RC = &AArch64::FPR32RegClass;
7930 RC = &AArch64::FPR64RegClass;
7932 RC = &AArch64::FPR128RegClass;
7936 RC = &AArch64::PPRRegClass;
7939 RC = &AArch64::PPRRegClass;
7942 RC = &AArch64::ZPRRegClass;
8020 Register VReg = MF.addLiveIn(AArch64::X4, &AArch64::GPR64RegClass);
8182 if (!CCInfo.isAllocated(AArch64::X8)) {
8183 Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
8184 Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
8278 MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
8314 auto GPRArgRegs = AArch64::getGPRArgRegs();
8337 // compute its address relative to x4. For a normal AArch64->AArch64
8340 Register VReg = MF.addLiveIn(AArch64::X4, &AArch64::GPR64RegClass);
8349 Register VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
8365 auto FPRArgRegs = AArch64::getFPRArgRegs();
8377 Register VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
8613 // tail-called on AArch64 when the OS does not support dynamic
8770 case AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO:
8771 RegClass = &AArch64::ZPR2StridedOrContiguousRegClass;
8773 case AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO:
8774 RegClass = &AArch64::ZPR4StridedOrContiguousRegClass;
8809 if (MI.getOpcode() == AArch64::MSRpstatesvcrImm1 ||
8810 MI.getOpcode() == AArch64::MSRpstatePseudo) {
8814 (AArch64::GPR32RegClass.contains(MO.getReg()) ||
8815 AArch64::GPR64RegClass.contains(MO.getReg())))
8821 MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/false,
8823 MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/true,
8828 if (MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO ||
8829 MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO) {
8842 MIB.addImm(AArch64::zsub0 + (I - 1));
8856 (MI.getOpcode() == AArch64::ADDXri ||
8857 MI.getOpcode() == AArch64::SUBXri)) {
8861 MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/false,
8994 return AArch64::ZPRRegClass.contains(Loc.getLocReg()) ||
8995 AArch64::PPRRegClass.contains(Loc.getLocReg());
9169 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
9420 RegsToPass.emplace_back(AArch64::X4, ParamPtr);
9421 RegsToPass.emplace_back(AArch64::X5,
9655 Result = DAG.getCopyToReg(Result, DL, AArch64::X0, TPIDR2Block, Glue);
9658 {Result, TPIDR2_EL0, DAG.getRegister(AArch64::X0, MVT::i64),
9808 // Windows AArch64 ABIs require that for returning structs by value we copy
9816 unsigned RetValReg = AArch64::X0;
9818 RetValReg = AArch64::X8;
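Hedged context for the X8 special case: under AAPCS64 (and the Windows ARM64 ABI), a result returned indirectly has its destination address passed in x8, so a struct returned by value through memory echoes that pointer back in x8 instead of using x0. At the source level:

    // The caller allocates Large and passes its address in x8; the callee
    // writes the result through that pointer.
    struct Large { long v[16]; };
    Large makeLarge();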
9829 if (AArch64::GPR64RegClass.contains(*I))
9831 else if (AArch64::FPR64RegClass.contains(*I))
9911 return SDValue(DAG.getMachineNode(AArch64::LOADgotAUTH, DL, Ty, GotAddr),
10058 // normal AArch64 call node: x0 takes the address of the descriptor, and
10060 Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
10072 Ops.push_back(DAG.getRegister(AArch64::NoRegister, MVT::i64)); // Addr Disc.
10075 Ops.push_back(DAG.getRegister(AArch64::X0, MVT::i64));
10079 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
10101 return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
10116 Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
10120 return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
10136 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
10139 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
10159 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
10162 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, MiVar,
10165 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
10206 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
10277 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
10280 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
10307 SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
10350 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
10420 return SDValue(DAG.getMachineNode(AArch64::LOADauthptrstatic, DL, MVT::i64,
10474 : DAG.getRegister(AArch64::XZR, MVT::i64);
10480 DAG.getMachineNode(AArch64::MOVaddrPAC, DL, MVT::i64,
10489 DAG.getMachineNode(AArch64::LOADgotPAC, DL, MVT::i64,
10635 // Unfortunately, the mapping of LLVM FP CCs onto AArch64 CCs isn't totally
10709 SetVecVal(AArch64::dsub);
10712 SetVecVal(AArch64::ssub);
10715 SetVecVal(AArch64::hsub);
10736 return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, BSP);
10738 return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, BSP);
10740 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, BSP);
11091 // Unfortunately, the mapping of LLVM FP CCs onto AArch64 CCs isn't
11325 // Unfortunately, the mapping of LLVM FP CCs onto AArch64 CCs isn't totally
11395 unsigned BlockSize = AArch64::SVEBitsPerBlock / Ty.getVectorMinNumElements();
11478 TVal = DAG.getTargetInsertSubreg(AArch64::hsub, DL, MVT::f32,
11480 FVal = DAG.getTargetInsertSubreg(AArch64::hsub, DL, MVT::f32,
11487 return DAG.getTargetExtractSubreg(AArch64::hsub, DL, Ty, Res);
11536 SDValue X16Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X16,
11538 SDNode *B = DAG.getMachineNode(AArch64::BR_JumpTable, DL, MVT::Other,
11545 DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
11558 Dest->getMachineOpcode() == AArch64::JumpTableDest32)
11571 SDValue AddrDisc = DAG.getRegister(AArch64::XZR, MVT::i64);
11573 SDNode *BrA = DAG.getMachineNode(AArch64::BRA, DL, MVT::Other,
11611 SDValue AddrDisc = DAG.getRegister(AArch64::XZR, MVT::i64);
11614 DAG.getMachineNode(AArch64::MOVaddrPAC, DL, {MVT::Other, MVT::Glue},
11616 return DAG.getCopyFromReg(SDValue(MOV, 0), DL, AArch64::X16, MVT::i64,
11653 // relative to x4. For a normal AArch64->AArch64 call, x4 == sp on entry,
11655 Register VReg = MF.addLiveIn(AArch64::X4, &AArch64::GPR64RegClass);
11677 // The layout of the va_list struct is specified in the AArch64 Procedure Call
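For reference, the va_list record that this lowering materializes has the AAPCS64 layout below (field names as in the ABI document; a sketch, not the type LLVM actually builds):

    struct va_list_aapcs64 {
      void *__stack;   // next stacked argument
      void *__gr_top;  // byte past the saved general-register area
      void *__vr_top;  // byte past the saved FP/SIMD-register area
      int __gr_offs;   // negative offset from __gr_top to the next GPR arg
      int __vr_offs;   // negative offset from __vr_top to the next FPR arg
    };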
11866 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64);
11896 if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
11917 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
11942 Register Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
11952 St = DAG.getMachineNode(AArch64::XPACI, DL, VT, ReturnAddress);
11956 DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::LR, ReturnAddress);
11957 St = DAG.getMachineNode(AArch64::XPACLRI, DL, VT, Chain);
12024 // AArch64 Optimization Hooks
12087 // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
12116 // AArch64 reciprocal iteration instruction: (2 - M * N)
12131 // AArch64 Inline Assembly Support
12206 return std::make_pair(AArch64::PN0 + V, &AArch64::PNRRegClass);
12208 return std::make_pair(AArch64::P0 + V, &AArch64::PPRRegClass);
12228 return VT == MVT::aarch64svcount ? &AArch64::PNR_p8to15RegClass
12229 : &AArch64::PPR_p8to15RegClass;
12231 return VT == MVT::aarch64svcount ? &AArch64::PNR_3bRegClass
12232 : &AArch64::PPR_3bRegClass;
12234 return VT == MVT::aarch64svcount ? &AArch64::PNRRegClass
12235 : &AArch64::PPRRegClass;
12258 return &AArch64::MatrixIndexGPR32_8_11RegClass;
12260 return &AArch64::MatrixIndexGPR32_12_15RegClass;
12314 Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, MVT::i32, Glue);
12317 Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, MVT::i32);
12415 return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
12417 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
12418 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
12424 return std::make_pair(0U, &AArch64::ZPRRegClass);
12431 return std::make_pair(0U, &AArch64::FPR16RegClass);
12433 return std::make_pair(0U, &AArch64::FPR32RegClass);
12435 return std::make_pair(0U, &AArch64::FPR64RegClass);
12437 return std::make_pair(0U, &AArch64::FPR128RegClass);
12446 return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
12448 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
12454 return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
12470 return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
12473 return std::make_pair(unsigned(AArch64::ZA), &AArch64::MPRRegClass);
12477 return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass);
12497 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
12498 Res.second = &AArch64::FPR64RegClass;
12500 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
12501 Res.second = &AArch64::FPR128RegClass;
12508 !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
12509 !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
12548 Result = DAG.getRegister(AArch64::XZR, MVT::i64);
12550 Result = DAG.getRegister(AArch64::WZR, MVT::i32);
12668 // AArch64 Advanced SIMD Support
14075 if (VT.getSizeInBits().getKnownMinValue() != AArch64::SVEBitsPerBlock)
14393 unsigned VScale = MaxSVESize / AArch64::SVEBitsPerBlock;
15914 // Unfortunately, the mapping of LLVM FP CCs onto AArch64 CCs isn't totally
16175 SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
16181 Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
16199 Chain = DAG.getCopyToReg(Chain, dl, AArch64::X15, Size, SDValue());
16202 Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
16212 SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
16218 Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
16240 SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
17755 // AArch64 has five basic addressing modes:
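The enumeration itself is truncated by this match listing; for context, the basic AArch64 load/store forms can be illustrated as operand syntax (a sketch from the ISA, shown in comments):

    //   [x0]              register
    //   [x0, #-8]         base + 9-bit signed immediate (unscaled)
    //   [x0, #16]         base + 12-bit unsigned immediate, scaled by size
    //   [x0, x1]          base + register
    //   [x0, x1, lsl #3]  base + register scaled by the access size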
17890 AArch64::X16, AArch64::X17, AArch64::LR, 0
17896 static const MCPhysReg RCRegs[] = {AArch64::FPCR};
19863 // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
19879 // On AArch64, sequences like the following:
20208 // AArch64 high-vector "long" operations are formed by performing the non-high
20282 /// Helper structure to keep track of a SET_CC lowered into AArch64 code.
20291 AArch64SetCCInfo AArch64;
20304 /// AArch64 lowered one.
20325 SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
20327 SetCCInfo.Info.AArch64.CC =
20344 SetCCInfo.Info.AArch64.CC =
20345 AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC);
20386 ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
20396 AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
20398 Cmp = *InfoAndKind.Info.AArch64.Cmp;
20949 // Bail out when value type is not one of {i32, i64}, since AArch64 ADD with
20969 // On many AArch64 processors (Cortex-A78, Neoverse N1/N2/V1, etc.), ADD with
21569 if (VT.getSizeInBits().getKnownMinValue() != AArch64::SVEBitsPerBlock)
22578 if (VT.getSizeInBits().getKnownMinValue() > AArch64::SVEBitsPerBlock)
22756 ZeroReg = AArch64::WZR;
22759 ZeroReg = AArch64::XZR;
23411 Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
24048 Subtarget.getMaxSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
24199 Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
24336 // when it is safe to remove the AND. Unfortunately it only runs on AArch64 and
25603 if (SrcVT.getSizeInBits().getKnownMinValue() > AArch64::SVEBitsPerBlock)
25714 if (RetVT.getSizeInBits().getKnownMinValue() > AArch64::SVEBitsPerBlock)
26183 // You can see the regressions in test/CodeGen/AArch64/aarch64-smull.ll
26248 // You can see the regressions in test/CodeGen/AArch64/aarch64-smull.ll
26980 Op = DAG.getTargetInsertSubreg(AArch64::hsub, DL, MVT::f32,
27089 DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32);
27090 SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32);
27091 SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32);
27118 Opcode = AArch64::CASPX;
27121 Opcode = AArch64::CASPAX;
27124 Opcode = AArch64::CASPLX;
27128 Opcode = AArch64::CASPALX;
27138 unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
27154 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
27157 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
27160 Opcode = AArch64::CMP_SWAP_128_RELEASE;
27164 Opcode = AArch64::CMP_SWAP_128;
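A hedged source-level example of what reaches this selection: a 16-byte atomic compare-exchange. With FEAT_LSE the CASP variants above apply (the acquire/release flavors map to CASPA/CASPL/CASPAL); without it, the CMP_SWAP_128* pseudos are expanded later into an exclusive load/store pair loop:

    #include <atomic>

    // __int128 is a GCC/Clang extension; on AArch64 this can lower through
    // the CASP / CMP_SWAP_128 path shown above.
    bool cas128(std::atomic<__int128> &A, __int128 &Expected,
                __int128 Desired) {
      return A.compare_exchange_strong(Expected, Desired,
                                       std::memory_order_acq_rel);
    }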
27200 return AArch64::LDCLRP;
27203 return AArch64::LDCLRPA;
27206 return AArch64::LDCLRPL;
27210 return AArch64::LDCLRPAL;
27220 return AArch64::LDSETP;
27223 return AArch64::LDSETPA;
27226 return AArch64::LDSETPL;
27230 return AArch64::LDSETPAL;
27240 return AArch64::SWPP;
27243 return AArch64::SWPPA;
27246 return AArch64::SWPPL;
27250 return AArch64::SWPPAL;
27426 // Non-volatile or atomic loads are optimized later in AArch64's load/store
27438 assert(Subtarget->hasFeature(AArch64::FeatureRCPC3));
28002 return AArch64::X0;
28010 return AArch64::X1;
28068 if (AArch64::GPR64RegClass.contains(*I))
28069 RC = &AArch64::GPR64RegClass;
28070 else if (AArch64::FPR64RegClass.contains(*I))
28071 RC = &AArch64::FPR64RegClass;
28097 // Integer division on AArch64 is expensive. However, when aggressively
28100 // The exception to this is vector division. Since AArch64 doesn't have vector
28153 case AArch64::BLR:
28154 case AArch64::BLRNoIP:
28155 case AArch64::TCRETURNri:
28156 case AArch64::TCRETURNrix16x17:
28157 case AArch64::TCRETURNrix17:
28158 case AArch64::TCRETURNrinotx16:
28168 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(AArch64::KCFI_CHECK))
28282 case AArch64::ADRP:
28283 case AArch64::G_ADD_LOW:
29212 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
29383 unsigned MinNumElts = AArch64::SVEBitsPerBlock / BitsPerElt;
29510 // equivalent to an AArch64 instruction. There's the extra component of
29695 MaxSVEVectorSizeInBits = AArch64::SVEMaxBitsPerVector;