Lines matching "tri", "default", "2" in llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp

1 //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
75 #define DEBUG_TYPE "arm-instrinfo"
81 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
82 cl::desc("Enable ARM 2-addr to 3-addr conv"));
84 /// ARM_MLxEntry - Record information about MLA / MLS instructions.
134 static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
135 return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
141 // - pre-RA scheduling
142 // - post-RA scheduling when FeatureUseMISched is set
148 // post-RA scheduling; we can tell that we're post-RA because we don't
150 // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
151 // banks, banked on bit 2. Assume that TCMs are in use.
152 if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
153 MHR->AddHazardRecognizer(
160 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
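A minimal sketch of what "banked on bit 2" implies for the hazard model above (the helper below is hypothetical, not from this file): two data accesses contend for a DTCM bank only when bit 2 of their addresses matches.

// Hypothetical helper: with two DTCM banks selected by address bit 2,
// accesses can only conflict within the same bank.
static bool inSameDTCMBank(unsigned long long AddrA, unsigned long long AddrB) {
  return ((AddrA >> 2) & 1) == ((AddrB >> 2) & 1);
}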
164 // Called during post-RA scheduling when FeatureUseMISched is not set
171 MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
175 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
187 MachineFunction &MF = *MI.getParent()->getParent();
191 default: return nullptr;
199 // Try splitting an indexed load/store to an un-indexed one plus an add/sub
212 const MachineOperand &Base = MI.getOperand(2);
213 const MachineOperand &Offset = MI.getOperand(NumOps - 3);
217 unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
218 ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
220 default: llvm_unreachable("Unknown indexed op!");
225 if (ARM_AM::getSOImmVal(Amt) == -1)
259 // The immediate is 8 bits, so it's guaranteed to fit in a so_imm operand.
309 UpdateMI->getOperand(0).setIsDead();
320 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
324 LV->addVirtualRegisterDead(Reg, *NewMI);
327 for (unsigned j = 0; j < 2; ++j) {
330 if (!NewMI->readsRegister(Reg, /*TRI=*/nullptr))
332 LV->addVirtualRegisterKilled(Reg, *NewMI);
351 // 2 elements indicates a conditional branch; the elements are
367 --I;
371 while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
379 while (I->isDebugInstr() || !I->isTerminator() ||
380 isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
381 I->getOpcode() == ARM::t2DoLoopStartTP) {
384 --I;
387 if (isIndirectBranchOpcode(I->getOpcode()) ||
388 isJumpTableBranchOpcode(I->getOpcode())) {
392 } else if (isUncondBranchOpcode(I->getOpcode())) {
393 TBB = I->getOperand(0).getMBB();
394 } else if (isCondBranchOpcode(I->getOpcode())) {
401 TBB = I->getOperand(0).getMBB();
402 Cond.push_back(I->getOperand(1));
403 Cond.push_back(I->getOperand(2));
404 } else if (I->isReturn()) {
407 } else if (I->getOpcode() == ARM::t2LoopEnd &&
409 ->getSubtarget<ARMSubtarget>()
414 TBB = I->getOperand(1).getMBB();
415 Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
416 Cond.push_back(I->getOperand(0));
423 // Cleanup code - to be run for unpredicated unconditional branches and
426 (isUncondBranchOpcode(I->getOpcode()) ||
427 isIndirectBranchOpcode(I->getOpcode()) ||
428 isJumpTableBranchOpcode(I->getOpcode()) ||
429 I->isReturn())) {
430 // Forget any previous conditional branch information - it no longer applies.
464 --I;
467 // We made it past the terminators without bailing out - we must have
480 if (!isUncondBranchOpcode(I->getOpcode()) &&
481 !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
485 I->eraseFromParent();
490 --I;
491 if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
495 I->eraseFromParent();
496 return 2;
506 ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
507 int BOpc = !AFI->isThumbFunction()
508 ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
509 int BccOpc = !AFI->isThumbFunction()
510 ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
511 bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
515 assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
526 } else if (Cond.size() == 2) {
536 // Two-way conditional branch.
537 if (Cond.size() == 2)
548 return 2;
553 if (Cond.size() == 2) {
564 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
565 while (++I != E && I->isInsideBundle()) {
566 int PIdx = I->findFirstPredOperandIdx();
567 if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
574 return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
579 const TargetRegisterInfo *TRI) const {
583 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
607 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
614 if (PIdx != -1) {
638 if (Pred1.size() > 2 || Pred2.size() > 2)
647 default:
694 switch (MI->getOpcode()) {
695 default: return true;
722 /// isPredicable - Return true if the specified instruction can be predicated.
723 /// By default, this returns true for every instruction with a
735 const MachineFunction *MF = MI.getParent()->getParent();
737 MF->getInfo<ARMFunctionInfo>();
746 const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
752 if (AFI->isThumb2Function()) {
763 for (const MachineOperand &MO : MI->operands()) {
777 /// GetInstSize - Return the size of the specified MachineInstr.
782 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
787 default:
789 // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
790 // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
791 // contrast to AArch64 instructions which have a default size of 4 bytes for
802 // operand #2.
803 return MI.getOperand(2).getImm();
810 if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
820 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
821 while (++I != E && I->isInsideBundle()) {
822 assert(!I->isBundle() && "No nested bundle!");
837 BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
840 // APSR. However, there are lots of other possibilities on M-class cores.
856 MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
943 SubRegs = 2;
952 SubRegs = 2;
964 SubRegs = 2;
968 SubRegs = 2;
969 Spacing = 2;
974 Spacing = 2;
979 Spacing = 2;
984 SubRegs = 2;
993 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
999 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
1005 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
1011 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
1017 assert(Opc && "Impossible reg-to-reg copy");
1019 const TargetRegisterInfo *TRI = &getRegisterInfo();
1023 if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
1024 BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
1025 Spacing = -Spacing;
1031 Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
1032 Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
1033 assert(Dst && Src && "Bad sub-register");
1038 Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
1052 // Add implicit super-register defs and kills to the last instruction.
1053 Mov->addRegisterDefined(DestReg, TRI);
1055 Mov->addRegisterKilled(SrcReg, TRI);
1069 MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
1078 Register DstReg = DstSrcPair->Destination->getReg();
1107 const TargetRegisterInfo *TRI) const {
1112 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
1120 const TargetRegisterInfo *TRI,
1130 switch (TRI->getSpillSize(*RC)) {
1131 case 2:
1178 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1179 AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1189 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1190 AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1240 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1241 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1242 AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1254 // spilled def has a sub-register index.
1272 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1273 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1274 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1275 AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1292 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1293 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1294 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1295 MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1296 MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1297 MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1298 MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1299 AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1303 default:
1311 default: break;
1314 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1315 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1328 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1329 MI.getOperand(2).getImm() == 0) {
1337 if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1339 return MI.getOperand(2).getReg();
1366 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1367 ->getFrameIndex();
1377 const TargetRegisterInfo *TRI,
1380 if (I != MBB.end()) DL = I->getDebugLoc();
1388 switch (TRI->getSpillSize(*RC)) {
1389 case 2:
1433 AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1434 AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1444 MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1445 MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1491 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1492 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1493 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1520 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1521 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1522 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1523 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1541 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1542 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1543 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1544 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1545 MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1546 MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1547 MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1548 MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1554 default:
1562 default: break;
1565 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1566 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1579 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1580 MI.getOperand(2).getImm() == 0) {
1623 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1624 ->getFrameIndex();
1637 DebugLoc dl = MI->getDebugLoc();
1638 MachineBasicBlock *BB = MI->getParent();
1641 if (isThumb1 || !MI->getOperand(1).isDead()) {
1642 MachineOperand LDWb(MI->getOperand(1));
1643 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1648 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1651 if (isThumb1 || !MI->getOperand(0).isDead()) {
1652 MachineOperand STWb(MI->getOperand(0));
1653 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1658 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1661 MachineOperand LDBase(MI->getOperand(3));
1664 MachineOperand STBase(MI->getOperand(2));
1668 const TargetRegisterInfo &TRI = getRegisterInfo();
1670 for (MachineOperand &MO : llvm::drop_begin(MI->operands(), 5))
1673 [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1674 return TRI.getEncodingValue(Reg1) <
1675 TRI.getEncodingValue(Reg2);
1683 BB->erase(MI);
1689 MI.getParent()->erase(MI);
1705 // Look for a copy between even S-registers. That is where we keep floats
1712 const TargetRegisterInfo *TRI = &getRegisterInfo();
1713 unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
1715 unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
1722 // sub-register insertion.
1723 if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1732 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1734 // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1735 // or some other super-register.
1736 int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD, /*TRI=*/nullptr);
1737 if (ImpDefIdx != -1)
1754 // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1757 MI.addRegisterKilled(SrcRegS, TRI, true);
1770 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1776 unsigned PCLabelId = AFI->createPICLabelUId();
1781 // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1784 if (ACPV->isGlobalValue())
1786 cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1787 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1788 else if (ACPV->isExtSymbol())
1791 cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1792 else if (ACPV->isBlockAddress())
1794 Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1796 else if (ACPV->isLSDA())
1799 else if (ACPV->isMachineBasicBlock())
1802 cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1805 CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
1813 const TargetRegisterInfo &TRI) const {
1816 default: {
1817 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1818 MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1843 switch (I->getOpcode()) {
1847 unsigned CPI = I->getOperand(1).getIndex();
1849 I->getOperand(1).setIndex(CPI);
1850 I->getOperand(2).setImm(PCLabelId);
1854 if (!I->isBundledWithSucc())
1888 const MachineFunction *MF = MI0.getParent()->getParent();
1889 const MachineConstantPool *MCP = MF->getConstantPool();
1892 const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1893 const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1901 return ACPV0->hasSameValue(ACPV1);
1919 MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1920 MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1940 /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1946 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1954 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1959 default:
1980 if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
1981 !IsLoadOpcode(Load2->getMachineOpcode()))
1985 if (Load1->getOperand(0) != Load2->getOperand(0) ||
1986 Load1->getOperand(4) != Load2->getOperand(4))
1990 if (Load1->getOperand(3) != Load2->getOperand(3))
1994 if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1995 isa<ConstantSDNode>(Load2->getOperand(1))) {
1996 Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1997 Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
2004 /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
2013 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
2023 if ((Offset2 - Offset1) / 8 > 64)
2031 if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
2032 !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
2033 Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
2034 (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
2035 Load2->getMachineOpcode() == ARM::t2LDRBi8)))
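A self-contained restatement of the distance filter at line 2023 above (helper name hypothetical): with Offset1 = 0 and Offset2 = 520, (520 - 0) / 8 = 65 > 64, so the two loads are not scheduled near each other.

// Hypothetical restatement: loads whose offsets differ by more than
// 64 * 8 = 512 bytes are not worth clustering.
static bool loadsTooFarApart(long long Offset1, long long Offset2) {
  return (Offset2 - Offset1) / 8 > 64;
}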
2076 while (++I != MBB->end() && I->isDebugInstr())
2078 if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
2082 // a stack-oriented pointer, as it's unlikely to be profitable. This
2086 // Calls don't actually change the stack pointer, even if they have imp-defs.
2089 if (!MI.isCall() && MI.definesRegister(ARM::SP, /*TRI=*/nullptr))
2105 if (MBB.getParent()->getFunction().hasOptSize()) {
2107 if (!Pred->empty()) {
2108 MachineInstr *LastMI = &*Pred->rbegin();
2109 if (LastMI->getOpcode() == ARM::t2Bcc) {
2110 const TargetRegisterInfo *TRI = &getRegisterInfo();
2111 MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
2134 if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
2162 PredCost -= 1 * ScalingUpFactor;
2171 PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
2188 // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
2196 return divideCeil(NumInsts, MaxInsts) * 2;
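The same overhead formula in a standalone form (helper name hypothetical): one 2-byte IT instruction predicates up to 4 instructions, so 5 instructions cost divideCeil(5, 4) * 2 = 4 extra bytes.

// Hypothetical standalone form of the IT-block overhead returned above.
static unsigned itOverheadBytes(unsigned NumInsts) {
  const unsigned MaxInsts = 4;                       // insts per IT block
  return ((NumInsts + MaxInsts - 1) / MaxInsts) * 2; // each IT is 2 bytes
}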
2210 // For Thumb2, all branches are 32-bit instructions during the if conversion
2211 // pass, but may be replaced with 16-bit instructions during size reduction.
2217 Size /= 2;
2225 // Reduce false anti-dependencies to let the target's out-of-order execution
2230 /// getInstrPredicate - If instruction is predicated, returns its predicate
2236 if (PIdx == -1) {
2274 CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2297 // Check if MI has any non-dead defs or physreg uses. This also detects
2299 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
2314 if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
2328 // 2: False use.
2332 FalseOp = 2;
2346 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2347 MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2355 MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2356 MachineOperand TrueReg = MI.getOperand(Invert ? 1 : 2);
2368 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2371 const MCInstrDesc &DefDesc = DefMI->getDesc();
2374 NewMI.add(DefMI->getOperand(i));
2383 // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2384 if (NewMI->hasOptionalDef())
2393 NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2403 if (DefMI->getParent() != MI.getParent())
2404 NewMI->clearKillInfo();
2407 DefMI->eraseFromParent();
2485 if (isSub) NumBytes = -NumBytes;
2495 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2513 // micro-operations, so it's really only a benefit to code size.
2520 bool IsPop = isPopOpcode(MI->getOpcode());
2521 bool IsPush = isPushOpcode(MI->getOpcode());
2525 bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2526 MI->getOpcode() == ARM::VLDMDIA_UPD;
2527 bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2528 MI->getOpcode() == ARM::tPOP ||
2529 MI->getOpcode() == ARM::tPOP_RET;
2531 assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2532 MI->getOperand(1).getReg() == ARM::SP)) &&
2533 "trying to fold sp update into non-sp-updating push/pop");
2535 // The VFP push & pop act on D-registers, so we can only correctly fold an
2536 // adjustment that is a multiple of 8 bytes. Similarly, rN is 4 bytes. Don't try
2543 int RegListIdx = IsT1PushPop ? 2 : 4;
2557 // re-adding them since the order matters, so save the existing ones
2563 unsigned FirstRegEnc = -1;
2565 const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
2566 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2567 MachineOperand &MO = MI->getOperand(i);
2571 TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2572 FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2575 const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2578 for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2579 --CurRegEnc) {
2580 unsigned CurReg = RegClass->getRegister(CurRegEnc);
2581 if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
2589 --RegsNeeded;
2596 // callee-saved.
2598 MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2611 --RegsNeeded;
2620 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2621 MI->removeOperand(i);
2653 Offset = -Offset;
2659 if (ARM_AM::getSOImmVal(Offset) != -1) {
2676 assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2691 ImmIdx = FrameRegIdx+2;
2694 InstrOffs *= -1;
2698 ImmIdx = FrameRegIdx+2;
2701 InstrOffs *= -1;
2712 InstrOffs *= -1;
2720 InstrOffs *= -1;
2722 Scale = 2;
2730 Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
2733 default:
2738 assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2740 Offset = -Offset;
2749 unsigned Mask = (1 << NumBits) - 1;
2758 ImmedOffset = -ImmedOffset;
2771 ImmedOffset = -ImmedOffset;
2780 Offset = (isSub) ? -Offset : Offset;
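A worked instance of the masking above, with illustrative values: assuming a 12-bit immediate field, Mask = 0xFFF, so an offset of 0x1234 keeps 0x234 in the instruction and leaves 0x1000 to be materialized separately.

unsigned Mask = (1u << 12) - 1;     // 0xFFF, assuming NumBits = 12
unsigned Encoded = 0x1234 & Mask;   // 0x234 fits in the immediate field
unsigned Leftover = 0x1234 & ~Mask; // 0x1000 needs a separate add/sub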
2784 /// analyzeCompare - For a comparison instruction, return the source registers
2792 default: break;
2821 /// isSuitableForMask - Identify a suitable 'and' instruction that
2823 /// as a 'tst' instruction. Provide a limited look-through for copies.
2827 switch (MI->getOpcode()) {
2830 if (CmpMask != MI->getOperand(2).getImm())
2832 if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2840 /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2845 default: return ARMCC::AL;
2853 /// isRedundantFlagInstr - check whether the first instruction, whose only
2864 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2865 (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2866 ((OI->getOperand(1).getReg() == SrcReg &&
2867 OI->getOperand(2).getReg() == SrcReg2) ||
2868 (OI->getOperand(1).getReg() == SrcReg2 &&
2869 OI->getOperand(2).getReg() == SrcReg))) {
2874 if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2875 ((OI->getOperand(2).getReg() == SrcReg &&
2876 OI->getOperand(3).getReg() == SrcReg2) ||
2877 (OI->getOperand(2).getReg() == SrcReg2 &&
2878 OI->getOperand(3).getReg() == SrcReg))) {
2883 if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2884 (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2885 OI->getOperand(1).getReg() == SrcReg &&
2886 OI->getOperand(2).getImm() == ImmValue) {
2891 if (CmpI->getOpcode() == ARM::tCMPi8 &&
2892 (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2893 OI->getOperand(2).getReg() == SrcReg &&
2894 OI->getOperand(3).getImm() == ImmValue) {
2899 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2900 (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2901 OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2902 OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2903 OI->getOperand(0).getReg() == SrcReg &&
2904 OI->getOperand(1).getReg() == SrcReg2) {
2909 if (CmpI->getOpcode() == ARM::tCMPr &&
2910 (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2911 OI->getOpcode() == ARM::tADDrr) &&
2912 OI->getOperand(0).getReg() == SrcReg &&
2913 OI->getOperand(2).getReg() == SrcReg2) {
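An illustrative instance of the CMP/SUB equivalence these patterns match (assembly shown as comments):

//   subs r2, r0, r1   ; already sets flags from r0 - r1
//   cmp  r0, r1       ; would recompute the same flags, so it is removable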
2922 switch (MI->getOpcode()) {
2923 default: return false;
3007 /// optimizeCompareInstr - Convert the instruction supplying the argument to the
3018 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
3026 UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
3028 if (UI->getParent() != CmpInstr.getParent())
3043 B = CmpInstr.getParent()->begin();
3057 else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
3086 const TargetRegisterInfo *TRI = &getRegisterInfo();
3088 --I;
3089 if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
3091 for (; I != E; --I) {
3092 if (I->getOpcode() != ARM::tMOVi8) {
3098 MI = MI->removeFromParent();
3100 CmpInstr.getParent()->insert(E, MI);
3111 const MachineInstr &Instr = *--I;
3124 if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
3125 Instr.readsRegister(ARM::CPSR, TRI))
3131 // In some cases, we scan the use-list of an instruction for an AND;
3150 // We can't use a predicated instruction - it doesn't always write the flags.
3159 // live-out.
3164 E = CmpInstr.getParent()->end();
3184 default:
3186 CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
3218 unsigned Opc = SubAdd->getOpcode();
3223 unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
3225 (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
3226 SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
3235 std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
3264 // If CPSR is not killed nor re-defined, we should check whether it is
3265 // live-out. If it is live-out, do not optimize.
3268 for (MachineBasicBlock *Succ : MBB->successors())
3269 if (Succ->isLiveIn(ARM::CPSR))
3273 // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
3276 unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1;
3277 MI->getOperand(CPSRRegNum).setReg(ARM::CPSR);
3278 MI->getOperand(CPSRRegNum).setIsDef(true);
3287 OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
3289 MI->clearRegisterDeads(ARM::CPSR);
3305 if (Next != MI.getParent()->end() &&
3324 if (!MRI->hasOneNonDBGUse(Reg))
3330 const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
3340 if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3352 default: return false;
3361 Commute = UseMI.getOperand(2).getReg() != Reg;
3363 default: break;
3373 else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3374 ImmVal = -ImmVal;
3388 default: break;
3405 else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3406 ImmVal = -ImmVal;
3421 default: break;
3430 unsigned OpIdx = Commute ? 2 : 1;
3433 const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
3434 Register NewReg = MRI->createVirtualRegister(TRC);
3444 UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3456 MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
3464 default: {
3466 int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3480 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3483 return 2;
3488 if (!MI.getOperand(2).getReg())
3496 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3499 return 2;
3504 return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3524 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3526 return 2;
3537 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3539 return 2;
3548 return 2;
3551 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3559 return (Rt == Rm) ? 3 : 2;
3573 return 2;
3588 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3596 Register Rn = MI.getOperand(2).getReg();
3601 return (Rt == Rn) ? 3 : 2;
3609 return 2;
3665 return 2;
3669 Register Rn = MI.getOperand(2).getReg();
3670 return (Rt == Rn) ? 3 : 2;
3683 return 2;
3687 // Return the number of 32-bit words loaded by LDM or stored by STM. If this
3702 // 2) getNumLDMAddresses is only used by the scheduling machine model and any
3714 Size += (*I)->getSize().getValue();
3717 // the values can actually go up to 32 for floating-point load/store
3728 default:
3757 UOps += 2; // One for base reg wb, one for write to pc.
3765 if (!ItinData || ItinData->isEmpty())
3770 int ItinUOps = ItinData->getNumMicroOps(Class);
3780 default:
3781 llvm_unreachable("Unexpected multi-uops instruction!");
3784 return 2;
3789 // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3791 // separately by assuming the address is not 64-bit aligned.
3793 // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3794 // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3795 // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
3808 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3809 return (NumRegs / 2) + (NumRegs % 2) + 1;
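The Cortex-A9 load/store-multiple formula above, restated as a hypothetical helper: 3 registers yield 1 + 1 + 1 = 3 uops, and 4 registers yield 2 + 0 + 1 = 3.

// Hypothetical restatement of the uop count returned above.
constexpr unsigned a9LdStMultipleUops(unsigned NumRegs) {
  return NumRegs / 2 + NumRegs % 2 + 1;
}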
3844 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3853 return 2;
3854 // 4 registers would be issued: 2, 2.
3855 // 5 registers would be issued: 2, 2, 1.
3856 unsigned UOps = (NumRegs / 2);
3857 if (NumRegs % 2)
3862 unsigned UOps = (NumRegs / 2);
3863 // If there is an odd number of registers or if it's not 64-bit aligned,
3865 if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3866 (*MI.memoperands_begin())->getAlign() < Align(8))
3880 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3883 return ItinData->getOperandCycle(DefClass, DefIdx);
3887 // (regno / 2) + (regno % 2) + 1
3888 DefCycle = RegNo / 2 + 1;
3889 if (RegNo % 2)
3896 default: break;
3904 // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3906 if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3910 DefCycle = RegNo + 2;
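A worked instance of the def-cycle formula in the comment above: RegNo = 4 gives 4/2 + 1 = 3 cycles; RegNo = 5 gives 5/2 + 1 = 3, plus one more for the odd register, i.e. 4.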
3920 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3923 return ItinData->getOperandCycle(DefClass, DefIdx);
3927 // 4 registers would be issued: 1, 2, 1.
3928 // 5 registers would be issued: 1, 2, 2.
3929 DefCycle = RegNo / 2;
3932 // Result latency is issue cycle + 2: E2.
3933 DefCycle += 2;
3935 DefCycle = (RegNo / 2);
3936 // If there is an odd number of registers or if it's not 64-bit aligned,
3938 if ((RegNo % 2) || DefAlign < 8)
3940 // Result latency is AGU cycles + 2.
3941 DefCycle += 2;
3944 DefCycle = RegNo + 2;
3954 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3956 return ItinData->getOperandCycle(UseClass, UseIdx);
3960 // (regno / 2) + (regno % 2) + 1
3961 UseCycle = RegNo / 2 + 1;
3962 if (RegNo % 2)
3969 default: break;
3977 // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3979 if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3983 UseCycle = RegNo + 2;
3993 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3995 return ItinData->getOperandCycle(UseClass, UseIdx);
3999 UseCycle = RegNo / 2;
4000 if (UseCycle < 2)
4001 UseCycle = 2;
4003 UseCycle += 2;
4005 UseCycle = (RegNo / 2);
4006 // If there is an odd number of registers or if it's not 64-bit aligned,
4008 if ((RegNo % 2) || UseAlign < 8)
4025 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
4033 default:
4034 DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4069 // We can't seem to determine the result latency of the def, so assume it's 2.
4070 DefCycle = 2;
4074 default:
4075 UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
4113 UseCycle = *DefCycle - *UseCycle + 1;
4118 if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
4120 UseCycle = *UseCycle - 1;
4121 } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
4123 UseCycle = *UseCycle - 1;
4130 static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
4137 assert(II->isInsideBundle() && "Empty bundle?");
4139 int Idx = -1;
4140 while (II->isInsideBundle()) {
4141 Idx = II->findRegisterDefOperandIdx(Reg, TRI, false, true);
4142 if (Idx != -1)
4144 --II;
4148 assert(Idx != -1 && "Cannot find bundled definition!");
4153 static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
4159 assert(II->isInsideBundle() && "Empty bundle?");
4160 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4163 int Idx = -1;
4164 while (II != E && II->isInsideBundle()) {
4165 Idx = II->findRegisterUseOperandIdx(Reg, TRI, false);
4166 if (Idx != -1)
4168 if (II->getOpcode() != ARM::t2IT)
4173 if (Idx == -1) {
4190 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4193 default: break;
4199 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4200 --Adjust;
4209 if (ShAmt == 0 || ShAmt == 2)
4210 --Adjust;
4218 default: break;
4226 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4228 Adjust -= 2;
4231 --Adjust;
4240 if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4241 Adjust -= 2;
4249 default: break;
4354 // If the address is not 64-bit aligned, the latencies of these
4367 if (!ItinData || ItinData->isEmpty())
4378 if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4379 ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4393 ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4394 Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4404 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4415 // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
4417 // incur a code size penalty (not able to use the CPSR setting 16-bit
4420 const MachineFunction *MF = DefMI.getParent()->getParent();
4422 if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
4423 --Latency;
4432 ? (*DefMI.memoperands_begin())->getAlign().value()
4435 ? (*UseMI.memoperands_begin())->getAlign().value()
4448 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4450 if (Adj >= 0 || (int)*Latency > -Adj) {
4461 if (!DefNode->isMachineOpcode())
4464 const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4469 if (!ItinData || ItinData->isEmpty())
4472 if (!UseNode->isMachineOpcode()) {
4474 ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4477 return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj;
4480 const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4482 unsigned DefAlign = !DefMN->memoperands_empty()
4483 ? (*DefMN->memoperands_begin())->getAlign().value()
4486 unsigned UseAlign = !UseMN->memoperands_empty()
4487 ? (*UseMN->memoperands_begin())->getAlign().value()
4497 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4500 default: break;
4503 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4506 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4507 Latency = *Latency - 1;
4515 unsigned ShAmt = DefNode->getConstantOperandVal(2);
4516 if (ShAmt == 0 || ShAmt == 2)
4517 Latency = *Latency - 1;
4521 } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) {
4525 default: break;
4528 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4531 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4533 Latency = *Latency - 2;
4535 Latency = *Latency - 1;
4542 // Thumb2 mode: lsl 0-3 only.
4543 Latency = *Latency - 2;
4550 default: break;
4707 // If the address is not 64-bit aligned, the latencies of these
4747 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4748 while (++I != E && I->isInsideBundle()) {
4749 if (I->getOpcode() != ARM::t2IT)
4770 if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4774 unsigned Latency = ItinData->getStageLatency(Class);
4776 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4778 MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
4780 if (Adj >= 0 || (int)Latency > -Adj) {
4788 if (!Node->isMachineOpcode())
4791 if (!ItinData || ItinData->isEmpty())
4794 unsigned Opcode = Node->getMachineOpcode();
4796 default:
4797 return ItinData->getStageLatency(get(Opcode).getSchedClass());
4800 return 2;
4829 if (!ItinData || ItinData->isEmpty())
4836 ItinData->getOperandCycle(DefClass, DefIdx);
4837 return DefCycle && DefCycle <= 2U;
4849 // Make sure we don't generate a lo-lo mov that isn't supported.
4852 ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4859 for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
4874 if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
4875 MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
4886 default:
4919 MachineBasicBlock &MBB = *MI->getParent();
4920 DebugLoc DL = MI->getDebugLoc();
4921 Register Reg = MI->getOperand(0).getReg();
4937 Module &M = *MBB.getParent()->getFunction().getParent();
4940 // The offset won't fit in the LDR's 12-bit immediate field, so emit an
4953 cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4960 if (GV->hasDLLImportStorageClass())
4968 if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only
4971 ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq")->Encoding;
4992 MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
5013 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
5021 //===----------------------------------------------------------------------===//
5023 //===----------------------------------------------------------------------===//
5035 ExeNEON = 2
5064 // Certain instructions can go either way on Cortex-A8.
5075 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
5077 unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
5084 DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
5086 assert(DReg && "S-register with no D super-register?");
5090 /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
5091 /// set ImplicitSReg to a register number that must be marked as implicit-use or
5092 /// zero if no register needs to be defined as implicit-use.
5102 /// If the other SPR is defined, an implicit-use of it should be added. Else,
5105 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
5110 if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
5116 ImplicitSReg = TRI->getSubReg(DReg,
5119 MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
5127 // implicit-use.
5136 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
5137 const TargetRegisterInfo *TRI = &getRegisterInfo();
5139 default:
5156 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5157 MI.removeOperand(i - 1);
5175 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5176 MI.removeOperand(i - 1);
5178 DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
5202 DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
5205 if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
5208 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5209 MI.removeOperand(i - 1);
5215 .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
5236 DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
5237 DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
5240 if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
5243 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5244 MI.removeOperand(i - 1);
5251 .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
5264 // In general there's no single instruction that can perform an S <-> S
5267 // the position based purely on the combination of lane-0 and lane-1
5269 // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
5270 // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
5271 // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
5272 // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
5282 // <imp-use>.
5284 bool CurUndef = !MI.readsRegister(CurReg, TRI);
5288 CurUndef = !MI.readsRegister(CurReg, TRI);
5302 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5306 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5324 //===----------------------------------------------------------------------===//
5326 //===----------------------------------------------------------------------===//
5328 // Swift renames NEON registers with 64-bit granularity. That means any
5329 // instruction writing an S-reg implicitly reads the containing D-reg. The
5331 // on D-registers, but f32 loads are still a problem.
5335 // VLDRS - Only writes S, partial D update.
5336 // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
5337 // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
5339 // FCONSTD can be used as a dependency-breaking instruction.
5342 const TargetRegisterInfo *TRI) const {
5347 assert(TRI && "Need TRI instance");
5353 int UseOp = -1;
5356 // Normal instructions writing only an S-register.
5365 UseOp = MI.findRegisterUseOperandIdx(Reg, TRI, false);
5372 default:
5378 if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
5381 // We must be able to clobber the whole D-reg.
5387 // Physical register: MI must define the full D-reg.
5388 unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
5390 if (!DReg || !MI.definesRegister(DReg, TRI))
5394 // MI has an unwanted D-register dependency.
5400 // returned non-zero.
5402 MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
5404 assert(TRI && "Need TRI instance");
5411 // If MI defines an S-reg, find the corresponding D super-register.
5413 DReg = ARM::D0 + (Reg - ARM::S0) / 2;
5414 assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
5417 assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
5418 assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
5421 // the full D-register by loading the same value to both lanes. The
5422 // instruction is micro-coded with 2 uops, so don't do this until we can
5423 // properly schedule micro-coded instructions. The dispatcher stalls cause
5426 // Insert the dependency-breaking FCONSTD before MI.
5431 MI.addRegisterKilled(DReg, TRI, true);
5439 if (MI->getNumOperands() < 4)
5441 unsigned ShOpVal = MI->getOperand(3).getImm();
5443 // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
5445 ((ShImm == 1 || ShImm == 2) &&
5466 if (!MOReg->isUndef())
5467 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5468 MOReg->getSubReg(), ARM::ssub_0));
5470 MOReg = &MI.getOperand(2);
5471 if (!MOReg->isUndef())
5472 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5473 MOReg->getSubReg(), ARM::ssub_1));
5491 const MachineOperand &MOReg = MI.getOperand(2);
5514 const MachineOperand &MOInsertedReg = MI.getOperand(2);
5540 {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"},
5541 {MO_LO_0_7, "arm-lo-0-7"}, {MO_HI_0_7, "arm-hi-0-7"},
5542 {MO_LO_8_15, "arm-lo-8-15"}, {MO_HI_8_15, "arm-hi-8-15"},
5552 {MO_COFFSTUB, "arm-coffstub"},
5553 {MO_GOT, "arm-got"},
5554 {MO_SBREL, "arm-sbrel"},
5555 {MO_DLLIMPORT, "arm-dllimport"},
5556 {MO_SECREL, "arm-secrel"},
5557 {MO_NONLAZY, "arm-nonlazy"}};
5567 // TODO: Handle cases where Reg is a super- or sub-register of the
5575 Sign = -1;
5582 if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
5585 Offset = MI.getOperand(2).getImm() * Sign;
5592 const TargetRegisterInfo *TRI) {
5594 if (I->modifiesRegister(Reg, TRI))
5600 const TargetRegisterInfo *TRI) {
5605 while (CmpMI != Br->getParent()->begin()) {
5606 --CmpMI;
5607 if (CmpMI->modifiesRegister(ARM::CPSR, TRI))
5609 if (CmpMI->readsRegister(ARM::CPSR, TRI))
5613 // Check that this inst is a CMP r[0-7], #0 and that the register
5615 if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
5617 Register Reg = CmpMI->getOperand(0).getReg();
5620 if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
5624 if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI))
5633 if (Subtarget->isThumb()) {
5635 return ForCodesize ? 2 : 1;
5636 if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV
5637 ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
5638 ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
5641 return ForCodesize ? 4 : 2;
5643 return ForCodesize ? 4 : 2;
5645 return ForCodesize ? 4 : 2;
5647 if (ARM_AM::getSOImmVal(Val) != -1) // MOV
5649 if (ARM_AM::getSOImmVal(~Val) != -1) // MVN
5651 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
5654 return ForCodesize ? 8 : 2;
5656 return ForCodesize ? 8 : 2;
5658 if (Subtarget->useMovt()) // MOVW + MOVT
5659 return ForCodesize ? 8 : 2;
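Worked examples of the cases above (cost is bytes when ForCodesize, otherwise roughly instruction count): Val = 0xAB is a single Thumb MOV, costing 2/1; Val = 0x12345678 fits none of the single-instruction encodings, so MOVW + MOVT costs 8/2.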
5689 /// I2 --> B OUTLINED_FUNCTION I1
5693 /// +-------------------------+--------+-----+
5695 /// +-------------------------+--------+-----+
5699 /// +-------------------------+--------+-----+
5703 /// called with a BL instruction, and the outlined function tail-calls the
5709 /// I2 --> BL OUTLINED_FUNCTION I1
5713 /// +-------------------------+--------+-----+
5715 /// +-------------------------+--------+-----+
5719 /// +-------------------------+--------+-----+
5728 /// I2 --> BL OUTLINED_FUNCTION I1
5733 /// +-------------------------+--------+-----+
5735 /// +-------------------------+--------+-----+
5737 /// | Frame overhead in Bytes | 2 | 4 |
5739 /// +-------------------------+--------+-----+
5749 /// I2 --> BL OUTLINED_FUNCTION I1
5754 /// +-------------------------+--------+-----+
5756 /// +-------------------------+--------+-----+
5758 /// | Frame overhead in Bytes | 2 | 4 |
5760 /// +-------------------------+--------+-----+
5768 /// I2 --> BL OUTLINED_FUNCTION I1
5773 /// +-------------------------+--------+-----+
5775 /// +-------------------------+--------+-----+
5777 /// | Frame overhead in Bytes | 2 | 4 |
5779 /// +-------------------------+--------+-----+
5814 FrameNoLRSave(target.isThumb() ? 2 : 4),
5816 FrameRegSave(target.isThumb() ? 2 : 4),
5818 FrameDefault(target.isThumb() ? 2 : 4),
5825 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
5827 static_cast<const ARMBaseRegisterInfo *>(&TRI);
5829 BitVector regsReserved = ARI->getReservedRegs(*MF);
5836 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
5837 C.isAvailableInsideSeq(Reg, TRI))
5846 static bool isLRAvailable(const TargetRegisterInfo &TRI,
5855 if (MI.modifiesRegister(ARM::LR, &TRI))
5868 if (MI.readsRegister(ARM::LR, &TRI))
5885 const TargetRegisterInfo &TRI = getRegisterInfo();
5901 auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
5906 return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI);
5917 if (RepeatedSequenceLocs.size() < 2)
5924 // possible combinations of PAC-RET and BTI is going to yield one big subset
5926 // candidates separately for PAC-RET and BTI.
5930 // number prefer the non-BTI ones for outlining, since they have less
5934 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5943 if (RepeatedSequenceLocs.size() < 2)
5946 // Likewise, partition the candidates according to PAC-RET enablement.
5949 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5960 if (RepeatedSequenceLocs.size() < 2)
5978 *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
6021 const auto Last = C.getMBB()->rbegin();
6023 C.getMBB()->isReturnBlock() && !Last->isCall()
6024 ? isLRAvailable(TRI, Last,
6026 : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI);
6045 else if (C.isAvailableInsideSeq(ARM::SP, TRI)) {
6058 // have to update the stack. Otherwise, give every candidate the default
6064 if (RepeatedSequenceLocs.size() < 2)
6097 int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP, /*TRI=*/nullptr);
6098 unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
6102 else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2))
6107 // Note: AddrModeT1_[1|2|4] don't operate on SP
6114 AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE
6115 AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE
6116 AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR
6122 unsigned NumOps = MI->getDesc().getNumOperands();
6123 unsigned ImmIdx = NumOps - 3;
6125 const MachineOperand &Offset = MI->getOperand(ImmIdx);
6155 Scale = 2;
6173 case ARMII::AddrModeT1_s: // SP-relative LD/ST
6177 default:
6182 assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
6186 unsigned Mask = (1 << NumBits) - 1;
6190 MI->getOperand(ImmIdx).setImm(OffVal);
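A worked instance of the fixup above, with illustrative values: a 4-byte-scaled SP-relative access with OffVal = 3 and an 8-byte Fixup becomes (3 * 4 + 8) / 4 = 5, which must still fit under Mask = (1 << NumBits) - 1.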
6200 // branch-target-enforcement is guaranteed to be consistent between all
6202 const Function &CFn = C.getMF()->getFunction();
6203 if (CFn.hasFnAttribute("branch-target-enforcement"))
6204 F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement"));
6225 if (MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction())
6236 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
6284 const TargetRegisterInfo *TRI = &getRegisterInfo();
6317 if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))
6334 (Callee->getName() == "\01__gnu_mcount_nc" ||
6335 Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
6340 // as a tail-call. Explicitly list the call instructions we know about so
6341 // we don't get unexpected results with call pseudo-instructions.
6353 MachineFunction *MF = MI.getParent()->getParent();
6354 MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
6363 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
6374 if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI))
6378 if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) {
6385 // authentication - we insert sign and authentication instructions only if
6401 if (MI.modifiesRegister(ARM::SP, TRI))
6415 if (MI.readsRegister(ARM::ITSTATE, TRI) ||
6416 MI.modifiesRegister(ARM::ITSTATE, TRI))
6447 .addImm(-Align)
6455 .addImm(-Align)
6474 int LROffset = Auth ? Align - 4 : Align;
6476 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6478 MCCFIInstruction::createOffset(nullptr, DwarfLR, -LROffset));
6484 unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true);
6486 MCCFIInstruction::createOffset(nullptr, DwarfRAC, -Align));
6498 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6499 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
6541 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6556 unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true);
6573 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6586 // tail-call.
6588 MachineInstr *Call = &*--MBB.instr_end();
6590 unsigned FuncOp = isThumb ? 2 : 0;
6591 unsigned Opc = Call->getOperand(FuncOp).isReg()
6597 .add(Call->getOperand(FuncOp));
6598 if (isThumb && !Call->getOperand(FuncOp).isReg())
6600 Call->eraseFromParent();
6624 ->getInfo<ARMFunctionInfo>()
6625 ->shouldSignReturnAddress(true);
6694 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
6698 assert(Reg != 0 && "No callee-saved register available?");
6708 It--;
6711 // We have the default case. Save and restore from SP.
6718 It--;
6758 // Bitset[0 .. MAX_STAGES-1] ... iterations needed
6764 typedef std::bitset<MAX_STAGES + 2> IterNeed;
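An illustrative reading of the IterNeed layout, using the bit names that appear further down: bits [0, MAX_STAGES) record how many iterations ahead a cross-iteration value is still needed, and the two extra bits are SEEN_AS_LIVE and LAST_IS_USE.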
6781 MF(EndLoop->getParent()->getParent()),
6782 TII(MF->getSubtarget().getInstrInfo()) {}
6800 if (isCondBranchOpcode(EndLoop->getOpcode())) {
6801 Cond.push_back(EndLoop->getOperand(1));
6802 Cond.push_back(EndLoop->getOperand(2));
6803 if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
6804 TII->reverseBranchCondition(Cond);
6807 } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
6816 BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
6817 .addReg(LoopDec->getOperand(0).getReg())
6839 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6846 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6857 // Determine which values will be loop-carried after the schedule is
6864 if (MI->isPHI() && S.getKind() == SDep::Anti) {
6868 .first->second.set(0);
6875 .first->second |= ((1 << (OStg - Stg)) - 1);
6880 // Determine more-or-less what the proposed schedule (reversed) is going to
6881 // be; it might not be quite the same because the within-cycle ordering
6885 for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle)
6891 [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; });
6896 // Learn whether the last use/def of each cross-iteration register is a use or
6900 for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid();
6907 if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] ||
6908 CIter->second[SEEN_AS_LIVE])
6911 CIter->second.set(SEEN_AS_LIVE);
6913 CIter->second.set(LAST_IS_USE);
6922 RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
6923 EndLoop->getParent()->end(), false, false);
6924 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
6929 MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
6933 // Track what cross-iteration registers would be seen as live
6942 CIter->second.reset(0);
6943 CIter->second.reset(SEEN_AS_LIVE);
6947 for (auto &S : SU->Preds) {
6955 if (Stg - Stg2 < MAX_STAGES)
6956 CIter->second.set(Stg - Stg2);
6957 CIter->second.set(SEEN_AS_LIVE);
6967 if (P[I] > TRI->getRegPressureSetLimit(*MF, I)) {
6977 MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
6978 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
6980 Preheader = *std::next(LoopBB->pred_begin());
6982 if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
6985 // it can be marked as non-pipelineable, allowing the pipeliner to force
6988 for (auto &L : LoopBB->instrs()) {
7005 // %2 = phi %1, <not loop>, %..., %loop
7006 // %3 = t2LoopDec %2, <imm>
7009 if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
7010 for (auto &L : LoopBB->instrs())
7015 Register LoopDecResult = I->getOperand(0).getReg();
7016 MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
7018 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
7021 for (auto &J : Preheader->instrs())