1 //===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file This pass does a few optimisations related to Tail predicated loops 10 /// and MVE VPT blocks before register allocation is performed. For VPT blocks 11 /// the goal is to maximize the sizes of the blocks that will be created by the 12 /// MVE VPT Block Insertion pass (which runs after register allocation). For 13 /// tail predicated loops we transform the loop into something that will 14 /// hopefully make the backend ARMLowOverheadLoops pass's job easier. 15 /// 16 //===----------------------------------------------------------------------===// 17 18 #include "ARM.h" 19 #include "ARMSubtarget.h" 20 #include "MVETailPredUtils.h" 21 #include "Thumb2InstrInfo.h" 22 #include "llvm/ADT/SmallVector.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/MachineDominators.h" 25 #include "llvm/CodeGen/MachineFunction.h" 26 #include "llvm/CodeGen/MachineFunctionPass.h" 27 #include "llvm/CodeGen/MachineInstr.h" 28 #include "llvm/CodeGen/MachineLoopInfo.h" 29 #include "llvm/InitializePasses.h" 30 #include "llvm/Support/Debug.h" 31 #include <cassert> 32 33 using namespace llvm; 34 35 #define DEBUG_TYPE "arm-mve-vpt-opts" 36 37 static cl::opt<bool> 38 MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden, 39 cl::desc("Enable merging Loop End and Dec instructions."), 40 cl::init(true)); 41 42 static cl::opt<bool> 43 SetLRPredicate("arm-set-lr-predicate", cl::Hidden, 44 cl::desc("Enable setting lr as a predicate in tail predication regions."), 45 cl::init(true)); 46 47 namespace { 48 class MVETPAndVPTOptimisations : public MachineFunctionPass { 49 public: 50 static 
char ID; 51 const Thumb2InstrInfo *TII; 52 MachineRegisterInfo *MRI; 53 54 MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {} 55 56 bool runOnMachineFunction(MachineFunction &Fn) override; 57 58 void getAnalysisUsage(AnalysisUsage &AU) const override { 59 AU.addRequired<MachineLoopInfoWrapperPass>(); 60 AU.addPreserved<MachineLoopInfoWrapperPass>(); 61 AU.addRequired<MachineDominatorTreeWrapperPass>(); 62 AU.addPreserved<MachineDominatorTreeWrapperPass>(); 63 MachineFunctionPass::getAnalysisUsage(AU); 64 } 65 66 StringRef getPassName() const override { 67 return "ARM MVE TailPred and VPT Optimisation Pass"; 68 } 69 70 private: 71 bool LowerWhileLoopStart(MachineLoop *ML); 72 bool MergeLoopEnd(MachineLoop *ML); 73 bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT); 74 MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB, 75 MachineInstr &Instr, 76 MachineOperand &User, 77 Register Target); 78 bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB); 79 bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB); 80 bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT); 81 bool ConvertVPSEL(MachineBasicBlock &MBB); 82 bool HintDoLoopStartReg(MachineBasicBlock &MBB); 83 MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader, 84 MachineInstr *LoopStart); 85 }; 86 87 char MVETPAndVPTOptimisations::ID = 0; 88 89 } // end anonymous namespace 90 91 INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE, 92 "ARM MVE TailPred and VPT Optimisations pass", false, 93 false) 94 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) 95 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) 96 INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE, 97 "ARM MVE TailPred and VPT Optimisations pass", false, false) 98 99 static MachineInstr *LookThroughCOPY(MachineInstr *MI, 100 MachineRegisterInfo *MRI) { 101 while (MI && MI->getOpcode() == TargetOpcode::COPY && 102 MI->getOperand(1).getReg().isVirtual()) 103 MI = 
MRI->getVRegDef(MI->getOperand(1).getReg()); 104 return MI; 105 } 106 107 // Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and 108 // corresponding PHI that make up a low overhead loop. Only handles 'do' loops 109 // at the moment, returning a t2DoLoopStart in LoopStart. 110 static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI, 111 MachineInstr *&LoopStart, MachineInstr *&LoopPhi, 112 MachineInstr *&LoopDec, MachineInstr *&LoopEnd) { 113 MachineBasicBlock *Header = ML->getHeader(); 114 MachineBasicBlock *Latch = ML->getLoopLatch(); 115 if (!Header || !Latch) { 116 LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n"); 117 return false; 118 } 119 120 // Find the loop end from the terminators. 121 LoopEnd = nullptr; 122 for (auto &T : Latch->terminators()) { 123 if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) { 124 LoopEnd = &T; 125 break; 126 } 127 if (T.getOpcode() == ARM::t2LoopEndDec && 128 T.getOperand(2).getMBB() == Header) { 129 LoopEnd = &T; 130 break; 131 } 132 } 133 if (!LoopEnd) { 134 LLVM_DEBUG(dbgs() << " no LoopEnd\n"); 135 return false; 136 } 137 LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd); 138 139 // Find the dec from the use of the end. There may be copies between 140 // instructions. We expect the loop to loop like: 141 // $vs = t2DoLoopStart ... 142 // loop: 143 // $vp = phi [ $vs ], [ $vd ] 144 // ... 145 // $vd = t2LoopDec $vp 146 // ... 
147 // t2LoopEnd $vd, loop 148 if (LoopEnd->getOpcode() == ARM::t2LoopEndDec) 149 LoopDec = LoopEnd; 150 else { 151 LoopDec = 152 LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI); 153 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) { 154 LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n"); 155 return false; 156 } 157 } 158 LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec); 159 160 LoopPhi = 161 LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI); 162 if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI || 163 LoopPhi->getNumOperands() != 5 || 164 (LoopPhi->getOperand(2).getMBB() != Latch && 165 LoopPhi->getOperand(4).getMBB() != Latch)) { 166 LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n"); 167 return false; 168 } 169 LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi); 170 171 Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch 172 ? LoopPhi->getOperand(3).getReg() 173 : LoopPhi->getOperand(1).getReg(); 174 LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI); 175 if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart && 176 LoopStart->getOpcode() != ARM::t2WhileLoopSetup && 177 LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) { 178 LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n"); 179 return false; 180 } 181 LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart); 182 183 return true; 184 } 185 186 static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII) { 187 MachineBasicBlock *MBB = MI->getParent(); 188 assert(MI->getOpcode() == ARM::t2WhileLoopSetup && 189 "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!"); 190 191 // Subs 192 MachineInstrBuilder MIB = 193 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri)); 194 MIB.add(MI->getOperand(0)); 195 MIB.add(MI->getOperand(1)); 196 MIB.addImm(0); 197 MIB.addImm(ARMCC::AL); 198 MIB.addReg(ARM::NoRegister); 199 MIB.addReg(ARM::CPSR, 
RegState::Define); 200 201 // Attempt to find a t2WhileLoopStart and revert to a t2Bcc. 202 for (MachineInstr &I : MBB->terminators()) { 203 if (I.getOpcode() == ARM::t2WhileLoopStart) { 204 MachineInstrBuilder MIB = 205 BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc)); 206 MIB.add(MI->getOperand(1)); // branch target 207 MIB.addImm(ARMCC::EQ); 208 MIB.addReg(ARM::CPSR); 209 I.eraseFromParent(); 210 break; 211 } 212 } 213 214 MI->eraseFromParent(); 215 } 216 217 // The Hardware Loop insertion and ISel Lowering produce the pseudos for the 218 // start of a while loop: 219 // %a:gprlr = t2WhileLoopSetup %Cnt 220 // t2WhileLoopStart %a, %BB 221 // We want to convert those to a single instruction which, like t2LoopEndDec and 222 // t2DoLoopStartTP is both a terminator and produces a value: 223 // %a:grplr: t2WhileLoopStartLR %Cnt, %BB 224 // 225 // Otherwise if we can't, we revert the loop. t2WhileLoopSetup and 226 // t2WhileLoopStart are not valid past regalloc. 227 bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) { 228 LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop " 229 << ML->getHeader()->getName() << "\n"); 230 231 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec; 232 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd)) 233 return false; 234 235 if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup) 236 return false; 237 238 Register LR = LoopStart->getOperand(0).getReg(); 239 auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) { 240 return MI.getOpcode() == ARM::t2WhileLoopStart; 241 }); 242 if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) { 243 RevertWhileLoopSetup(LoopStart, TII); 244 RevertLoopDec(LoopStart, TII); 245 RevertLoopEnd(LoopStart, TII); 246 return true; 247 } 248 249 MachineInstrBuilder MI = 250 BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(), 251 TII->get(ARM::t2WhileLoopStartLR), LR) 252 .add(LoopStart->getOperand(1)) 253 .add(WLSIt->getOperand(1)); 254 (void)MI; 
255 LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr()); 256 257 WLSIt->eraseFromParent(); 258 LoopStart->eraseFromParent(); 259 return true; 260 } 261 262 // Return true if this instruction is invalid in a low overhead loop, usually 263 // because it clobbers LR. 264 static bool IsInvalidTPInstruction(MachineInstr &MI) { 265 return MI.isCall() || isLoopStart(MI); 266 } 267 268 // Starting from PreHeader, search for invalid instructions back until the 269 // LoopStart block is reached. If invalid instructions are found, the loop start 270 // is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will 271 // return the new DLS LoopStart if updated. 272 MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors( 273 MachineBasicBlock *PreHeader, MachineInstr *LoopStart) { 274 SmallVector<MachineBasicBlock *> Worklist; 275 SmallPtrSet<MachineBasicBlock *, 4> Visited; 276 Worklist.push_back(PreHeader); 277 Visited.insert(LoopStart->getParent()); 278 279 while (!Worklist.empty()) { 280 MachineBasicBlock *MBB = Worklist.pop_back_val(); 281 if (Visited.count(MBB)) 282 continue; 283 284 for (MachineInstr &MI : *MBB) { 285 if (!IsInvalidTPInstruction(MI)) 286 continue; 287 288 LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI); 289 290 // Create a t2DoLoopStart at the end of the preheader. 291 MachineInstrBuilder MIB = 292 BuildMI(*PreHeader, PreHeader->getFirstTerminator(), 293 LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart)); 294 MIB.add(LoopStart->getOperand(0)); 295 MIB.add(LoopStart->getOperand(1)); 296 297 // Make sure to remove the kill flags, to prevent them from being invalid. 298 LoopStart->getOperand(1).setIsKill(false); 299 300 // Revert the t2WhileLoopStartLR to a CMP and Br. 
301 RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true); 302 return MIB; 303 } 304 305 Visited.insert(MBB); 306 for (auto *Pred : MBB->predecessors()) 307 Worklist.push_back(Pred); 308 } 309 return LoopStart; 310 } 311 312 // This function converts loops with t2LoopEnd and t2LoopEnd instructions into 313 // a single t2LoopEndDec instruction. To do that it needs to make sure that LR 314 // will be valid to be used for the low overhead loop, which means nothing else 315 // is using LR (especially calls) and there are no superfluous copies in the 316 // loop. The t2LoopEndDec is a branching terminator that produces a value (the 317 // decrement) around the loop edge, which means we need to be careful that they 318 // will be valid to allocate without any spilling. 319 bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) { 320 if (!MergeEndDec) 321 return false; 322 323 LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName() 324 << "\n"); 325 326 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec; 327 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd)) 328 return false; 329 330 // Check if there is an illegal instruction (a call) in the low overhead loop 331 // and if so revert it now before we get any further. While loops also need to 332 // check the preheaders, but can be reverted to a DLS loop if needed. 
333 auto *PreHeader = ML->getLoopPreheader(); 334 if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader) 335 LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart); 336 337 for (MachineBasicBlock *MBB : ML->blocks()) { 338 for (MachineInstr &MI : *MBB) { 339 if (IsInvalidTPInstruction(MI)) { 340 LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI); 341 if (LoopStart->getOpcode() == ARM::t2DoLoopStart) 342 RevertDoLoopStart(LoopStart, TII); 343 else 344 RevertWhileLoopStartLR(LoopStart, TII); 345 RevertLoopDec(LoopDec, TII); 346 RevertLoopEnd(LoopEnd, TII); 347 return true; 348 } 349 } 350 } 351 352 // Remove any copies from the loop, to ensure the phi that remains is both 353 // simpler and contains no extra uses. Because t2LoopEndDec is a terminator 354 // that cannot spill, we need to be careful what remains in the loop. 355 Register PhiReg = LoopPhi->getOperand(0).getReg(); 356 Register DecReg = LoopDec->getOperand(0).getReg(); 357 Register StartReg = LoopStart->getOperand(0).getReg(); 358 // Ensure the uses are expected, and collect any copies we want to remove. 
359 SmallVector<MachineInstr *, 4> Copies; 360 auto CheckUsers = [&Copies](Register BaseReg, 361 ArrayRef<MachineInstr *> ExpectedUsers, 362 MachineRegisterInfo *MRI) { 363 SmallVector<Register, 4> Worklist; 364 Worklist.push_back(BaseReg); 365 while (!Worklist.empty()) { 366 Register Reg = Worklist.pop_back_val(); 367 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { 368 if (llvm::is_contained(ExpectedUsers, &MI)) 369 continue; 370 if (MI.getOpcode() != TargetOpcode::COPY || 371 !MI.getOperand(0).getReg().isVirtual()) { 372 LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI); 373 return false; 374 } 375 Worklist.push_back(MI.getOperand(0).getReg()); 376 Copies.push_back(&MI); 377 } 378 } 379 return true; 380 }; 381 if (!CheckUsers(PhiReg, {LoopDec}, MRI) || 382 !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) || 383 !CheckUsers(StartReg, {LoopPhi}, MRI)) { 384 // Don't leave a t2WhileLoopStartLR without the LoopDecEnd. 385 if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) { 386 RevertWhileLoopStartLR(LoopStart, TII); 387 RevertLoopDec(LoopDec, TII); 388 RevertLoopEnd(LoopEnd, TII); 389 return true; 390 } 391 return false; 392 } 393 394 MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass); 395 MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass); 396 MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass); 397 398 if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) { 399 LoopPhi->getOperand(3).setReg(StartReg); 400 LoopPhi->getOperand(1).setReg(DecReg); 401 } else { 402 LoopPhi->getOperand(1).setReg(StartReg); 403 LoopPhi->getOperand(3).setReg(DecReg); 404 } 405 406 SmallVector<MachineOperand, 4> Cond; // For analyzeBranch. 407 MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch. 408 if (!TII->analyzeBranch(*LoopEnd->getParent(), TBB, FBB, Cond) && !FBB) { 409 // If the LoopEnd falls through, need to insert a t2B to the fall-through 410 // block so that the non-analyzable t2LoopEndDec doesn't fall through. 
411 MachineFunction::iterator MBBI = ++LoopEnd->getParent()->getIterator(); 412 BuildMI(LoopEnd->getParent(), DebugLoc(), TII->get(ARM::t2B)) 413 .addMBB(&*MBBI) 414 .add(predOps(ARMCC::AL)); 415 } 416 417 // Replace the loop dec and loop end as a single instruction. 418 MachineInstrBuilder MI = 419 BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(), 420 TII->get(ARM::t2LoopEndDec), DecReg) 421 .addReg(PhiReg) 422 .add(LoopEnd->getOperand(1)); 423 (void)MI; 424 LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr()); 425 426 LoopDec->eraseFromParent(); 427 LoopEnd->eraseFromParent(); 428 for (auto *MI : Copies) 429 MI->eraseFromParent(); 430 return true; 431 } 432 433 // Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP 434 // instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP 435 // instruction, making the backend ARMLowOverheadLoops passes job of finding the 436 // VCTP operand much simpler. 437 bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML, 438 MachineDominatorTree *DT) { 439 LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop " 440 << ML->getHeader()->getName() << "\n"); 441 442 // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's 443 // in the loop. 
444 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec; 445 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd)) 446 return false; 447 if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart && 448 LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) 449 return false; 450 451 SmallVector<MachineInstr *, 4> VCTPs; 452 SmallVector<MachineInstr *, 4> MVEInstrs; 453 for (MachineBasicBlock *BB : ML->blocks()) { 454 for (MachineInstr &MI : *BB) 455 if (isVCTP(&MI)) 456 VCTPs.push_back(&MI); 457 else if (findFirstVPTPredOperandIdx(MI) != -1) 458 MVEInstrs.push_back(&MI); 459 } 460 461 if (VCTPs.empty()) { 462 LLVM_DEBUG(dbgs() << " no VCTPs\n"); 463 return false; 464 } 465 466 // Check all VCTPs are the same. 467 MachineInstr *FirstVCTP = *VCTPs.begin(); 468 for (MachineInstr *VCTP : VCTPs) { 469 LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP); 470 if (VCTP->getOpcode() != FirstVCTP->getOpcode() || 471 VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) { 472 LLVM_DEBUG(dbgs() << " VCTP's are not identical\n"); 473 return false; 474 } 475 } 476 477 // Check for the register being used can be setup before the loop. We expect 478 // this to be: 479 // $vx = ... 480 // loop: 481 // $vp = PHI [ $vx ], [ $vd ] 482 // .. 483 // $vpr = VCTP $vp 484 // .. 485 // $vd = t2SUBri $vp, #n 486 // .. 487 Register CountReg = FirstVCTP->getOperand(1).getReg(); 488 if (!CountReg.isVirtual()) { 489 LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n"); 490 return false; 491 } 492 MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI); 493 if (!Phi || Phi->getOpcode() != TargetOpcode::PHI || 494 Phi->getNumOperands() != 5 || 495 (Phi->getOperand(2).getMBB() != ML->getLoopLatch() && 496 Phi->getOperand(4).getMBB() != ML->getLoopLatch())) { 497 LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n"); 498 return false; 499 } 500 CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch() 501 ? 
Phi->getOperand(3).getReg() 502 : Phi->getOperand(1).getReg(); 503 504 // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of 505 // the preheader and add the new CountReg to it. We attempt to place it late 506 // in the preheader, but may need to move that earlier based on uses. 507 MachineBasicBlock *MBB = LoopStart->getParent(); 508 MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator(); 509 for (MachineInstr &Use : 510 MRI->use_instructions(LoopStart->getOperand(0).getReg())) 511 if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) || 512 !DT->dominates(ML->getHeader(), Use.getParent())) { 513 LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n"); 514 return false; 515 } 516 517 unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart 518 ? ARM::t2DoLoopStartTP 519 : ARM::t2WhileLoopStartTP; 520 MachineInstrBuilder MI = 521 BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc)) 522 .add(LoopStart->getOperand(0)) 523 .add(LoopStart->getOperand(1)) 524 .addReg(CountReg); 525 if (NewOpc == ARM::t2WhileLoopStartTP) 526 MI.add(LoopStart->getOperand(2)); 527 LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with " 528 << *MI.getInstr()); 529 MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass); 530 LoopStart->eraseFromParent(); 531 532 if (SetLRPredicate) { 533 // Each instruction in the loop needs to be using LR as the predicate from 534 // the Phi as the predicate. 535 Register LR = LoopPhi->getOperand(0).getReg(); 536 for (MachineInstr *MI : MVEInstrs) { 537 int Idx = findFirstVPTPredOperandIdx(*MI); 538 MI->getOperand(Idx + 2).setReg(LR); 539 } 540 } 541 542 return true; 543 } 544 545 // Returns true if Opcode is any VCMP Opcode. 546 static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; } 547 548 // Returns true if a VCMP with this Opcode can have its operands swapped. 
549 // There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs, 550 // and VCMPr instructions (since the r is always on the right). 551 static bool CanHaveSwappedOperands(unsigned Opcode) { 552 switch (Opcode) { 553 default: 554 return true; 555 case ARM::MVE_VCMPf32: 556 case ARM::MVE_VCMPf16: 557 case ARM::MVE_VCMPf32r: 558 case ARM::MVE_VCMPf16r: 559 case ARM::MVE_VCMPi8r: 560 case ARM::MVE_VCMPi16r: 561 case ARM::MVE_VCMPi32r: 562 case ARM::MVE_VCMPu8r: 563 case ARM::MVE_VCMPu16r: 564 case ARM::MVE_VCMPu32r: 565 case ARM::MVE_VCMPs8r: 566 case ARM::MVE_VCMPs16r: 567 case ARM::MVE_VCMPs32r: 568 return false; 569 } 570 } 571 572 // Returns the CondCode of a VCMP Instruction. 573 static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) { 574 assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP"); 575 return ARMCC::CondCodes(Instr.getOperand(3).getImm()); 576 } 577 578 // Returns true if Cond is equivalent to a VPNOT instruction on the result of 579 // Prev. Cond and Prev must be VCMPs. 580 static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) { 581 assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode())); 582 583 // Opcodes must match. 
584 if (Cond.getOpcode() != Prev.getOpcode()) 585 return false; 586 587 MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2); 588 MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2); 589 590 // If the VCMP has the opposite condition with the same operands, we can 591 // replace it with a VPNOT 592 ARMCC::CondCodes ExpectedCode = GetCondCode(Cond); 593 ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode); 594 if (ExpectedCode == GetCondCode(Prev)) 595 if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2)) 596 return true; 597 // Check again with operands swapped if possible 598 if (!CanHaveSwappedOperands(Cond.getOpcode())) 599 return false; 600 ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode); 601 return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) && 602 CondOP2.isIdenticalTo(PrevOP1); 603 } 604 605 // Returns true if Instr writes to VCCR. 606 static bool IsWritingToVCCR(MachineInstr &Instr) { 607 if (Instr.getNumOperands() == 0) 608 return false; 609 MachineOperand &Dst = Instr.getOperand(0); 610 if (!Dst.isReg()) 611 return false; 612 Register DstReg = Dst.getReg(); 613 if (!DstReg.isVirtual()) 614 return false; 615 MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo(); 616 const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg); 617 return RegClass && (RegClass->getID() == ARM::VCCRRegClassID); 618 } 619 620 // Transforms 621 // <Instr that uses %A ('User' Operand)> 622 // Into 623 // %K = VPNOT %Target 624 // <Instr that uses %K ('User' Operand)> 625 // And returns the newly inserted VPNOT. 626 // This optimization is done in the hopes of preventing spills/reloads of VPR by 627 // reducing the number of VCCR values with overlapping lifetimes. 
628 MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT( 629 MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User, 630 Register Target) { 631 Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target)); 632 633 MachineInstrBuilder MIBuilder = 634 BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT)) 635 .addDef(NewResult) 636 .addReg(Target); 637 addUnpredicatedMveVpredNOp(MIBuilder); 638 639 // Make the user use NewResult instead, and clear its kill flag. 640 User.setReg(NewResult); 641 User.setIsKill(false); 642 643 LLVM_DEBUG(dbgs() << " Inserting VPNOT (for spill prevention): "; 644 MIBuilder.getInstr()->dump()); 645 646 return *MIBuilder.getInstr(); 647 } 648 649 // Moves a VPNOT before its first user if an instruction that uses Reg is found 650 // in-between the VPNOT and its user. 651 // Returns true if there is at least one user of the VPNOT in the block. 652 static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB, 653 MachineBasicBlock::iterator Iter, 654 Register Reg) { 655 assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!"); 656 assert(getVPTInstrPredicate(*Iter) == ARMVCC::None && 657 "The VPNOT cannot be predicated"); 658 659 MachineInstr &VPNOT = *Iter; 660 Register VPNOTResult = VPNOT.getOperand(0).getReg(); 661 Register VPNOTOperand = VPNOT.getOperand(1).getReg(); 662 663 // Whether the VPNOT will need to be moved, and whether we found a user of the 664 // VPNOT. 665 bool MustMove = false, HasUser = false; 666 MachineOperand *VPNOTOperandKiller = nullptr; 667 for (; Iter != MBB.end(); ++Iter) { 668 if (MachineOperand *MO = 669 Iter->findRegisterUseOperand(VPNOTOperand, /*TRI=*/nullptr, 670 /*isKill*/ true)) { 671 // If we find the operand that kills the VPNOTOperand's result, save it. 
672 VPNOTOperandKiller = MO; 673 } 674 675 if (Iter->findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr) != -1) { 676 MustMove = true; 677 continue; 678 } 679 680 if (Iter->findRegisterUseOperandIdx(VPNOTResult, /*TRI=*/nullptr) == -1) 681 continue; 682 683 HasUser = true; 684 if (!MustMove) 685 break; 686 687 // Move the VPNOT right before Iter 688 LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << " Before: "; 689 Iter->dump()); 690 MBB.splice(Iter, &MBB, VPNOT.getIterator()); 691 // If we move the instr, and its operand was killed earlier, remove the kill 692 // flag. 693 if (VPNOTOperandKiller) 694 VPNOTOperandKiller->setIsKill(false); 695 696 break; 697 } 698 return HasUser; 699 } 700 701 // This optimisation attempts to reduce the number of overlapping lifetimes of 702 // VCCR values by replacing uses of old VCCR values with VPNOTs. For example, 703 // this replaces 704 // %A:vccr = (something) 705 // %B:vccr = VPNOT %A 706 // %Foo = (some op that uses %B) 707 // %Bar = (some op that uses %A) 708 // With 709 // %A:vccr = (something) 710 // %B:vccr = VPNOT %A 711 // %Foo = (some op that uses %B) 712 // %TMP2:vccr = VPNOT %B 713 // %Bar = (some op that uses %A) 714 bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) { 715 MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end(); 716 SmallVector<MachineInstr *, 4> DeadInstructions; 717 bool Modified = false; 718 719 while (Iter != End) { 720 Register VCCRValue, OppositeVCCRValue; 721 // The first loop looks for 2 unpredicated instructions: 722 // %A:vccr = (instr) ; A is stored in VCCRValue 723 // %B:vccr = VPNOT %A ; B is stored in OppositeVCCRValue 724 for (; Iter != End; ++Iter) { 725 // We're only interested in unpredicated instructions that write to VCCR. 
726 if (!IsWritingToVCCR(*Iter) || 727 getVPTInstrPredicate(*Iter) != ARMVCC::None) 728 continue; 729 Register Dst = Iter->getOperand(0).getReg(); 730 731 // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've 732 // found what we were looking for. 733 if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT && 734 Iter->findRegisterUseOperandIdx(VCCRValue, /*TRI=*/nullptr) != -1) { 735 // Move the VPNOT closer to its first user if needed, and ignore if it 736 // has no users. 737 if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue)) 738 continue; 739 740 OppositeVCCRValue = Dst; 741 ++Iter; 742 break; 743 } 744 745 // Else, just set VCCRValue. 746 VCCRValue = Dst; 747 } 748 749 // If the first inner loop didn't find anything, stop here. 750 if (Iter == End) 751 break; 752 753 assert(VCCRValue && OppositeVCCRValue && 754 "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop " 755 "stopped before the end of the block!"); 756 assert(VCCRValue != OppositeVCCRValue && 757 "VCCRValue should not be equal to OppositeVCCRValue!"); 758 759 // LastVPNOTResult always contains the same value as OppositeVCCRValue. 760 Register LastVPNOTResult = OppositeVCCRValue; 761 762 // This second loop tries to optimize the remaining instructions. 763 for (; Iter != End; ++Iter) { 764 bool IsInteresting = false; 765 766 if (MachineOperand *MO = 767 Iter->findRegisterUseOperand(VCCRValue, /*TRI=*/nullptr)) { 768 IsInteresting = true; 769 770 // - If the instruction is a VPNOT, it can be removed, and we can just 771 // replace its uses with LastVPNOTResult. 772 // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue. 
        if (Iter->getOpcode() == ARM::MVE_VPNOT) {
          // This instruction is itself a VPNOT of VCCRValue, so its result is
          // identical to LastVPNOTResult: redirect all of its uses to
          // LastVPNOTResult and mark it for deletion.
          Register Result = Iter->getOperand(0).getReg();

          MRI->replaceRegWith(Result, LastVPNOTResult);
          DeadInstructions.push_back(&*Iter);
          Modified = true;

          LLVM_DEBUG(dbgs()
                     << "Replacing all uses of '" << printReg(Result)
                     << "' with '" << printReg(LastVPNOTResult) << "'\n");
        } else {
          // Rewrite this use of the old VCCR value through a freshly inserted
          // VPNOT (helper defined earlier in this file).
          MachineInstr &VPNOT =
              ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
          Modified = true;

          // The two values swap roles: the new VPNOT's result now holds the
          // opposite of the value we were tracking.
          LastVPNOTResult = VPNOT.getOperand(0).getReg();
          std::swap(VCCRValue, OppositeVCCRValue);

          LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
                            << "' with '" << printReg(LastVPNOTResult)
                            << "' in instr: " << *Iter);
        }
      } else {
        // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
        // instead as they contain the same value.
        if (MachineOperand *MO = Iter->findRegisterUseOperand(
                OppositeVCCRValue, /*TRI=*/nullptr)) {
          IsInteresting = true;

          // This is pointless if LastVPNOTResult == OppositeVCCRValue.
          if (LastVPNOTResult != OppositeVCCRValue) {
            LLVM_DEBUG(dbgs() << "Replacing usage of '"
                              << printReg(OppositeVCCRValue) << "' with '"
                              << printReg(LastVPNOTResult) << " for instr: ";
                       Iter->dump());
            MO->setReg(LastVPNOTResult);
            Modified = true;
          }

          // The value may still be used further down the block, so this use
          // must not carry a kill flag.
          MO->setIsKill(false);
        }

        // If this is an unpredicated VPNOT on
        // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
        if (Iter->getOpcode() == ARM::MVE_VPNOT &&
            getVPTInstrPredicate(*Iter) == ARMVCC::None) {
          Register VPNOTOperand = Iter->getOperand(1).getReg();
          if (VPNOTOperand == LastVPNOTResult ||
              VPNOTOperand == OppositeVCCRValue) {
            IsInteresting = true;

            std::swap(VCCRValue, OppositeVCCRValue);
            LastVPNOTResult = Iter->getOperand(0).getReg();
          }
        }
      }

      // If this instruction was not interesting, and it writes to VCCR, stop.
      if (!IsInteresting && IsWritingToVCCR(*Iter))
        break;
    }
  }

  // Erase the instructions made redundant above.
  for (MachineInstr *DeadInstruction : DeadInstructions)
    DeadInstruction->eraseFromParent();

  return Modified;
}

// This optimisation replaces VCMPs with VPNOTs when they are equivalent.
// A VCMP can be replaced when the previous unreplaced VCMP computes the
// opposite condition of it (see IsVPNOTEquivalent, defined elsewhere in this
// file), in which case a VPNOT of the previous result yields the same value.
bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 4> DeadInstructions;

  // The last VCMP that we have seen and that couldn't be replaced.
  // This is reset when an instruction that writes to VCCR/VPR is found, or
  // when a VCMP is replaced with a VPNOT.
  // We'll only replace VCMPs with VPNOTs when this is not null, and when the
  // current VCMP is the opposite of PrevVCMP.
  MachineInstr *PrevVCMP = nullptr;
  // If we find an instruction that kills the result of PrevVCMP, we save the
  // operand here to remove the kill flag in case we need to use PrevVCMP's
  // result.
  MachineOperand *PrevVCMPResultKiller = nullptr;

  for (MachineInstr &Instr : MBB.instrs()) {
    if (PrevVCMP) {
      if (MachineOperand *MO =
              Instr.findRegisterUseOperand(PrevVCMP->getOperand(0).getReg(),
                                           /*TRI=*/nullptr, /*isKill*/ true)) {
        // If we come across the instr that kills PrevVCMP's result, record it
        // so we can remove the kill flag later if we need to.
        PrevVCMPResultKiller = MO;
      }
    }

    // Ignore predicated instructions.
    if (getVPTInstrPredicate(Instr) != ARMVCC::None)
      continue;

    // Only look at VCMPs
    if (!IsVCMP(Instr.getOpcode())) {
      // If the instruction writes to VCCR, forget the previous VCMP.
      if (IsWritingToVCCR(Instr))
        PrevVCMP = nullptr;
      continue;
    }

    if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
      PrevVCMP = &Instr;
      continue;
    }

    // The register containing the result of the VCMP that we're going to
    // replace.
    Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();

    // Build a VPNOT to replace the VCMP, reusing its operands.
    MachineInstrBuilder MIBuilder =
        BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
            .add(Instr.getOperand(0))
            .addReg(PrevVCMPResultReg);
    addUnpredicatedMveVpredNOp(MIBuilder);
    LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
               MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: ";
               Instr.dump());

    // If we found an instruction that uses, and kills PrevVCMP's result,
    // remove the kill flag.
    if (PrevVCMPResultKiller)
      PrevVCMPResultKiller->setIsKill(false);

    // Finally, mark the old VCMP for removal and reset
    // PrevVCMP/PrevVCMPResultKiller.
    DeadInstructions.push_back(&Instr);
    PrevVCMP = nullptr;
    PrevVCMPResultKiller = nullptr;
  }

  for (MachineInstr *DeadInstruction : DeadInstructions)
    DeadInstruction->eraseFromParent();

  return !DeadInstructions.empty();
}

bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
                                                    MachineDominatorTree *DT) {
  // Scan through the block, looking for instructions that use constants moved
  // into VPR that are the negative of one another. These are expected to be
  // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
  // mask is kept, and VPNOT's of it are added or reused as we scan through
  // the function.
  // NOTE(review): DT is not used in the visible body of this function.
  unsigned LastVPTImm = 0;
  Register LastVPTReg = 0;
  SmallSet<MachineInstr *, 4> DeadInstructions;

  for (MachineInstr &Instr : MBB.instrs()) {
    // Look for predicated MVE instructions.
    int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
    if (PIdx == -1)
      continue;
    Register VPR = Instr.getOperand(PIdx + 1).getReg();
    if (!VPR.isVirtual())
      continue;

    // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
    MachineInstr *Copy = MRI->getVRegDef(VPR);
    // Reject anything that is not a COPY from a virtual register of a
    // non-VCCR class (a VCCR source cannot be the constant move we want).
    if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
        !Copy->getOperand(1).getReg().isVirtual() ||
        MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
      LastVPTReg = 0;
      continue;
    }
    Register GPR = Copy->getOperand(1).getReg();

    // Find the Immediate used by the copy. Returns -1U if the defining
    // instruction is not a t2MOVi/t2MOVi16 constant move.
    auto getImm = [&](Register GPR) -> unsigned {
      MachineInstr *Def = MRI->getVRegDef(GPR);
      if (Def && (Def->getOpcode() == ARM::t2MOVi ||
                  Def->getOpcode() == ARM::t2MOVi16))
        return Def->getOperand(1).getImm();
      return -1U;
    };
    unsigned Imm = getImm(GPR);
    if (Imm == -1U) {
      LastVPTReg = 0;
      continue;
    }

    // Invert within the low 16 bits only; the constants compared here are
    // 16-bit predicate masks.
    unsigned NotImm = ~Imm & 0xffff;
    if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
      // Same constant as last time: reuse the previous predicate register.
      // Kill flags on it are no longer accurate once it gains extra uses.
      MRI->clearKillFlags(LastVPTReg);
      Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
      if (MRI->use_empty(VPR)) {
        DeadInstructions.insert(Copy);
        if (MRI->hasOneUse(GPR))
          DeadInstructions.insert(MRI->getVRegDef(GPR));
      }
      LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr);
      VPR = LastVPTReg;
    } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
      // We have found the not of a previous constant. Create a VPNot of the
      // earlier predicate reg and use it instead of the copy.
      Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
      auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
                           TII->get(ARM::MVE_VPNOT), NewVPR)
                       .addReg(LastVPTReg);
      addUnpredicatedMveVpredNOp(VPNot);

      // Use the new register and check if the def is now dead.
      Instr.getOperand(PIdx + 1).setReg(NewVPR);
      if (MRI->use_empty(VPR)) {
        DeadInstructions.insert(Copy);
        if (MRI->hasOneUse(GPR))
          DeadInstructions.insert(MRI->getVRegDef(GPR));
      }
      LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at "
                        << Instr);
      VPR = NewVPR;
    }

    // Remember the constant and the register now carrying it for the next
    // iteration.
    LastVPTImm = Imm;
    LastVPTReg = VPR;
  }

  for (MachineInstr *DI : DeadInstructions)
    DI->eraseFromParent();

  return !DeadInstructions.empty();
}

// Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
// somewhat blunt approximation to allow tail predicated with vpsel
// instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
// different semantics under tail predication. Until that is modelled we just
// convert to a VMOVT (via a predicated VORR) instead.
bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
  bool HasVCTP = false;
  SmallVector<MachineInstr *, 4> DeadInstructions;

  for (MachineInstr &MI : MBB.instrs()) {
    // Only rewrite VPSELs that appear after a VCTP in this block.
    if (isVCTP(&MI)) {
      HasVCTP = true;
      continue;
    }

    if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
      continue;

    // Build a predicated VORR (acting as a VMOVT): dest from operand 0,
    // both VORR sources from the VPSEL's first input (operand 1), predicated
    // Then with the VPSEL's predicate operands, and operand 2 as the
    // inactive-lanes value.
    MachineInstrBuilder MIBuilder =
        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1))
            .add(MI.getOperand(1))
            .addImm(ARMVCC::Then)
            .add(MI.getOperand(4))
            .add(MI.getOperand(5))
            .add(MI.getOperand(2));
    // Silence unused variable warning in release builds.
    (void)MIBuilder;
    LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
               dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump());
    DeadInstructions.push_back(&MI);
  }

  // Erase the VPSELs we replaced above.
  for (MachineInstr *DeadInstruction : DeadInstructions)
    DeadInstruction->eraseFromParent();

  return !DeadInstructions.empty();
}

// Add a register allocation hint for t2DoLoopStart to hint it towards LR, as
// the instruction may be removable as a noop.
bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
  bool Changed = false;
  for (MachineInstr &MI : MBB.instrs()) {
    if (MI.getOpcode() != ARM::t2DoLoopStart)
      continue;
    // Operand 1 is the register input of t2DoLoopStart; hint it towards LR.
    Register R = MI.getOperand(1).getReg();
    MachineFunction *MF = MI.getParent()->getParent();
    MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0);
    Changed = true;
  }
  return Changed;
}

// Pass entry point: run the loop-level transforms first, then the per-block
// VPT/VCCR optimisations, on Thumb2 subtargets with low-overhead-branch
// support.
bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
  const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();

  // This pass only applies to Thumb2 targets with LOB (low overhead branch)
  // support.
  if (!STI.isThumb2() || !STI.hasLOB())
    return false;

  TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
  MRI = &Fn.getRegInfo();
  MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  MachineDominatorTree *DT =
      &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();

  LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
                    << "********** Function: " << Fn.getName() << '\n');

  bool Modified = false;
  // Loop-level transforms, in preorder so outer loops are visited before
  // their nested loops.
  for (MachineLoop *ML : MLI->getLoopsInPreorder()) {
    Modified |= LowerWhileLoopStart(ML);
    Modified |= MergeLoopEnd(ML);
    Modified |= ConvertTailPredLoop(ML, DT);
  }

  // Per-basic-block optimisations.
  for (MachineBasicBlock &MBB : Fn) {
    Modified |= HintDoLoopStartReg(MBB);
    Modified |= ReplaceConstByVPNOTs(MBB, DT);
    Modified |= ReplaceVCMPsByVPNOTs(MBB);
    Modified |= ReduceOldVCCRValueUses(MBB);
    Modified |= ConvertVPSEL(MBB);
  }

  LLVM_DEBUG(dbgs() << "**************************************\n");
  return Modified;
}

/// createMVETPAndVPTOptimisationsPass
FunctionPass *llvm::createMVETPAndVPTOptimisationsPass() {
  return new MVETPAndVPTOptimisations();
}