1 //===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// Finalize v8.1-m low-overhead loops by converting the associated pseudo 10 /// instructions into machine operations. 11 /// The expectation is that the loop contains three pseudo instructions: 12 /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop 13 /// form should be in the preheader, whereas the while form should be in the 14 /// preheaders only predecessor. 15 /// - t2LoopDec - placed within in the loop body. 16 /// - t2LoopEnd - the loop latch terminator. 17 /// 18 //===----------------------------------------------------------------------===// 19 20 #include "ARM.h" 21 #include "ARMBaseInstrInfo.h" 22 #include "ARMBaseRegisterInfo.h" 23 #include "ARMBasicBlockInfo.h" 24 #include "ARMSubtarget.h" 25 #include "llvm/CodeGen/MachineFunctionPass.h" 26 #include "llvm/CodeGen/MachineLoopInfo.h" 27 #include "llvm/CodeGen/MachineRegisterInfo.h" 28 29 using namespace llvm; 30 31 #define DEBUG_TYPE "arm-low-overhead-loops" 32 #define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass" 33 34 namespace { 35 36 class ARMLowOverheadLoops : public MachineFunctionPass { 37 const ARMBaseInstrInfo *TII = nullptr; 38 MachineRegisterInfo *MRI = nullptr; 39 std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr; 40 41 public: 42 static char ID; 43 44 ARMLowOverheadLoops() : MachineFunctionPass(ID) { } 45 46 void getAnalysisUsage(AnalysisUsage &AU) const override { 47 AU.setPreservesCFG(); 48 AU.addRequired<MachineLoopInfo>(); 49 MachineFunctionPass::getAnalysisUsage(AU); 50 } 51 52 bool runOnMachineFunction(MachineFunction &MF) override; 53 54 bool ProcessLoop(MachineLoop *ML); 55 56 bool RevertNonLoops(MachineFunction &MF); 57 58 void RevertWhile(MachineInstr *MI) const; 59 60 void RevertLoopDec(MachineInstr *MI) const; 61 62 void RevertLoopEnd(MachineInstr *MI) const; 63 64 void Expand(MachineLoop *ML, MachineInstr *Start, 65 MachineInstr *Dec, MachineInstr *End, bool Revert); 66 67 MachineFunctionProperties getRequiredProperties() const override { 68 return MachineFunctionProperties().set( 69 MachineFunctionProperties::Property::NoVRegs); 70 } 71 72 StringRef getPassName() const override { 73 return ARM_LOW_OVERHEAD_LOOPS_NAME; 74 } 75 }; 76 } 77 78 char ARMLowOverheadLoops::ID = 0; 79 80 INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME, 81 false, false) 82 83 bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) { 84 if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB()) 85 return false; 86 87 LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n"); 88 89 auto &MLI = getAnalysis<MachineLoopInfo>(); 90 MRI = &MF.getRegInfo(); 91 TII = static_cast<const ARMBaseInstrInfo*>( 92 MF.getSubtarget().getInstrInfo()); 93 BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF)); 94 BBUtils->computeAllBlockSizes(); 95 BBUtils->adjustBBOffsetsAfter(&MF.front()); 96 97 bool Changed = false; 98 for (auto ML : MLI) { 99 if (!ML->getParentLoop()) 100 Changed |= ProcessLoop(ML); 101 } 102 Changed |= RevertNonLoops(MF); 103 return Changed; 104 } 105 106 static bool IsLoopStart(MachineInstr &MI) { 107 return MI.getOpcode() == ARM::t2DoLoopStart || 108 MI.getOpcode() == ARM::t2WhileLoopStart; 109 } 110 111 bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { 112 113 bool Changed = false; 114 115 // Process inner loops first. 116 for (auto I = ML->begin(), E = ML->end(); I != E; ++I) 117 Changed |= ProcessLoop(*I); 118 119 LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML); 120 121 // Search the given block for a loop start instruction. If one isn't found, 122 // and there's only one predecessor block, search that one too. 123 std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart = 124 [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* { 125 for (auto &MI : *MBB) { 126 if (IsLoopStart(MI)) 127 return &MI; 128 } 129 if (MBB->pred_size() == 1) 130 return SearchForStart(*MBB->pred_begin()); 131 return nullptr; 132 }; 133 134 MachineInstr *Start = nullptr; 135 MachineInstr *Dec = nullptr; 136 MachineInstr *End = nullptr; 137 bool Revert = false; 138 139 // Search the preheader for the start intrinsic, or look through the 140 // predecessors of the header to find exactly one set.iterations intrinsic. 141 // FIXME: I don't see why we shouldn't be supporting multiple predecessors 142 // with potentially multiple set.loop.iterations, so we need to enable this. 143 if (auto *Preheader = ML->getLoopPreheader()) { 144 Start = SearchForStart(Preheader); 145 } else { 146 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n" 147 << " - Performing manual predecessor search.\n"); 148 MachineBasicBlock *Pred = nullptr; 149 for (auto *MBB : ML->getHeader()->predecessors()) { 150 if (!ML->contains(MBB)) { 151 if (Pred) { 152 LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n"); 153 Start = nullptr; 154 break; 155 } 156 Pred = MBB; 157 Start = SearchForStart(MBB); 158 } 159 } 160 } 161 162 // Find the low-overhead loop components and decide whether or not to fall 163 // back to a normal loop. 164 for (auto *MBB : reverse(ML->getBlocks())) { 165 for (auto &MI : *MBB) { 166 if (MI.getOpcode() == ARM::t2LoopDec) 167 Dec = &MI; 168 else if (MI.getOpcode() == ARM::t2LoopEnd) 169 End = &MI; 170 else if (IsLoopStart(MI)) 171 Start = &MI; 172 else if (MI.getDesc().isCall()) 173 // TODO: Though the call will require LE to execute again, does this 174 // mean we should revert? Always executing LE hopefully should be 175 // faster than performing a sub,cmp,br or even subs,br. 176 Revert = true; 177 178 if (!Dec || End) 179 continue; 180 181 // If we find that LR has been written or read between LoopDec and 182 // LoopEnd, expect that the decremented value is being used else where. 183 // Because this value isn't actually going to be produced until the 184 // latch, by LE, we would need to generate a real sub. The value is also 185 // likely to be copied/reloaded for use of LoopEnd - in which in case 186 // we'd need to perform an add because it gets subtracted again by LE! 187 // The other option is to then generate the other form of LE which doesn't 188 // perform the sub. 189 for (auto &MO : MI.operands()) { 190 if (MI.getOpcode() != ARM::t2LoopDec && MO.isReg() && 191 MO.getReg() == ARM::LR) { 192 LLVM_DEBUG(dbgs() << "ARM Loops: Found LR Use/Def: " << MI); 193 Revert = true; 194 break; 195 } 196 } 197 } 198 199 if (Dec && End && Revert) 200 break; 201 } 202 203 LLVM_DEBUG(if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start; 204 if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec; 205 if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;); 206 207 if (!Start && !Dec && !End) { 208 LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n"); 209 return Changed; 210 } else if (!(Start && Dec && End)) { 211 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find all loop components.\n"); 212 return false; 213 } 214 215 if (!End->getOperand(1).isMBB()) 216 report_fatal_error("Expected LoopEnd to target basic block"); 217 218 // TODO Maybe there's cases where the target doesn't have to be the header, 219 // but for now be safe and revert. 220 if (End->getOperand(1).getMBB() != ML->getHeader()) { 221 LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n"); 222 Revert = true; 223 } 224 225 // The WLS and LE instructions have 12-bits for the label offset. WLS 226 // requires a positive offset, while LE uses negative. 227 if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML->getHeader()) || 228 !BBUtils->isBBInRange(End, ML->getHeader(), 4094)) { 229 LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n"); 230 Revert = true; 231 } 232 if (Start->getOpcode() == ARM::t2WhileLoopStart && 233 (BBUtils->getOffsetOf(Start) > 234 BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) || 235 !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) { 236 LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n"); 237 Revert = true; 238 } 239 240 Expand(ML, Start, Dec, End, Revert); 241 return true; 242 } 243 244 // WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a 245 // beq that branches to the exit branch. 246 // FIXME: Need to check that we're not trashing the CPSR when generating the 247 // cmp. We could also try to generate a cbz if the value in LR is also in 248 // another low register. 249 void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const { 250 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI); 251 MachineBasicBlock *MBB = MI->getParent(); 252 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), 253 TII->get(ARM::t2CMPri)); 254 MIB.add(MI->getOperand(0)); 255 MIB.addImm(0); 256 MIB.addImm(ARMCC::AL); 257 MIB.addReg(ARM::NoRegister); 258 259 // TODO: Try to use tBcc instead 260 MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc)); 261 MIB.add(MI->getOperand(1)); // branch target 262 MIB.addImm(ARMCC::EQ); // condition code 263 MIB.addReg(ARM::CPSR); 264 MI->eraseFromParent(); 265 } 266 267 // TODO: Check flags so that we can possibly generate a tSubs or tSub. 268 void ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const { 269 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI); 270 MachineBasicBlock *MBB = MI->getParent(); 271 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), 272 TII->get(ARM::t2SUBri)); 273 MIB.addDef(ARM::LR); 274 MIB.add(MI->getOperand(1)); 275 MIB.add(MI->getOperand(2)); 276 MIB.addImm(ARMCC::AL); 277 MIB.addReg(0); 278 MIB.addReg(0); 279 MI->eraseFromParent(); 280 } 281 282 // Generate a subs, or sub and cmp, and a branch instead of an LE. 283 // FIXME: Need to check that we're not trashing the CPSR when generating 284 // the cmp. 285 void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI) const { 286 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI); 287 288 // Create cmp 289 MachineBasicBlock *MBB = MI->getParent(); 290 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), 291 TII->get(ARM::t2CMPri)); 292 MIB.addReg(ARM::LR); 293 MIB.addImm(0); 294 MIB.addImm(ARMCC::AL); 295 MIB.addReg(ARM::NoRegister); 296 297 // TODO Try to use tBcc instead. 298 // Create bne 299 MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc)); 300 MIB.add(MI->getOperand(1)); // branch target 301 MIB.addImm(ARMCC::NE); // condition code 302 MIB.addReg(ARM::CPSR); 303 MI->eraseFromParent(); 304 } 305 306 void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start, 307 MachineInstr *Dec, MachineInstr *End, 308 bool Revert) { 309 310 auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) { 311 // The trip count should already been held in LR since the instructions 312 // within the loop can only read and write to LR. So, there should be a 313 // mov to setup the count. WLS/DLS perform this move, so find the original 314 // and delete it - inserting WLS/DLS in its place. 315 MachineBasicBlock *MBB = Start->getParent(); 316 MachineInstr *InsertPt = Start; 317 for (auto &I : MRI->def_instructions(ARM::LR)) { 318 if (I.getParent() != MBB) 319 continue; 320 321 // Always execute. 322 if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL) 323 continue; 324 325 // Only handle move reg, if the trip count it will need moving into a reg 326 // before the setup instruction anyway. 327 if (!I.getDesc().isMoveReg() || 328 !I.getOperand(1).isIdenticalTo(Start->getOperand(0))) 329 continue; 330 InsertPt = &I; 331 break; 332 } 333 334 unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ? 335 ARM::t2DLS : ARM::t2WLS; 336 MachineInstrBuilder MIB = 337 BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc)); 338 339 MIB.addDef(ARM::LR); 340 MIB.add(Start->getOperand(0)); 341 if (Opc == ARM::t2WLS) 342 MIB.add(Start->getOperand(1)); 343 344 if (InsertPt != Start) 345 InsertPt->eraseFromParent(); 346 Start->eraseFromParent(); 347 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB); 348 return &*MIB; 349 }; 350 351 // Combine the LoopDec and LoopEnd instructions into LE(TP). 352 auto ExpandLoopEnd = [this](MachineLoop *ML, MachineInstr *Dec, 353 MachineInstr *End) { 354 MachineBasicBlock *MBB = End->getParent(); 355 MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(), 356 TII->get(ARM::t2LEUpdate)); 357 MIB.addDef(ARM::LR); 358 MIB.add(End->getOperand(0)); 359 MIB.add(End->getOperand(1)); 360 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB); 361 362 End->eraseFromParent(); 363 Dec->eraseFromParent(); 364 return &*MIB; 365 }; 366 367 // TODO: We should be able to automatically remove these branches before we 368 // get here - probably by teaching analyzeBranch about the pseudo 369 // instructions. 370 // If there is an unconditional branch, after I, that just branches to the 371 // next block, remove it. 372 auto RemoveDeadBranch = [](MachineInstr *I) { 373 MachineBasicBlock *BB = I->getParent(); 374 MachineInstr *Terminator = &BB->instr_back(); 375 if (Terminator->isUnconditionalBranch() && I != Terminator) { 376 MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB(); 377 if (BB->isLayoutSuccessor(Succ)) { 378 LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator); 379 Terminator->eraseFromParent(); 380 } 381 } 382 }; 383 384 if (Revert) { 385 if (Start->getOpcode() == ARM::t2WhileLoopStart) 386 RevertWhile(Start); 387 else 388 Start->eraseFromParent(); 389 RevertLoopDec(Dec); 390 RevertLoopEnd(End); 391 } else { 392 Start = ExpandLoopStart(ML, Start); 393 RemoveDeadBranch(Start); 394 End = ExpandLoopEnd(ML, Dec, End); 395 RemoveDeadBranch(End); 396 } 397 } 398 399 bool ARMLowOverheadLoops::RevertNonLoops(MachineFunction &MF) { 400 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n"); 401 bool Changed = false; 402 403 for (auto &MBB : MF) { 404 SmallVector<MachineInstr*, 4> Starts; 405 SmallVector<MachineInstr*, 4> Decs; 406 SmallVector<MachineInstr*, 4> Ends; 407 408 for (auto &I : MBB) { 409 if (IsLoopStart(I)) 410 Starts.push_back(&I); 411 else if (I.getOpcode() == ARM::t2LoopDec) 412 Decs.push_back(&I); 413 else if (I.getOpcode() == ARM::t2LoopEnd) 414 Ends.push_back(&I); 415 } 416 417 if (Starts.empty() && Decs.empty() && Ends.empty()) 418 continue; 419 420 Changed = true; 421 422 for (auto *Start : Starts) { 423 if (Start->getOpcode() == ARM::t2WhileLoopStart) 424 RevertWhile(Start); 425 else 426 Start->eraseFromParent(); 427 } 428 for (auto *Dec : Decs) 429 RevertLoopDec(Dec); 430 431 for (auto *End : Ends) 432 RevertLoopEnd(End); 433 } 434 return Changed; 435 } 436 437 FunctionPass *llvm::createARMLowOverheadLoopsPass() { 438 return new ARMLowOverheadLoops(); 439 } 440