1 //==- llvm/CodeGen/BreakFalseDeps.cpp - Break False Dependency Fix -*- C++ -*==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file Break False Dependency pass. 10 /// 11 /// Some instructions have false dependencies which cause unnecessary stalls. 12 /// For example, instructions may write part of a register and implicitly 13 /// need to read the other parts of the register. This may cause unwanted 14 /// stalls preventing otherwise unrelated instructions from executing in 15 /// parallel in an out-of-order CPU. 16 /// This pass is aimed at identifying and avoiding these dependencies. 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "llvm/ADT/DepthFirstIterator.h" 21 #include "llvm/CodeGen/LivePhysRegs.h" 22 #include "llvm/CodeGen/MachineFunctionPass.h" 23 #include "llvm/CodeGen/ReachingDefAnalysis.h" 24 #include "llvm/CodeGen/RegisterClassInfo.h" 25 #include "llvm/CodeGen/TargetInstrInfo.h" 26 #include "llvm/InitializePasses.h" 27 #include "llvm/MC/MCInstrDesc.h" 28 #include "llvm/MC/MCRegister.h" 29 #include "llvm/MC/MCRegisterInfo.h" 30 #include "llvm/Support/Debug.h" 31 32 using namespace llvm; 33 34 namespace llvm { 35 36 class BreakFalseDeps : public MachineFunctionPass { 37 private: 38 MachineFunction *MF = nullptr; 39 const TargetInstrInfo *TII = nullptr; 40 const TargetRegisterInfo *TRI = nullptr; 41 RegisterClassInfo RegClassInfo; 42 43 /// List of undefined register reads in this block in forward order. 44 std::vector<std::pair<MachineInstr *, unsigned>> UndefReads; 45 46 /// Storage for register unit liveness. 47 LivePhysRegs LiveRegSet; 48 49 ReachingDefAnalysis *RDA = nullptr; 50 51 public: 52 static char ID; // Pass identification, replacement for typeid 53 54 BreakFalseDeps() : MachineFunctionPass(ID) { 55 initializeBreakFalseDepsPass(*PassRegistry::getPassRegistry()); 56 } 57 58 void getAnalysisUsage(AnalysisUsage &AU) const override { 59 AU.setPreservesAll(); 60 AU.addRequired<ReachingDefAnalysis>(); 61 MachineFunctionPass::getAnalysisUsage(AU); 62 } 63 64 bool runOnMachineFunction(MachineFunction &MF) override; 65 66 MachineFunctionProperties getRequiredProperties() const override { 67 return MachineFunctionProperties().set( 68 MachineFunctionProperties::Property::NoVRegs); 69 } 70 71 private: 72 /// Process he given basic block. 73 void processBasicBlock(MachineBasicBlock *MBB); 74 75 /// Update def-ages for registers defined by MI. 76 /// Also break dependencies on partial defs and undef uses. 77 void processDefs(MachineInstr *MI); 78 79 /// Helps avoid false dependencies on undef registers by updating the 80 /// machine instructions' undef operand to use a register that the instruction 81 /// is truly dependent on, or use a register with clearance higher than Pref. 82 /// Returns true if it was able to find a true dependency, thus not requiring 83 /// a dependency breaking instruction regardless of clearance. 84 bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, 85 unsigned Pref); 86 87 /// Return true to if it makes sense to break dependence on a partial 88 /// def or undef use. 89 bool shouldBreakDependence(MachineInstr *, unsigned OpIdx, unsigned Pref); 90 91 /// Break false dependencies on undefined register reads. 92 /// Walk the block backward computing precise liveness. This is expensive, so 93 /// we only do it on demand. Note that the occurrence of undefined register 94 /// reads that should be broken is very rare, but when they occur we may have 95 /// many in a single block. 96 void processUndefReads(MachineBasicBlock *); 97 }; 98 99 } // namespace llvm 100 101 #define DEBUG_TYPE "break-false-deps" 102 103 char BreakFalseDeps::ID = 0; 104 INITIALIZE_PASS_BEGIN(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false) 105 INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis) 106 INITIALIZE_PASS_END(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false) 107 108 FunctionPass *llvm::createBreakFalseDeps() { return new BreakFalseDeps(); } 109 110 bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, 111 unsigned Pref) { 112 113 // We can't change tied operands. 114 if (MI->isRegTiedToDefOperand(OpIdx)) 115 return false; 116 117 MachineOperand &MO = MI->getOperand(OpIdx); 118 assert(MO.isUndef() && "Expected undef machine operand"); 119 120 // We can't change registers that aren't renamable. 121 if (!MO.isRenamable()) 122 return false; 123 124 MCRegister OriginalReg = MO.getReg().asMCReg(); 125 126 // Update only undef operands that have reg units that are mapped to one root. 127 for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) { 128 unsigned NumRoots = 0; 129 for (MCRegUnitRootIterator Root(*Unit, TRI); Root.isValid(); ++Root) { 130 NumRoots++; 131 if (NumRoots > 1) 132 return false; 133 } 134 } 135 136 // Get the undef operand's register class 137 const TargetRegisterClass *OpRC = 138 TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF); 139 assert(OpRC && "Not a valid register class"); 140 141 // If the instruction has a true dependency, we can hide the false depdency 142 // behind it. 143 for (MachineOperand &CurrMO : MI->operands()) { 144 if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() || 145 !OpRC->contains(CurrMO.getReg())) 146 continue; 147 // We found a true dependency - replace the undef register with the true 148 // dependency. 149 MO.setReg(CurrMO.getReg()); 150 return true; 151 } 152 153 // Go over all registers in the register class and find the register with 154 // max clearance or clearance higher than Pref. 155 unsigned MaxClearance = 0; 156 unsigned MaxClearanceReg = OriginalReg; 157 ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC); 158 for (MCPhysReg Reg : Order) { 159 unsigned Clearance = RDA->getClearance(MI, Reg); 160 if (Clearance <= MaxClearance) 161 continue; 162 MaxClearance = Clearance; 163 MaxClearanceReg = Reg; 164 165 if (MaxClearance > Pref) 166 break; 167 } 168 169 // Update the operand if we found a register with better clearance. 170 if (MaxClearanceReg != OriginalReg) 171 MO.setReg(MaxClearanceReg); 172 173 return false; 174 } 175 176 bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, 177 unsigned Pref) { 178 MCRegister Reg = MI->getOperand(OpIdx).getReg().asMCReg(); 179 unsigned Clearance = RDA->getClearance(MI, Reg); 180 LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); 181 182 if (Pref > Clearance) { 183 LLVM_DEBUG(dbgs() << ": Break dependency.\n"); 184 return true; 185 } 186 LLVM_DEBUG(dbgs() << ": OK .\n"); 187 return false; 188 } 189 190 void BreakFalseDeps::processDefs(MachineInstr *MI) { 191 assert(!MI->isDebugInstr() && "Won't process debug values"); 192 193 const MCInstrDesc &MCID = MI->getDesc(); 194 195 // Break dependence on undef uses. Do this before updating LiveRegs below. 196 // This can remove a false dependence with no additional instructions. 197 for (unsigned i = MCID.getNumDefs(), e = MCID.getNumOperands(); i != e; ++i) { 198 MachineOperand &MO = MI->getOperand(i); 199 if (!MO.isReg() || !MO.getReg() || !MO.isUse() || !MO.isUndef()) 200 continue; 201 202 unsigned Pref = TII->getUndefRegClearance(*MI, i, TRI); 203 if (Pref) { 204 bool HadTrueDependency = pickBestRegisterForUndef(MI, i, Pref); 205 // We don't need to bother trying to break a dependency if this 206 // instruction has a true dependency on that register through another 207 // operand - we'll have to wait for it to be available regardless. 208 if (!HadTrueDependency && shouldBreakDependence(MI, i, Pref)) 209 UndefReads.push_back(std::make_pair(MI, i)); 210 } 211 } 212 213 // The code below allows the target to create a new instruction to break the 214 // dependence. That opposes the goal of minimizing size, so bail out now. 215 if (MF->getFunction().hasMinSize()) 216 return; 217 218 for (unsigned i = 0, 219 e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); 220 i != e; ++i) { 221 MachineOperand &MO = MI->getOperand(i); 222 if (!MO.isReg() || !MO.getReg()) 223 continue; 224 if (MO.isUse()) 225 continue; 226 // Check clearance before partial register updates. 227 unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI); 228 if (Pref && shouldBreakDependence(MI, i, Pref)) 229 TII->breakPartialRegDependency(*MI, i, TRI); 230 } 231 } 232 233 void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) { 234 if (UndefReads.empty()) 235 return; 236 237 // The code below allows the target to create a new instruction to break the 238 // dependence. That opposes the goal of minimizing size, so bail out now. 239 if (MF->getFunction().hasMinSize()) 240 return; 241 242 // Collect this block's live out register units. 243 LiveRegSet.init(*TRI); 244 // We do not need to care about pristine registers as they are just preserved 245 // but not actually used in the function. 246 LiveRegSet.addLiveOutsNoPristines(*MBB); 247 248 MachineInstr *UndefMI = UndefReads.back().first; 249 unsigned OpIdx = UndefReads.back().second; 250 251 for (MachineInstr &I : llvm::reverse(*MBB)) { 252 // Update liveness, including the current instruction's defs. 253 LiveRegSet.stepBackward(I); 254 255 if (UndefMI == &I) { 256 if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg())) 257 TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI); 258 259 UndefReads.pop_back(); 260 if (UndefReads.empty()) 261 return; 262 263 UndefMI = UndefReads.back().first; 264 OpIdx = UndefReads.back().second; 265 } 266 } 267 } 268 269 void BreakFalseDeps::processBasicBlock(MachineBasicBlock *MBB) { 270 UndefReads.clear(); 271 // If this block is not done, it makes little sense to make any decisions 272 // based on clearance information. We need to make a second pass anyway, 273 // and by then we'll have better information, so we can avoid doing the work 274 // to try and break dependencies now. 275 for (MachineInstr &MI : *MBB) { 276 if (!MI.isDebugInstr()) 277 processDefs(&MI); 278 } 279 processUndefReads(MBB); 280 } 281 282 bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) { 283 if (skipFunction(mf.getFunction())) 284 return false; 285 MF = &mf; 286 TII = MF->getSubtarget().getInstrInfo(); 287 TRI = MF->getSubtarget().getRegisterInfo(); 288 RDA = &getAnalysis<ReachingDefAnalysis>(); 289 290 RegClassInfo.runOnMachineFunction(mf); 291 292 LLVM_DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n"); 293 294 // Skip Dead blocks due to ReachingDefAnalysis has no idea about instructions 295 // in them. 296 df_iterator_default_set<MachineBasicBlock *> Reachable; 297 for (MachineBasicBlock *MBB : depth_first_ext(&mf, Reachable)) 298 (void)MBB /* Mark all reachable blocks */; 299 300 // Traverse the basic blocks. 301 for (MachineBasicBlock &MBB : mf) 302 if (Reachable.count(&MBB)) 303 processBasicBlock(&MBB); 304 305 return false; 306 } 307