1 //===-- SIModeRegister.cpp - Mode Register --------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// \file 10 /// This pass inserts changes to the Mode register settings as required. 11 /// Note that currently it only deals with the Double Precision Floating Point 12 /// rounding mode setting, but is intended to be generic enough to be easily 13 /// expanded. 14 /// 15 //===----------------------------------------------------------------------===// 16 // 17 #include "AMDGPU.h" 18 #include "AMDGPUInstrInfo.h" 19 #include "AMDGPUSubtarget.h" 20 #include "SIInstrInfo.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/ADT/Statistic.h" 23 #include "llvm/CodeGen/MachineFunctionPass.h" 24 #include "llvm/CodeGen/MachineInstrBuilder.h" 25 #include "llvm/CodeGen/MachineRegisterInfo.h" 26 #include "llvm/IR/Constants.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/IR/LLVMContext.h" 29 #include "llvm/Support/Debug.h" 30 #include "llvm/Support/raw_ostream.h" 31 #include "llvm/Target/TargetMachine.h" 32 #include <queue> 33 34 #define DEBUG_TYPE "si-mode-register" 35 36 STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted."); 37 38 using namespace llvm; 39 40 struct Status { 41 // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a 42 // known value 43 unsigned Mask; 44 unsigned Mode; 45 46 Status() : Mask(0), Mode(0){}; 47 48 Status(unsigned Mask, unsigned Mode) : Mask(Mask), Mode(Mode) { 49 Mode &= Mask; 50 }; 51 52 // merge two status values such that only values that don't conflict are 53 // preserved 54 Status merge(const Status &S) const { 55 return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask))); 56 } 57 58 // merge an unknown value by using the unknown value's mask to remove bits 59 // from the result 60 Status mergeUnknown(unsigned newMask) { 61 return Status(Mask & ~newMask, Mode & ~newMask); 62 } 63 64 // intersect two Status values to produce a mode and mask that is a subset 65 // of both values 66 Status intersect(const Status &S) const { 67 unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode); 68 unsigned NewMode = (Mode & NewMask); 69 return Status(NewMask, NewMode); 70 } 71 72 // produce the delta required to change the Mode to the required Mode 73 Status delta(const Status &S) const { 74 return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode); 75 } 76 77 bool operator==(const Status &S) const { 78 return (Mask == S.Mask) && (Mode == S.Mode); 79 } 80 81 bool operator!=(const Status &S) const { return !(*this == S); } 82 83 bool isCompatible(Status &S) { 84 return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode); 85 } 86 87 bool isCombinable(Status &S) { 88 return !(Mask & S.Mask) || isCompatible(S); 89 } 90 }; 91 92 class BlockData { 93 public: 94 // The Status that represents the mode register settings required by the 95 // FirstInsertionPoint (if any) in this block. Calculated in Phase 1. 96 Status Require; 97 98 // The Status that represents the net changes to the Mode register made by 99 // this block, Calculated in Phase 1. 100 Status Change; 101 102 // The Status that represents the mode register settings on exit from this 103 // block. Calculated in Phase 2. 104 Status Exit; 105 106 // The Status that represents the intersection of exit Mode register settings 107 // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3. 108 Status Pred; 109 110 // In Phase 1 we record the first instruction that has a mode requirement, 111 // which is used in Phase 3 if we need to insert a mode change. 112 MachineInstr *FirstInsertionPoint; 113 114 BlockData() : FirstInsertionPoint(nullptr) {}; 115 }; 116 117 namespace { 118 119 class SIModeRegister : public MachineFunctionPass { 120 public: 121 static char ID; 122 123 std::vector<std::unique_ptr<BlockData>> BlockInfo; 124 std::queue<MachineBasicBlock *> Phase2List; 125 126 // The default mode register setting currently only caters for the floating 127 // point double precision rounding mode. 128 // We currently assume the default rounding mode is Round to Nearest 129 // NOTE: this should come from a per function rounding mode setting once such 130 // a setting exists. 131 unsigned DefaultMode = FP_ROUND_ROUND_TO_NEAREST; 132 Status DefaultStatus = 133 Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode)); 134 135 public: 136 SIModeRegister() : MachineFunctionPass(ID) {} 137 138 bool runOnMachineFunction(MachineFunction &MF) override; 139 140 void getAnalysisUsage(AnalysisUsage &AU) const override { 141 AU.setPreservesCFG(); 142 MachineFunctionPass::getAnalysisUsage(AU); 143 } 144 145 void processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII); 146 147 void processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII); 148 149 void processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII); 150 151 Status getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII); 152 153 void insertSetreg(MachineBasicBlock &MBB, MachineInstr *I, 154 const SIInstrInfo *TII, Status InstrMode); 155 }; 156 } // End anonymous namespace. 157 158 INITIALIZE_PASS(SIModeRegister, DEBUG_TYPE, 159 "Insert required mode register values", false, false) 160 161 char SIModeRegister::ID = 0; 162 163 char &llvm::SIModeRegisterID = SIModeRegister::ID; 164 165 FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); } 166 167 // Determine the Mode register setting required for this instruction. 168 // Instructions which don't use the Mode register return a null Status. 169 // Note this currently only deals with instructions that use the floating point 170 // double precision setting. 171 Status SIModeRegister::getInstructionMode(MachineInstr &MI, 172 const SIInstrInfo *TII) { 173 if (TII->usesFPDPRounding(MI)) { 174 switch (MI.getOpcode()) { 175 case AMDGPU::V_INTERP_P1LL_F16: 176 case AMDGPU::V_INTERP_P1LV_F16: 177 case AMDGPU::V_INTERP_P2_F16: 178 // f16 interpolation instructions need double precision round to zero 179 return Status(FP_ROUND_MODE_DP(3), 180 FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO)); 181 default: 182 return DefaultStatus; 183 } 184 } 185 return Status(); 186 } 187 188 // Insert a setreg instruction to update the Mode register. 189 // It is possible (though unlikely) for an instruction to require a change to 190 // the value of disjoint parts of the Mode register when we don't know the 191 // value of the intervening bits. In that case we need to use more than one 192 // setreg instruction. 193 void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI, 194 const SIInstrInfo *TII, Status InstrMode) { 195 while (InstrMode.Mask) { 196 unsigned Offset = countTrailingZeros<unsigned>(InstrMode.Mask); 197 unsigned Width = countTrailingOnes<unsigned>(InstrMode.Mask >> Offset); 198 unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1); 199 BuildMI(MBB, MI, 0, TII->get(AMDGPU::S_SETREG_IMM32_B32)) 200 .addImm(Value) 201 .addImm(((Width - 1) << AMDGPU::Hwreg::WIDTH_M1_SHIFT_) | 202 (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) | 203 (AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_)); 204 ++NumSetregInserted; 205 InstrMode.Mask &= ~(((1 << Width) - 1) << Offset); 206 } 207 } 208 209 // In Phase 1 we iterate through the instructions of the block and for each 210 // instruction we get its mode usage. If the instruction uses the Mode register 211 // we: 212 // - update the Change status, which tracks the changes to the Mode register 213 // made by this block 214 // - if this instruction's requirements are compatible with the current setting 215 // of the Mode register we merge the modes 216 // - if it isn't compatible and an InsertionPoint isn't set, then we set the 217 // InsertionPoint to the current instruction, and we remember the current 218 // mode 219 // - if it isn't compatible and InsertionPoint is set we insert a seteg before 220 // that instruction (unless this instruction forms part of the block's 221 // entry requirements in which case the insertion is deferred until Phase 3 222 // when predecessor exit values are known), and move the insertion point to 223 // this instruction 224 // - if this is a setreg instruction we treat it as an incompatible instruction. 225 // This is sub-optimal but avoids some nasty corner cases, and is expected to 226 // occur very rarely. 227 // - on exit we have set the Require, Change, and initial Exit modes. 228 void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB, 229 const SIInstrInfo *TII) { 230 auto NewInfo = llvm::make_unique<BlockData>(); 231 MachineInstr *InsertionPoint = nullptr; 232 // RequirePending is used to indicate whether we are collecting the initial 233 // requirements for the block, and need to defer the first InsertionPoint to 234 // Phase 3. It is set to false once we have set FirstInsertionPoint, or when 235 // we discover an explict setreg that means this block doesn't have any 236 // initial requirements. 237 bool RequirePending = true; 238 Status IPChange; 239 for (MachineInstr &MI : MBB) { 240 Status InstrMode = getInstructionMode(MI, TII); 241 if ((MI.getOpcode() == AMDGPU::S_SETREG_B32) || 242 (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32)) { 243 // We preserve any explicit mode register setreg instruction we encounter, 244 // as we assume it has been inserted by a higher authority (this is 245 // likely to be a very rare occurrence). 246 unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm(); 247 if (((Dst & AMDGPU::Hwreg::ID_MASK_) >> AMDGPU::Hwreg::ID_SHIFT_) != 248 AMDGPU::Hwreg::ID_MODE) 249 continue; 250 251 unsigned Width = ((Dst & AMDGPU::Hwreg::WIDTH_M1_MASK_) >> 252 AMDGPU::Hwreg::WIDTH_M1_SHIFT_) + 253 1; 254 unsigned Offset = 255 (Dst & AMDGPU::Hwreg::OFFSET_MASK_) >> AMDGPU::Hwreg::OFFSET_SHIFT_; 256 unsigned Mask = ((1 << Width) - 1) << Offset; 257 258 // If an InsertionPoint is set we will insert a setreg there. 259 if (InsertionPoint) { 260 insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change)); 261 InsertionPoint = nullptr; 262 } 263 // If this is an immediate then we know the value being set, but if it is 264 // not an immediate then we treat the modified bits of the mode register 265 // as unknown. 266 if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32) { 267 unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm(); 268 unsigned Mode = (Val << Offset) & Mask; 269 Status Setreg = Status(Mask, Mode); 270 // If we haven't already set the initial requirements for the block we 271 // don't need to as the requirements start from this explicit setreg. 272 RequirePending = false; 273 NewInfo->Change = NewInfo->Change.merge(Setreg); 274 } else { 275 NewInfo->Change = NewInfo->Change.mergeUnknown(Mask); 276 } 277 } else if (!NewInfo->Change.isCompatible(InstrMode)) { 278 // This instruction uses the Mode register and its requirements aren't 279 // compatible with the current mode. 280 if (InsertionPoint) { 281 // If the required mode change cannot be included in the current 282 // InsertionPoint changes, we need a setreg and start a new 283 // InsertionPoint. 284 if (!IPChange.delta(NewInfo->Change).isCombinable(InstrMode)) { 285 if (RequirePending) { 286 // This is the first insertionPoint in the block so we will defer 287 // the insertion of the setreg to Phase 3 where we know whether or 288 // not it is actually needed. 289 NewInfo->FirstInsertionPoint = InsertionPoint; 290 NewInfo->Require = NewInfo->Change; 291 RequirePending = false; 292 } else { 293 insertSetreg(MBB, InsertionPoint, TII, 294 IPChange.delta(NewInfo->Change)); 295 IPChange = NewInfo->Change; 296 } 297 // Set the new InsertionPoint 298 InsertionPoint = &MI; 299 } 300 NewInfo->Change = NewInfo->Change.merge(InstrMode); 301 } else { 302 // No InsertionPoint is currently set - this is either the first in 303 // the block or we have previously seen an explicit setreg. 304 InsertionPoint = &MI; 305 IPChange = NewInfo->Change; 306 NewInfo->Change = NewInfo->Change.merge(InstrMode); 307 } 308 } 309 } 310 if (RequirePending) { 311 // If we haven't yet set the initial requirements for the block we set them 312 // now. 313 NewInfo->FirstInsertionPoint = InsertionPoint; 314 NewInfo->Require = NewInfo->Change; 315 } else if (InsertionPoint) { 316 // We need to insert a setreg at the InsertionPoint 317 insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change)); 318 } 319 NewInfo->Exit = NewInfo->Change; 320 BlockInfo[MBB.getNumber()] = std::move(NewInfo); 321 } 322 323 // In Phase 2 we revisit each block and calculate the common Mode register 324 // value provided by all predecessor blocks. If the Exit value for the block 325 // is changed, then we add the successor blocks to the worklist so that the 326 // exit value is propagated. 327 void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB, 328 const SIInstrInfo *TII) { 329 // BlockData *BI = BlockInfo[MBB.getNumber()]; 330 unsigned ThisBlock = MBB.getNumber(); 331 if (MBB.pred_empty()) { 332 // There are no predecessors, so use the default starting status. 333 BlockInfo[ThisBlock]->Pred = DefaultStatus; 334 } else { 335 // Build a status that is common to all the predecessors by intersecting 336 // all the predecessor exit status values. 337 MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end(); 338 MachineBasicBlock &PB = *(*P); 339 BlockInfo[ThisBlock]->Pred = BlockInfo[PB.getNumber()]->Exit; 340 341 for (P = std::next(P); P != E; P = std::next(P)) { 342 MachineBasicBlock *Pred = *P; 343 BlockInfo[ThisBlock]->Pred = BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[Pred->getNumber()]->Exit); 344 } 345 } 346 Status TmpStatus = BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change); 347 if (BlockInfo[ThisBlock]->Exit != TmpStatus) { 348 BlockInfo[ThisBlock]->Exit = TmpStatus; 349 // Add the successors to the work list so we can propagate the changed exit 350 // status. 351 for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(), 352 E = MBB.succ_end(); 353 S != E; S = std::next(S)) { 354 MachineBasicBlock &B = *(*S); 355 Phase2List.push(&B); 356 } 357 } 358 } 359 360 // In Phase 3 we revisit each block and if it has an insertion point defined we 361 // check whether the predecessor mode meets the block's entry requirements. If 362 // not we insert an appropriate setreg instruction to modify the Mode register. 363 void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB, 364 const SIInstrInfo *TII) { 365 // BlockData *BI = BlockInfo[MBB.getNumber()]; 366 unsigned ThisBlock = MBB.getNumber(); 367 if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) { 368 Status Delta = BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require); 369 if (BlockInfo[ThisBlock]->FirstInsertionPoint) 370 insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta); 371 else 372 insertSetreg(MBB, &MBB.instr_front(), TII, Delta); 373 } 374 } 375 376 bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) { 377 BlockInfo.resize(MF.getNumBlockIDs()); 378 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 379 const SIInstrInfo *TII = ST.getInstrInfo(); 380 381 // Processing is performed in a number of phases 382 383 // Phase 1 - determine the initial mode required by each block, and add setreg 384 // instructions for intra block requirements. 385 for (MachineBasicBlock &BB : MF) 386 processBlockPhase1(BB, TII); 387 388 // Phase 2 - determine the exit mode from each block. We add all blocks to the 389 // list here, but will also add any that need to be revisited during Phase 2 390 // processing. 391 for (MachineBasicBlock &BB : MF) 392 Phase2List.push(&BB); 393 while (!Phase2List.empty()) { 394 processBlockPhase2(*Phase2List.front(), TII); 395 Phase2List.pop(); 396 } 397 398 // Phase 3 - add an initial setreg to each block where the required entry mode 399 // is not satisfied by the exit mode of all its predecessors. 400 for (MachineBasicBlock &BB : MF) 401 processBlockPhase3(BB, TII); 402 403 BlockInfo.clear(); 404 405 return NumSetregInserted > 0; 406 } 407