//===--- SIMemoryLegalizer.cpp ----------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Memory legalizer - implements memory model. More information can be
/// found here:
///   http://llvm.org/docs/AMDGPUUsage.html#memory-model
///
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/DiagnosticInfo.h"

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

namespace {

class SIMemoryLegalizer final : public MachineFunctionPass {
private:
  struct AtomicInfo final {
    SyncScope::ID SSID = SyncScope::System;
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent;
    AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent;

    AtomicInfo() {}

    AtomicInfo(SyncScope::ID SSID,
               AtomicOrdering Ordering,
               AtomicOrdering FailureOrdering)
        : SSID(SSID),
          Ordering(Ordering),
          FailureOrdering(FailureOrdering) {}

    AtomicInfo(const MachineMemOperand *MMO)
        : SSID(MMO->getSyncScopeID()),
          Ordering(MMO->getOrdering()),
          FailureOrdering(MMO->getFailureOrdering()) {}
  };

  /// \brief LLVM context.
  LLVMContext *CTX = nullptr;
  /// \brief Machine module info.
  const AMDGPUMachineModuleInfo *MMI = nullptr;
  /// \brief Instruction info.
  const SIInstrInfo *TII = nullptr;

  /// \brief Immediate for "vmcnt(0)".
  unsigned Vmcnt0Immediate = 0;
  /// \brief Opcode for cache invalidation instruction (L1).
  unsigned Wbinvl1Opcode = 0;

  /// \brief List of atomic pseudo instructions.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// \brief Inserts "buffer_wbinvl1_vol" instruction \p Before or after \p MI.
  /// Always returns true.
  bool insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                              bool Before = true) const;
  /// \brief Inserts "s_waitcnt vmcnt(0)" instruction \p Before or after \p MI.
  /// Always returns true.
  bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                           bool Before = true) const;

  /// \brief Sets GLC bit if present in \p MI. Returns true if \p MI is
  /// modified, false otherwise.
  bool setGLC(const MachineBasicBlock::iterator &MI) const;

  /// \brief Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if current function is modified, false otherwise.
  bool removeAtomicPseudoMIs();

  /// \brief Reports unknown synchronization scope used in \p MI to LLVM
  /// context.
  void reportUnknownSynchScope(const MachineBasicBlock::iterator &MI);

  /// \returns Atomic fence info if \p MI is an atomic fence operation,
  /// "None" otherwise.
  Optional<AtomicInfo> getAtomicFenceInfo(
      const MachineBasicBlock::iterator &MI) const;
  /// \returns Atomic load info if \p MI is an atomic load operation,
  /// "None" otherwise.
  Optional<AtomicInfo> getAtomicLoadInfo(
      const MachineBasicBlock::iterator &MI) const;
  /// \returns Atomic store info if \p MI is an atomic store operation,
  /// "None" otherwise.
  Optional<AtomicInfo> getAtomicStoreInfo(
      const MachineBasicBlock::iterator &MI) const;
  /// \returns Atomic cmpxchg info if \p MI is an atomic cmpxchg operation,
  /// "None" otherwise.
  Optional<AtomicInfo> getAtomicCmpxchgInfo(
      const MachineBasicBlock::iterator &MI) const;
  /// \returns Atomic rmw info if \p MI is an atomic rmw operation,
  /// "None" otherwise.
  Optional<AtomicInfo> getAtomicRmwInfo(
      const MachineBasicBlock::iterator &MI) const;

  /// \brief Expands atomic fence operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicFence(const AtomicInfo &AI,
                         MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic load operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicLoad(const AtomicInfo &AI,
                        MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic store operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicStore(const AtomicInfo &AI,
                         MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic cmpxchg operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicCmpxchg(const AtomicInfo &AI,
                           MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic rmw operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicRmw(const AtomicInfo &AI,
                       MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer()
      : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end namespace anonymous

bool SIMemoryLegalizer::insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                                               bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(Wbinvl1Opcode));

  if (!Before)
    --MI;

  return true;
}

bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                                            bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate);

  if (!Before)
    --MI;

  return true;
}

bool SIMemoryLegalizer::setGLC(const MachineBasicBlock::iterator &MI) const {
  int GLCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::glc);
  if (GLCIdx == -1)
    return false;

  MachineOperand &GLC = MI->getOperand(GLCIdx);
  if (GLC.getImm() == 1)
    return false;

  GLC.setImm(1);
  return true;
}

bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}

void SIMemoryLegalizer::reportUnknownSynchScope(
    const MachineBasicBlock::iterator &MI) {
  DiagnosticInfoUnsupported Diag(*MI->getParent()->getParent()->getFunction(),
                                 "Unsupported synchronization scope");
  CTX->diagnose(Diag);
}

Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicFenceInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return None;

  SyncScope::ID SSID =
      static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
  return AtomicInfo(SSID, Ordering, AtomicOrdering::NotAtomic);
}

Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicLoadInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && !MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return AtomicInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;

  return AtomicInfo(MMO);
}

Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicStoreInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(!MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return AtomicInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;

  return AtomicInfo(MMO);
}

Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicCmpxchgInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return AtomicInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;
  if (MMO->getFailureOrdering() == AtomicOrdering::NotAtomic)
    return None;

  return AtomicInfo(MMO);
}

Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicRmwInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return AtomicInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;
  if (MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return None;

  return AtomicInfo(MMO);
}

bool SIMemoryLegalizer::expandAtomicFence(const AtomicInfo &AI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  bool Changed = false;
  if (AI.SSID == SyncScope::System ||
      AI.SSID == MMI->getAgentSSID()) {
    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::Release ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertBufferWbinvl1Vol(MI);

    AtomicPseudoMIs.push_back(MI);
    return Changed;
  } else if (AI.SSID == SyncScope::SingleThread ||
             AI.SSID == MMI->getWorkgroupSSID() ||
             AI.SSID == MMI->getWavefrontSSID()) {
    AtomicPseudoMIs.push_back(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandAtomicLoad(const AtomicInfo &AI,
                                         MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;
  if (AI.SSID == SyncScope::System ||
      AI.SSID == MMI->getAgentSSID()) {
    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= setGLC(MI);

    if (AI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (AI.SSID == SyncScope::SingleThread ||
             AI.SSID == MMI->getWorkgroupSSID() ||
             AI.SSID == MMI->getWavefrontSSID()) {
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandAtomicStore(const AtomicInfo &AI,
                                          MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (AI.SSID == SyncScope::System ||
      AI.SSID == MMI->getAgentSSID()) {
    if (AI.Ordering == AtomicOrdering::Release ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    return Changed;
  } else if (AI.SSID == SyncScope::SingleThread ||
             AI.SSID == MMI->getWorkgroupSSID() ||
             AI.SSID == MMI->getWavefrontSSID()) {
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandAtomicCmpxchg(const AtomicInfo &AI,
                                            MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (AI.SSID == SyncScope::System ||
      AI.SSID == MMI->getAgentSSID()) {
    if (AI.Ordering == AtomicOrdering::Release ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent ||
        AI.FailureOrdering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent ||
        AI.FailureOrdering == AtomicOrdering::Acquire ||
        AI.FailureOrdering == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (AI.SSID == SyncScope::SingleThread ||
             AI.SSID == MMI->getWorkgroupSSID() ||
             AI.SSID == MMI->getWavefrontSSID()) {
    Changed |= setGLC(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandAtomicRmw(const AtomicInfo &AI,
                                        MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (AI.SSID == SyncScope::System ||
      AI.SSID == MMI->getAgentSSID()) {
    if (AI.Ordering == AtomicOrdering::Release ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (AI.SSID == SyncScope::SingleThread ||
             AI.SSID == MMI->getWorkgroupSSID() ||
             AI.SSID == MMI->getWavefrontSSID()) {
    Changed |= setGLC(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits());

  CTX = &MF.getFunction()->getContext();
  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
  TII = ST.getInstrInfo();

  Vmcnt0Immediate =
      AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV));
  Wbinvl1Opcode = ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ?
      AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL;

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

      if (const auto &AI = getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(AI.getValue(), MI);
      else if (const auto &AI = getAtomicLoadInfo(MI))
        Changed |= expandAtomicLoad(AI.getValue(), MI);
      else if (const auto &AI = getAtomicStoreInfo(MI))
        Changed |= expandAtomicStore(AI.getValue(), MI);
      else if (const auto &AI = getAtomicCmpxchgInfo(MI))
        Changed |= expandAtomicCmpxchg(AI.getValue(), MI);
      else if (const auto &AI = getAtomicRmwInfo(MI))
        Changed |= expandAtomicRmw(AI.getValue(), MI);
    }
  }

  Changed |= removeAtomicPseudoMIs();
  return Changed;
}

INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)

char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}