//===- SIMemoryLegalizer.cpp ----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Memory legalizer - implements memory model. More information can be
/// found here:
/// http://llvm.org/docs/AMDGPUUsage.html#memory-model
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include <cassert>
#include <list>

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

namespace {

/// \brief Machine-function pass that lowers atomic machine instructions into
/// the hardware operations (waits, cache invalidates, GLC bits) required to
/// honor the memory model described in AMDGPUUsage.
class SIMemoryLegalizer final : public MachineFunctionPass {
private:
  /// \brief Synchronization scope and orderings of an atomic operation.
  struct AtomicInfo final {
    // Defaults are deliberately the most conservative values: system scope
    // and sequentially-consistent ordering. A default-constructed AtomicInfo
    // is used when the instruction's atomicity cannot be determined precisely
    // (see the getAtomic*Info methods below).
    SyncScope::ID SSID = SyncScope::System;
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent;
    AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent;

    AtomicInfo() = default;

    AtomicInfo(SyncScope::ID SSID,
               AtomicOrdering Ordering,
               AtomicOrdering FailureOrdering)
        : SSID(SSID),
          Ordering(Ordering),
          FailureOrdering(FailureOrdering) {}

    /// \brief Extracts scope/orderings from a machine memory operand.
    AtomicInfo(const MachineMemOperand *MMO)
        : SSID(MMO->getSyncScopeID()),
          Ordering(MMO->getOrdering()),
          FailureOrdering(MMO->getFailureOrdering()) {}
  };

  /// \brief LLVM context.
  LLVMContext *CTX = nullptr;

  /// \brief Machine module info.
  const AMDGPUMachineModuleInfo *MMI = nullptr;

  /// \brief Instruction info.
  const SIInstrInfo *TII = nullptr;

  /// \brief Immediate for "vmcnt(0)".
  unsigned Vmcnt0Immediate = 0;

  /// \brief Opcode for cache invalidation instruction (L1).
  unsigned Wbinvl1Opcode = 0;

  /// \brief List of atomic pseudo instructions.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// \brief Inserts "buffer_wbinvl1_vol" instruction \p Before or after \p MI.
  /// Always returns true.
  bool insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                              bool Before = true) const;
  /// \brief Inserts "s_waitcnt vmcnt(0)" instruction \p Before or after \p MI.
  /// Always returns true.
  bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                           bool Before = true) const;

  /// \brief Sets GLC bit if present in \p MI. Returns true if \p MI is
  /// modified, false otherwise.
  bool setGLC(const MachineBasicBlock::iterator &MI) const;

  /// \brief Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if current function is modified, false otherwise.
  bool removeAtomicPseudoMIs();

  /// \brief Reports unknown synchronization scope used in \p MI to LLVM
  /// context.
  void reportUnknownSynchScope(const MachineBasicBlock::iterator &MI);

  /// \returns Atomic fence info if \p MI is an atomic fence operation,
  /// "None" otherwise.
  Optional<AtomicInfo> getAtomicFenceInfo(
      const MachineBasicBlock::iterator &MI) const;
  /// \returns Atomic load info if \p MI is an atomic load operation,
  /// "None" otherwise.
  Optional<AtomicInfo> getAtomicLoadInfo(
      const MachineBasicBlock::iterator &MI) const;
  /// \returns Atomic store info if \p MI is an atomic store operation,
  /// "None" otherwise.
  Optional<AtomicInfo> getAtomicStoreInfo(
      const MachineBasicBlock::iterator &MI) const;
  /// \returns Atomic cmpxchg info if \p MI is an atomic cmpxchg operation,
  /// "None" otherwise.
  Optional<AtomicInfo> getAtomicCmpxchgInfo(
      const MachineBasicBlock::iterator &MI) const;
  /// \returns Atomic rmw info if \p MI is an atomic rmw operation,
  /// "None" otherwise.
  Optional<AtomicInfo> getAtomicRmwInfo(
      const MachineBasicBlock::iterator &MI) const;

  /// \brief Expands atomic fence operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicFence(const AtomicInfo &AI,
                         MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic load operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicLoad(const AtomicInfo &AI,
                        MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic store operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicStore(const AtomicInfo &AI,
                         MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic cmpxchg operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicCmpxchg(const AtomicInfo &AI,
                           MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic rmw operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicRmw(const AtomicInfo &AI,
                       MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // This pass only inserts/removes instructions within basic blocks; the
    // control-flow graph is never altered.
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end namespace anonymous

// Inserts the L1 cache invalidate (opcode chosen per-subtarget in
// runOnMachineFunction) immediately before \p MI, or immediately after it
// when \p Before is false. \p MI is left pointing at the same instruction on
// return; the temporary ++/-- moves the insertion point past \p MI and back.
bool SIMemoryLegalizer::insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                                               bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(Wbinvl1Opcode));

  if (!Before)
    --MI;

  return true;
}

// Inserts "s_waitcnt vmcnt(0)" immediately before \p MI, or immediately after
// it when \p Before is false, using the same iterator dance as
// insertBufferWbinvl1Vol. Vmcnt0Immediate encodes vmcnt==0 with the other
// counters left at their no-wait values (see runOnMachineFunction).
bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                                            bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate);

  if (!Before)
    --MI;

  return true;
}

// Forces the GLC (globally coherent) bit on \p MI. Returns false when the
// instruction has no GLC operand or the bit is already set, so callers can
// accurately track whether the function was modified.
bool SIMemoryLegalizer::setGLC(const MachineBasicBlock::iterator &MI) const {
  int GLCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::glc);
  if (GLCIdx == -1)
    return false;

  MachineOperand &GLC = MI->getOperand(GLCIdx);
  if (GLC.getImm() == 1)
    return false;

  GLC.setImm(1);
  return true;
}

// Erases every pseudo instruction queued up by expandAtomicFence. Erasure is
// deferred to here so that the iteration in runOnMachineFunction is never
// invalidated mid-walk.
bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}

// Emits a diagnostic (attached to the enclosing IR function) for an atomic
// whose synchronization scope is not one of the scopes this pass understands.
void SIMemoryLegalizer::reportUnknownSynchScope(
    const MachineBasicBlock::iterator &MI) {
  DiagnosticInfoUnsupported Diag(*MI->getParent()->getParent()->getFunction(),
                                 "Unsupported synchronization scope");
  CTX->diagnose(Diag);
}

// A fence is represented by the ATOMIC_FENCE pseudo; operand 0 carries the
// ordering and operand 1 the synchronization scope, both as immediates.
// FailureOrdering is meaningless for a fence, so NotAtomic is used.
Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicFenceInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return None;

  SyncScope::ID SSID =
      static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
  return AtomicInfo(SSID, Ordering, AtomicOrdering::NotAtomic);
}

// A load is an instruction that may load but not store. Without exactly one
// memory operand the atomicity cannot be inspected, so a default-constructed
// (maximally conservative) AtomicInfo is returned instead of None.
Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicLoadInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && !MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return AtomicInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;

  return AtomicInfo(MMO);
}

// A store is an instruction that may store but not load; otherwise mirrors
// getAtomicLoadInfo, including the conservative multi-memoperand fallback.
Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicStoreInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(!MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return AtomicInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;

  return AtomicInfo(MMO);
}

// cmpxchg and rmw both load and store; they are told apart by the failure
// ordering, which only cmpxchg carries (rmw has FailureOrdering ==
// NotAtomic). This check is the exact complement of getAtomicRmwInfo's.
Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicCmpxchgInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return AtomicInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;
  if (MMO->getFailureOrdering() == AtomicOrdering::NotAtomic)
    return None;

  return AtomicInfo(MMO);
}

// See getAtomicCmpxchgInfo: an rmw is a load+store atomic with no failure
// ordering.
Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicRmwInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return AtomicInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;
  if (MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return None;

  return AtomicInfo(MMO);
}

// Lowers an ATOMIC_FENCE pseudo. For system/agent scope, release-class
// orderings get a vmcnt(0) wait and acquire-class orderings get an L1
// invalidate; the pseudo itself is queued for deferred deletion in every
// recognized-scope case (narrower scopes need no hardware operation).
bool SIMemoryLegalizer::expandAtomicFence(const AtomicInfo &AI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  bool Changed = false;
  if (AI.SSID == SyncScope::System ||
      AI.SSID == MMI->getAgentSSID()) {
    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::Release ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertBufferWbinvl1Vol(MI);

    AtomicPseudoMIs.push_back(MI);
    return Changed;
  } else if (AI.SSID == SyncScope::SingleThread ||
             AI.SSID == MMI->getWorkgroupSSID() ||
             AI.SSID == MMI->getWavefrontSSID()) {
    // No hardware operation needed at these scopes; just drop the pseudo.
    AtomicPseudoMIs.push_back(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

// Lowers an atomic load. At system/agent scope: acquire and stronger set GLC
// to bypass the L1 cache; seq_cst additionally waits before the load; acquire
// and stronger wait after the load and then invalidate L1 so subsequent
// loads observe fresh data.
bool SIMemoryLegalizer::expandAtomicLoad(const AtomicInfo &AI,
                                         MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;
  if (AI.SSID == SyncScope::System ||
      AI.SSID == MMI->getAgentSSID()) {
    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= setGLC(MI);

    if (AI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (AI.SSID == SyncScope::SingleThread ||
             AI.SSID == MMI->getWorkgroupSSID() ||
             AI.SSID == MMI->getWavefrontSSID()) {
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

// Lowers an atomic store. At system/agent scope, release and stronger require
// a vmcnt(0) wait before the store so earlier memory operations complete
// first; narrower scopes need nothing.
bool SIMemoryLegalizer::expandAtomicStore(const AtomicInfo &AI,
                                          MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (AI.SSID == SyncScope::System ||
      AI.SSID == MMI->getAgentSSID()) {
    if (AI.Ordering == AtomicOrdering::Release ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    return Changed;
  } else if (AI.SSID == SyncScope::SingleThread ||
             AI.SSID == MMI->getWorkgroupSSID() ||
             AI.SSID == MMI->getWavefrontSSID()) {
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

// Lowers an atomic cmpxchg. Unlike loads/rmw, both the success ordering and
// the failure ordering are consulted: release semantics (from either) force a
// wait before, acquire semantics (from either) force a wait plus L1
// invalidate after. At narrower scopes only the GLC bit is set.
bool SIMemoryLegalizer::expandAtomicCmpxchg(const AtomicInfo &AI,
                                            MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (AI.SSID == SyncScope::System ||
      AI.SSID == MMI->getAgentSSID()) {
    if (AI.Ordering == AtomicOrdering::Release ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent ||
        AI.FailureOrdering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent ||
        AI.FailureOrdering == AtomicOrdering::Acquire ||
        AI.FailureOrdering == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (AI.SSID == SyncScope::SingleThread ||
             AI.SSID == MMI->getWorkgroupSSID() ||
             AI.SSID == MMI->getWavefrontSSID()) {
    Changed |= setGLC(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

// Lowers an atomic rmw: same shape as expandAtomicCmpxchg but only the
// success ordering exists (rmw carries no failure ordering).
bool SIMemoryLegalizer::expandAtomicRmw(const AtomicInfo &AI,
                                        MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (AI.SSID == SyncScope::System ||
      AI.SSID == MMI->getAgentSSID()) {
    if (AI.Ordering == AtomicOrdering::Release ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (AI.SSID == SyncScope::SingleThread ||
             AI.SSID == MMI->getWorkgroupSSID() ||
             AI.SSID == MMI->getWavefrontSSID()) {
    Changed |= setGLC(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

// Walks every instruction, classifies those flagged maybeAtomic in a fixed
// precedence order (fence, load, store, cmpxchg, rmw — the predicates are
// mutually exclusive), and expands each. Pseudo fences queued during the walk
// are erased afterwards so iteration stays valid.
bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits());

  CTX = &MF.getFunction()->getContext();
  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
  TII = ST.getInstrInfo();

  // vmcnt is forced to 0; expcnt/lgkmcnt are passed their full bit masks,
  // which presumably encodes "no wait" for those counters — confirm against
  // AMDGPUBaseInfo's encodeWaitcnt.
  Vmcnt0Immediate =
      AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV));
  // NOTE(review): pre-CI parts fall back to BUFFER_WBINVL1, presumably because
  // BUFFER_WBINVL1_VOL does not exist on Southern Islands — verify.
  Wbinvl1Opcode = ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ?
      AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL;

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

      if (const auto &AI = getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(AI.getValue(), MI);
      else if (const auto &AI = getAtomicLoadInfo(MI))
        Changed |= expandAtomicLoad(AI.getValue(), MI);
      else if (const auto &AI = getAtomicStoreInfo(MI))
        Changed |= expandAtomicStore(AI.getValue(), MI);
      else if (const auto &AI = getAtomicCmpxchgInfo(MI))
        Changed |= expandAtomicCmpxchg(AI.getValue(), MI);
      else if (const auto &AI = getAtomicRmwInfo(MI))
        Changed |= expandAtomicRmw(AI.getValue(), MI);
    }
  }

  Changed |= removeAtomicPseudoMIs();
  return Changed;
}

INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)

char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}