//===- SIMemoryLegalizer.cpp ----------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Memory legalizer - implements the memory model. More information can
/// be found here:
/// http://llvm.org/docs/AMDGPUUsage.html#memory-model
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include <cassert>
#include <list>

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

namespace {

struct SIMemOpInfo final {
  SyncScope::ID SSID = SyncScope::System;
  AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent;
  AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent;

  SIMemOpInfo() = default;

  SIMemOpInfo(SyncScope::ID SSID,
              AtomicOrdering Ordering,
              AtomicOrdering FailureOrdering)
      : SSID(SSID),
        Ordering(Ordering),
        FailureOrdering(FailureOrdering) {}

  SIMemOpInfo(const MachineMemOperand *MMO)
      : SSID(MMO->getSyncScopeID()),
        Ordering(MMO->getOrdering()),
        FailureOrdering(MMO->getFailureOrdering()) {}
};

class SIMemoryLegalizer final : public MachineFunctionPass {
private:
  /// \brief LLVM context.
  LLVMContext *CTX = nullptr;

  /// \brief Machine module info.
  const AMDGPUMachineModuleInfo *MMI = nullptr;

  /// \brief Instruction info.
  const SIInstrInfo *TII = nullptr;

  /// \brief Immediate for "vmcnt(0)".
  unsigned Vmcnt0Immediate = 0;

  /// \brief Opcode for cache invalidation instruction (L1).
  unsigned Wbinvl1Opcode = 0;

  /// \brief List of atomic pseudo instructions.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// \brief Inserts a "buffer_wbinvl1_vol" instruction before \p MI if
  /// \p Before is true, otherwise after it. Always returns true.
  bool insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                              bool Before = true) const;
  /// \brief Inserts an "s_waitcnt vmcnt(0)" instruction before \p MI if
  /// \p Before is true, otherwise after it. Always returns true.
  bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                           bool Before = true) const;

  /// \brief Sets the GLC bit if present in \p MI. Returns true if \p MI is
  /// modified, false otherwise.
  bool setGLC(const MachineBasicBlock::iterator &MI) const;

  /// \brief Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if the current function is modified, false
  /// otherwise.
  bool removeAtomicPseudoMIs();

  /// \brief Reports unknown synchronization scope used in \p MI to LLVM
  /// context.
  void reportUnknownSynchScope(const MachineBasicBlock::iterator &MI);

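  // Note on the get*Info helpers below: when an instruction does not have
  // exactly one memory operand, they return a default-constructed SIMemOpInfo,
  // which conservatively assumes system scope and sequentially consistent
  // ordering.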
  /// \returns Load info if \p MI is a load operation, "None" otherwise.
  Optional<SIMemOpInfo> getLoadInfo(
      const MachineBasicBlock::iterator &MI) const;
  /// \returns Store info if \p MI is a store operation, "None" otherwise.
  Optional<SIMemOpInfo> getStoreInfo(
      const MachineBasicBlock::iterator &MI) const;
  /// \returns Atomic fence info if \p MI is an atomic fence operation,
  /// "None" otherwise.
  Optional<SIMemOpInfo> getAtomicFenceInfo(
      const MachineBasicBlock::iterator &MI) const;
  /// \returns Atomic cmpxchg info if \p MI is an atomic cmpxchg operation,
  /// "None" otherwise.
  Optional<SIMemOpInfo> getAtomicCmpxchgInfo(
      const MachineBasicBlock::iterator &MI) const;
  /// \returns Atomic rmw info if \p MI is an atomic rmw operation,
  /// "None" otherwise.
  Optional<SIMemOpInfo> getAtomicRmwInfo(
      const MachineBasicBlock::iterator &MI) const;

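  // The expand* routines below insert the waits and cache invalidations
  // required by the memory model for system and agent scope, leave
  // single-thread, wavefront, and workgroup scope accesses unchanged (apart
  // from setting GLC on returning atomics), and diagnose unknown scopes.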
  /// \brief Expands load operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandLoad(const SIMemOpInfo &MOI,
                  MachineBasicBlock::iterator &MI);
  /// \brief Expands store operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandStore(const SIMemOpInfo &MOI,
                   MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic fence operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic cmpxchg operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicCmpxchg(const SIMemOpInfo &MOI,
                           MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic rmw operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicRmw(const SIMemOpInfo &MOI,
                       MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end namespace anonymous

bool SIMemoryLegalizer::insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                                               bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(Wbinvl1Opcode));

  if (!Before)
    --MI;

  return true;
}

bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                                            bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate);

  if (!Before)
    --MI;

  return true;
}

bool SIMemoryLegalizer::setGLC(const MachineBasicBlock::iterator &MI) const {
  int GLCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::glc);
  if (GLCIdx == -1)
    return false;

  MachineOperand &GLC = MI->getOperand(GLCIdx);
  if (GLC.getImm() == 1)
    return false;

  GLC.setImm(1);
  return true;
}

bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}

void SIMemoryLegalizer::reportUnknownSynchScope(
    const MachineBasicBlock::iterator &MI) {
  DiagnosticInfoUnsupported Diag(*MI->getParent()->getParent()->getFunction(),
                                 "Unsupported synchronization scope");
  CTX->diagnose(Diag);
}

Optional<SIMemOpInfo> SIMemoryLegalizer::getLoadInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && !MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;

  return SIMemOpInfo(MMO);
}

Optional<SIMemOpInfo> SIMemoryLegalizer::getStoreInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(!MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;

  return SIMemOpInfo(MMO);
}

Optional<SIMemOpInfo> SIMemoryLegalizer::getAtomicFenceInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return None;

  SyncScope::ID SSID =
      static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
  return SIMemOpInfo(SSID, Ordering, AtomicOrdering::NotAtomic);
}

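// Cmpxchg and rmw instructions both load and store, so they cannot be told
// apart by the MI flags alone; the distinguishing property used here is the
// failure ordering on the memory operand, which is NotAtomic for plain rmw
// operations.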
Optional<SIMemOpInfo> SIMemoryLegalizer::getAtomicCmpxchgInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;
  if (MMO->getFailureOrdering() == AtomicOrdering::NotAtomic)
    return None;

  return SIMemOpInfo(MMO);
}

Optional<SIMemOpInfo> SIMemoryLegalizer::getAtomicRmwInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;
  if (MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return None;

  return SIMemOpInfo(MMO);
}

bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;
  if (MOI.SSID == SyncScope::System ||
      MOI.SSID == MMI->getAgentSSID()) {
    if (MOI.Ordering == AtomicOrdering::Acquire ||
        MOI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= setGLC(MI);

    if (MOI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (MOI.Ordering == AtomicOrdering::Acquire ||
        MOI.Ordering == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (MOI.SSID == SyncScope::SingleThread ||
             MOI.SSID == MMI->getWorkgroupSSID() ||
             MOI.SSID == MMI->getWavefrontSSID()) {
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (MOI.SSID == SyncScope::System ||
      MOI.SSID == MMI->getAgentSSID()) {
    if (MOI.Ordering == AtomicOrdering::Release ||
        MOI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    return Changed;
  } else if (MOI.SSID == SyncScope::SingleThread ||
             MOI.SSID == MMI->getWorkgroupSSID() ||
             MOI.SSID == MMI->getWavefrontSSID()) {
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  bool Changed = false;
  if (MOI.SSID == SyncScope::System ||
      MOI.SSID == MMI->getAgentSSID()) {
    if (MOI.Ordering == AtomicOrdering::Acquire ||
        MOI.Ordering == AtomicOrdering::Release ||
        MOI.Ordering == AtomicOrdering::AcquireRelease ||
        MOI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (MOI.Ordering == AtomicOrdering::Acquire ||
        MOI.Ordering == AtomicOrdering::AcquireRelease ||
        MOI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertBufferWbinvl1Vol(MI);

    AtomicPseudoMIs.push_back(MI);
    return Changed;
  } else if (MOI.SSID == SyncScope::SingleThread ||
             MOI.SSID == MMI->getWorkgroupSSID() ||
             MOI.SSID == MMI->getWavefrontSSID()) {
    AtomicPseudoMIs.push_back(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

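// For illustration only (the instruction below is just an example): an
// agent-scope acq_rel cmpxchg such as FLAT_ATOMIC_CMPSWAP is roughly
// rewritten to
//   s_waitcnt vmcnt(0)
//   flat_atomic_cmpswap ...
//   s_waitcnt vmcnt(0)
//   buffer_wbinvl1_vol
// while for narrower scopes only the GLC bit is set.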
bool SIMemoryLegalizer::expandAtomicCmpxchg(const SIMemOpInfo &MOI,
                                            MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (MOI.SSID == SyncScope::System ||
      MOI.SSID == MMI->getAgentSSID()) {
    if (MOI.Ordering == AtomicOrdering::Release ||
        MOI.Ordering == AtomicOrdering::AcquireRelease ||
        MOI.Ordering == AtomicOrdering::SequentiallyConsistent ||
        MOI.FailureOrdering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (MOI.Ordering == AtomicOrdering::Acquire ||
        MOI.Ordering == AtomicOrdering::AcquireRelease ||
        MOI.Ordering == AtomicOrdering::SequentiallyConsistent ||
        MOI.FailureOrdering == AtomicOrdering::Acquire ||
        MOI.FailureOrdering == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (MOI.SSID == SyncScope::SingleThread ||
             MOI.SSID == MMI->getWorkgroupSSID() ||
             MOI.SSID == MMI->getWavefrontSSID()) {
    Changed |= setGLC(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandAtomicRmw(const SIMemOpInfo &MOI,
                                        MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (MOI.SSID == SyncScope::System ||
      MOI.SSID == MMI->getAgentSSID()) {
    if (MOI.Ordering == AtomicOrdering::Release ||
        MOI.Ordering == AtomicOrdering::AcquireRelease ||
        MOI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (MOI.Ordering == AtomicOrdering::Acquire ||
        MOI.Ordering == AtomicOrdering::AcquireRelease ||
        MOI.Ordering == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (MOI.SSID == SyncScope::SingleThread ||
             MOI.SSID == MMI->getWorkgroupSSID() ||
             MOI.SSID == MMI->getWavefrontSSID()) {
    Changed |= setGLC(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits());

  CTX = &MF.getFunction()->getContext();
  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
  TII = ST.getInstrInfo();

  Vmcnt0Immediate =
      AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV));
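  // SI does not have BUFFER_WBINVL1_VOL, so the plain BUFFER_WBINVL1 is used
  // there.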
  Wbinvl1Opcode = ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ?
      AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL;

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

      if (const auto &MOI = getLoadInfo(MI))
        Changed |= expandLoad(MOI.getValue(), MI);
      else if (const auto &MOI = getStoreInfo(MI))
        Changed |= expandStore(MOI.getValue(), MI);
      else if (const auto &MOI = getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(MOI.getValue(), MI);
      else if (const auto &MOI = getAtomicCmpxchgInfo(MI))
        Changed |= expandAtomicCmpxchg(MOI.getValue(), MI);
      else if (const auto &MOI = getAtomicRmwInfo(MI))
        Changed |= expandAtomicRmw(MOI.getValue(), MI);
    }
  }

  Changed |= removeAtomicPseudoMIs();
  return Changed;
}

INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)

char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}