//===- SIMemoryLegalizer.cpp ----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Memory legalizer - implements memory model. More information can be
/// found here:
/// http://llvm.org/docs/AMDGPUUsage.html#memory-model
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include <cassert>
#include <list>

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

namespace {

/// \brief Bundles the synchronization scope and ordering constraints of a
/// single memory operation so the legalizer can decide which waits and cache
/// invalidations to insert around it.
class SIMemOpInfo final {
private:
  SyncScope::ID SSID = SyncScope::System;
  AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent;
  AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent;

  /// Default construction yields the most conservative info (system scope,
  /// sequentially consistent). The get*Info() factories below return this
  /// when an instruction has no unique memory operand to inspect, so the
  /// legalizer over- rather than under-synchronizes in that case.
  SIMemOpInfo() = default;

  SIMemOpInfo(SyncScope::ID SSID,
              AtomicOrdering Ordering,
              AtomicOrdering FailureOrdering)
      : SSID(SSID),
        Ordering(Ordering),
        FailureOrdering(FailureOrdering) {}

  /// Copies scope/ordering/failure-ordering from the machine memory operand.
  SIMemOpInfo(const MachineMemOperand *MMO)
      : SSID(MMO->getSyncScopeID()),
        Ordering(MMO->getOrdering()),
        FailureOrdering(MMO->getFailureOrdering()) {}

public:
  /// \returns Synchronization scope ID of the machine instruction used to
  /// create this SIMemOpInfo.
  SyncScope::ID getSSID() const {
    return SSID;
  }
  /// \returns Ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getOrdering() const {
    return Ordering;
  }
  /// \returns Failure ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getFailureOrdering() const {
    return FailureOrdering;
  }

  /// \returns Load info if \p MI is a load operation, "None" otherwise.
  static Optional<SIMemOpInfo> getLoadInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Store info if \p MI is a store operation, "None" otherwise.
  static Optional<SIMemOpInfo> getStoreInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Atomic fence info if \p MI is an atomic fence operation,
  /// "None" otherwise.
  static Optional<SIMemOpInfo> getAtomicFenceInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Atomic cmpxchg info if \p MI is an atomic cmpxchg operation,
  /// "None" otherwise.
  static Optional<SIMemOpInfo> getAtomicCmpxchgInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Atomic rmw info if \p MI is an atomic rmw operation,
  /// "None" otherwise.
  static Optional<SIMemOpInfo> getAtomicRmwInfo(
      const MachineBasicBlock::iterator &MI);
};

/// \brief Machine-function pass that walks every potentially-atomic
/// instruction and inserts the waits / cache invalidations / cache-bypass
/// bits required by the instruction's scope and ordering.
class SIMemoryLegalizer final : public MachineFunctionPass {
private:
  /// \brief LLVM context.
  LLVMContext *CTX = nullptr;

  /// \brief Machine module info.
  const AMDGPUMachineModuleInfo *MMI = nullptr;

  /// \brief Instruction info.
  const SIInstrInfo *TII = nullptr;

  /// \brief Immediate for "vmcnt(0)".
  /// Built in runOnMachineFunction() via AMDGPU::encodeWaitcnt with a vmcnt
  /// field of 0; the expcnt/lgkmcnt fields are passed their full bit masks
  /// (presumably meaning "no wait" on those counters — confirm against the
  /// waitcnt encoding for the target ISA).
  unsigned Vmcnt0Immediate = 0;

  /// \brief Opcode for cache invalidation instruction (L1).
  /// Selected per subtarget generation in runOnMachineFunction().
  unsigned Wbinvl1Opcode = 0;

  /// \brief List of atomic pseudo instructions.
  /// Pseudos (e.g. ATOMIC_FENCE) are recorded here during the walk and only
  /// erased afterwards, so the basic-block iterator in the walk stays valid.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// \brief Inserts "buffer_wbinvl1_vol" instruction \p Before or after \p MI.
  /// Always returns true.
  bool insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                              bool Before = true) const;
  /// \brief Inserts "s_waitcnt vmcnt(0)" instruction \p Before or after \p MI.
  /// Always returns true.
  bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                           bool Before = true) const;

  /// \brief Sets GLC bit if present in \p MI. Returns true if \p MI is
  /// modified, false otherwise.
  bool setGLC(const MachineBasicBlock::iterator &MI) const;

  /// \brief Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if current function is modified, false otherwise.
  bool removeAtomicPseudoMIs();

  /// \brief Reports unknown synchronization scope used in \p MI to LLVM
  /// context.
  void reportUnknownSynchScope(const MachineBasicBlock::iterator &MI);

  /// \brief Expands load operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandLoad(const SIMemOpInfo &MOI,
                  MachineBasicBlock::iterator &MI);
  /// \brief Expands store operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandStore(const SIMemOpInfo &MOI,
                   MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic fence operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic cmpxchg operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicCmpxchg(const SIMemOpInfo &MOI,
                           MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic rmw operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicRmw(const SIMemOpInfo &MOI,
                       MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Only inserts/erases instructions within blocks; the CFG is untouched.
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end namespace anonymous

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getLoadInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  // A load is an instruction that may load but may not store.
  if (!(MI->mayLoad() && !MI->mayStore()))
    return None;
  // No unique memory operand to inspect: fall back to the conservative
  // default (system scope, sequentially consistent).
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;

  return SIMemOpInfo(MMO);
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getStoreInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  // A store is an instruction that may store but may not load.
  if (!(!MI->mayLoad() && MI->mayStore()))
    return None;
  // No unique memory operand: be conservative.
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;

  return SIMemOpInfo(MMO);
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getAtomicFenceInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return None;

  // ATOMIC_FENCE pseudo carries its ordering in operand 0 and its
  // synchronization scope in operand 1, both as immediates.
  SyncScope::ID SSID =
      static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
  return SIMemOpInfo(SSID, Ordering, AtomicOrdering::NotAtomic);
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getAtomicCmpxchgInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  // Both loads and stores: an atomic read-modify-write candidate.
  if (!(MI->mayLoad() && MI->mayStore()))
    return None;
  // No unique memory operand: be conservative.
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;
  // Cmpxchg is distinguished from plain RMW by having a failure ordering.
  if (MMO->getFailureOrdering() == AtomicOrdering::NotAtomic)
    return None;

  return SIMemOpInfo(MMO);
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getAtomicRmwInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  // Both loads and stores: an atomic read-modify-write candidate.
  if (!(MI->mayLoad() && MI->mayStore()))
    return None;
  // No unique memory operand: be conservative.
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;
  // A failure ordering would make this a cmpxchg, handled separately.
  if (MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return None;

  return SIMemOpInfo(MMO);
}

bool SIMemoryLegalizer::insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                                               bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // BuildMI inserts before the given iterator, so to insert "after" we
  // advance past MI first, then step back so the caller's iterator still
  // refers to the original instruction.
  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(Wbinvl1Opcode));

  if (!Before)
    --MI;

  return true;
}

bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                                            bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // Same insert-before/after iterator dance as insertBufferWbinvl1Vol.
  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate);

  if (!Before)
    --MI;

  return true;
}

bool SIMemoryLegalizer::setGLC(const MachineBasicBlock::iterator &MI) const {
  // Not every memory instruction encoding carries a GLC operand.
  int GLCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::glc);
  if (GLCIdx == -1)
    return false;

  MachineOperand &GLC = MI->getOperand(GLCIdx);
  // Already set: report "not modified" so Changed stays accurate.
  if (GLC.getImm() == 1)
    return false;

  GLC.setImm(1);
  return true;
}

bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}

void SIMemoryLegalizer::reportUnknownSynchScope(
    const MachineBasicBlock::iterator &MI) {
  DiagnosticInfoUnsupported Diag(*MI->getParent()->getParent()->getFunction(),
                                 "Unsupported synchronization scope");
  CTX->diagnose(Diag);
}

bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;
  if (MOI.getSSID() == SyncScope::System ||
      MOI.getSSID() == MMI->getAgentSSID()) {
    // Acquire or stronger: bypass L1 on the load itself (GLC).
    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= setGLC(MI);

    // Seq-cst additionally drains outstanding vector memory ops before the
    // load executes.
    if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    // Acquire semantics: after the load completes (wait on it), invalidate
    // L1 so subsequent loads observe other agents' writes.
    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (MOI.getSSID() == SyncScope::SingleThread ||
             MOI.getSSID() == MMI->getWorkgroupSSID() ||
             MOI.getSSID() == MMI->getWavefrontSSID()) {
    // Narrower scopes need no extra synchronization here.
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (MOI.getSSID() == SyncScope::System ||
      MOI.getSSID() == MMI->getAgentSSID()) {
    // Release semantics: drain prior vector memory ops before the store.
    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    return Changed;
  } else if (MOI.getSSID() == SyncScope::SingleThread ||
             MOI.getSSID() == MMI->getWorkgroupSSID() ||
             MOI.getSSID() == MMI->getWavefrontSSID()) {
    // Narrower scopes need no extra synchronization here.
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  bool Changed = false;
  if (MOI.getSSID() == SyncScope::System ||
      MOI.getSSID() == MMI->getAgentSSID()) {
    // Any acquire/release flavor drains outstanding vector memory ops.
    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    // Acquire flavors additionally invalidate L1.
    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertBufferWbinvl1Vol(MI);

    // The fence pseudo itself emits nothing; queue it for deletion after the
    // function walk (erasing now would invalidate the caller's iterator).
    AtomicPseudoMIs.push_back(MI);
    return Changed;
  } else if (MOI.getSSID() == SyncScope::SingleThread ||
             MOI.getSSID() == MMI->getWorkgroupSSID() ||
             MOI.getSSID() == MMI->getWavefrontSSID()) {
    // No hardware fence needed at these scopes; still delete the pseudo.
    AtomicPseudoMIs.push_back(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandAtomicCmpxchg(const SIMemOpInfo &MOI,
                                            MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (MOI.getSSID() == SyncScope::System ||
      MOI.getSSID() == MMI->getAgentSSID()) {
    // Release semantics (on success or a seq-cst failure ordering): drain
    // prior vector memory ops before the cmpxchg.
    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    // Acquire semantics (on either success or failure ordering): wait for
    // the cmpxchg, then invalidate L1.
    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (MOI.getSSID() == SyncScope::SingleThread ||
             MOI.getSSID() == MMI->getWorkgroupSSID() ||
             MOI.getSSID() == MMI->getWavefrontSSID()) {
    // Narrow scopes: GLC is still set — presumably so the atomic returns the
    // pre-op value / behaves coherently within the group; confirm against
    // the target ISA's buffer-atomic GLC semantics.
    Changed |= setGLC(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandAtomicRmw(const SIMemOpInfo &MOI,
                                        MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (MOI.getSSID() == SyncScope::System ||
      MOI.getSSID() == MMI->getAgentSSID()) {
    // Release semantics: drain prior vector memory ops before the RMW.
    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    // Acquire semantics: wait for the RMW, then invalidate L1.
    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (MOI.getSSID() == SyncScope::SingleThread ||
             MOI.getSSID() == MMI->getWorkgroupSSID() ||
             MOI.getSSID() == MMI->getWavefrontSSID()) {
    // Narrow scopes: set GLC only (see note in expandAtomicCmpxchg).
    Changed |= setGLC(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits());

  CTX = &MF.getFunction()->getContext();
  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
  TII = ST.getInstrInfo();

  // Encode "vmcnt(0)" for this ISA version; expcnt/lgkmcnt get their full
  // bit masks (no wait on those counters).
  Vmcnt0Immediate =
      AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV));
  // SI lacks the _VOL variant of the L1 invalidate; later generations use it.
  Wbinvl1Opcode = ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ?
      AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL;

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      // Skip instructions that can never be atomic.
      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

      // The categories below are mutually exclusive; each get*Info() returns
      // None for instructions outside its category.
      if (const auto &MOI = SIMemOpInfo::getLoadInfo(MI))
        Changed |= expandLoad(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getStoreInfo(MI))
        Changed |= expandStore(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getAtomicCmpxchgInfo(MI))
        Changed |= expandAtomicCmpxchg(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getAtomicRmwInfo(MI))
        Changed |= expandAtomicRmw(MOI.getValue(), MI);
    }
  }

  // Pseudos were only recorded during the walk; erase them now that no
  // iterator refers to them.
  Changed |= removeAtomicPseudoMIs();
  return Changed;
}

INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)

char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}