//===- SIMemoryLegalizer.cpp ----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Memory legalizer - implements memory model. More information can be
/// found here:
/// http://llvm.org/docs/AMDGPUUsage.html#memory-model
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include <cassert>
#include <list>

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

namespace {

/// \brief Immutable summary of the memory-model-relevant properties
/// (synchronization scope, ordering, failure ordering) of a single machine
/// instruction. Constructed only through the static get*Info factories below.
class SIMemOpInfo final {
private:
  SyncScope::ID SSID = SyncScope::System;
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;

  // Constructors are private: instances are created only by the factories,
  // which decide the conservative defaults when information is missing.
  SIMemOpInfo(SyncScope::ID SSID, AtomicOrdering Ordering)
      : SSID(SSID), Ordering(Ordering) {}

  SIMemOpInfo(SyncScope::ID SSID, AtomicOrdering Ordering,
              AtomicOrdering FailureOrdering)
      : SSID(SSID), Ordering(Ordering), FailureOrdering(FailureOrdering) {}

public:
  /// \returns Synchronization scope ID of the machine instruction used to
  /// create this SIMemOpInfo.
  SyncScope::ID getSSID() const {
    return SSID;
  }
  /// \returns Ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getOrdering() const {
    return Ordering;
  }
  /// \returns Failure ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getFailureOrdering() const {
    return FailureOrdering;
  }

  /// \returns True if ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo is unordered or higher, false otherwise.
  bool isAtomic() const {
    return Ordering != AtomicOrdering::NotAtomic;
  }

  /// \returns Load info if \p MI is a load operation, "None" otherwise.
  static Optional<SIMemOpInfo> getLoadInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Store info if \p MI is a store operation, "None" otherwise.
  static Optional<SIMemOpInfo> getStoreInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Atomic fence info if \p MI is an atomic fence operation,
  /// "None" otherwise.
  static Optional<SIMemOpInfo> getAtomicFenceInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Atomic cmpxchg info if \p MI is an atomic cmpxchg operation,
  /// "None" otherwise.
  static Optional<SIMemOpInfo> getAtomicCmpxchgInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Atomic rmw info if \p MI is an atomic rmw operation,
  /// "None" otherwise.
  static Optional<SIMemOpInfo> getAtomicRmwInfo(
      const MachineBasicBlock::iterator &MI);
};

/// \brief Machine function pass that rewrites memory operations so the
/// generated code honors the AMDGPU memory model: it sets cache-bypass (GLC)
/// bits, inserts "s_waitcnt vmcnt(0)" waits and L1-invalidate instructions
/// around atomics as required by their scope/ordering, and lowers the
/// ATOMIC_FENCE pseudo (deleting it after expansion).
class SIMemoryLegalizer final : public MachineFunctionPass {
private:
  /// \brief LLVM context.
  LLVMContext *CTX = nullptr;

  /// \brief Machine module info.
  const AMDGPUMachineModuleInfo *MMI = nullptr;

  /// \brief Instruction info.
  const SIInstrInfo *TII = nullptr;

  /// \brief Immediate for "vmcnt(0)".
  unsigned Vmcnt0Immediate = 0;

  /// \brief Opcode for cache invalidation instruction (L1).
  unsigned Wbinvl1Opcode = 0;

  /// \brief List of atomic pseudo instructions.
  /// Collected during expansion and erased in bulk afterwards so the
  /// basic-block iteration in runOnMachineFunction is not invalidated.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// \brief Inserts "buffer_wbinvl1_vol" instruction \p Before or after \p MI.
  /// Always returns true.
  bool insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                              bool Before = true) const;
  /// \brief Inserts "s_waitcnt vmcnt(0)" instruction \p Before or after \p MI.
  /// Always returns true.
  bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                           bool Before = true) const;

  /// \brief Sets GLC bit if present in \p MI. Returns true if \p MI is
  /// modified, false otherwise.
  bool setGLC(const MachineBasicBlock::iterator &MI) const;

  /// \brief Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if current function is modified, false otherwise.
  bool removeAtomicPseudoMIs();

  /// \brief Reports unknown synchronization scope used in \p MI to LLVM
  /// context.
  void reportUnknownSynchScope(const MachineBasicBlock::iterator &MI);

  /// \brief Expands load operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandLoad(const SIMemOpInfo &MOI,
                  MachineBasicBlock::iterator &MI);
  /// \brief Expands store operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandStore(const SIMemOpInfo &MOI,
                   MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic fence operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic cmpxchg operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicCmpxchg(const SIMemOpInfo &MOI,
                           MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic rmw operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicRmw(const SIMemOpInfo &MOI,
                       MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end namespace anonymous

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getLoadInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  // A "load" here is an instruction that may load but not store.
  if (!(MI->mayLoad() && !MI->mayStore()))
    return None;
  // Without exactly one memory operand there is no scope/ordering to read;
  // be conservative and assume the strongest: system scope, seq_cst.
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo(SyncScope::System,
                       AtomicOrdering::SequentiallyConsistent);

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  return SIMemOpInfo(MMO->getSyncScopeID(), MMO->getOrdering());
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getStoreInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  // A "store" here is an instruction that may store but not load.
  if (!(!MI->mayLoad() && MI->mayStore()))
    return None;
  // Conservative default when the memory operand is missing (see getLoadInfo).
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo(SyncScope::System,
                       AtomicOrdering::SequentiallyConsistent);

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  return SIMemOpInfo(MMO->getSyncScopeID(), MMO->getOrdering());
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getAtomicFenceInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return None;

  // ATOMIC_FENCE pseudo carries its constraints as immediates:
  // operand 0 is the ordering, operand 1 is the synchronization scope.
  SyncScope::ID SSID =
      static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
  return SIMemOpInfo(SSID, Ordering);
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getAtomicCmpxchgInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  // NOTE(review): this predicate (mayLoad && mayStore) is identical to
  // getAtomicRmwInfo's, and runOnMachineFunction queries cmpxchg first, so
  // rmw instructions are also matched here. For rmw the memory operand's
  // failure ordering is expected to be NotAtomic, making expandAtomicCmpxchg
  // behave like expandAtomicRmw — confirm this is intended.
  if (!(MI->mayLoad() && MI->mayStore()))
    return None;
  // Conservative default: strongest ordering for both success and failure.
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo(SyncScope::System,
                       AtomicOrdering::SequentiallyConsistent,
                       AtomicOrdering::SequentiallyConsistent);

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  return SIMemOpInfo(MMO->getSyncScopeID(), MMO->getOrdering(),
                     MMO->getFailureOrdering());
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getAtomicRmwInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo(SyncScope::System,
                       AtomicOrdering::SequentiallyConsistent);

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  return SIMemOpInfo(MMO->getSyncScopeID(), MMO->getOrdering());
}

bool SIMemoryLegalizer::insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                                               bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // BuildMI inserts before the iterator; to insert *after* MI, temporarily
  // advance past it, then restore so the caller's iterator still points at MI.
  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(Wbinvl1Opcode));

  if (!Before)
    --MI;

  // Always reports a modification.
  return true;
}

bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                                            bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // Same insert-after trick as insertBufferWbinvl1Vol.
  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate);

  if (!Before)
    --MI;

  // Always reports a modification.
  return true;
}

bool SIMemoryLegalizer::setGLC(const MachineBasicBlock::iterator &MI) const {
  int GLCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::glc);
  // No GLC operand on this opcode: nothing to do.
  if (GLCIdx == -1)
    return false;

  MachineOperand &GLC = MI->getOperand(GLCIdx);
  // Already set: report no change.
  if (GLC.getImm() == 1)
    return false;

  GLC.setImm(1);
  return true;
}

bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}

void SIMemoryLegalizer::reportUnknownSynchScope(
    const MachineBasicBlock::iterator &MI) {
  DiagnosticInfoUnsupported Diag(*MI->getParent()->getParent()->getFunction(),
                                 "Unsupported synchronization scope");
  CTX->diagnose(Diag);
}

bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getSSID() == SyncScope::System ||
        MOI.getSSID() == MMI->getAgentSSID()) {
      // Acquire or stronger: bypass L1 on the load itself (GLC).
      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= setGLC(MI);

      // Seq_cst additionally waits for prior vector memory ops to complete
      // before the load.
      if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertWaitcntVmcnt0(MI);

      // Acquire or stronger: after the load, wait for it to complete and
      // invalidate L1 so subsequent loads see up-to-date data.
      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
        Changed |= insertWaitcntVmcnt0(MI, false);
        Changed |= insertBufferWbinvl1Vol(MI, false);
      }

      return Changed;
    } else if (MOI.getSSID() == SyncScope::SingleThread ||
               MOI.getSSID() == MMI->getWorkgroupSSID() ||
               MOI.getSSID() == MMI->getWavefrontSSID()) {
      // Scopes at workgroup granularity or narrower need no extra code here.
      return Changed;
    } else {
      reportUnknownSynchScope(MI);
      return Changed;
    }
  }

  return Changed;
}

bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getSSID() == SyncScope::System ||
        MOI.getSSID() == MMI->getAgentSSID()) {
      // Release or stronger: wait for prior vector memory ops before the
      // store so earlier writes are visible first.
      if (MOI.getOrdering() == AtomicOrdering::Release ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertWaitcntVmcnt0(MI);

      return Changed;
    } else if (MOI.getSSID() == SyncScope::SingleThread ||
               MOI.getSSID() == MMI->getWorkgroupSSID() ||
               MOI.getSSID() == MMI->getWavefrontSSID()) {
      // Narrower scopes need no extra code here.
      return Changed;
    } else {
      reportUnknownSynchScope(MI);
      return Changed;
    }
  }

  return Changed;
}

bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getSSID() == SyncScope::System ||
        MOI.getSSID() == MMI->getAgentSSID()) {
      // Any acquire/release flavor waits on outstanding vector memory ops.
      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::Release ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertWaitcntVmcnt0(MI);

      // Acquire-containing orderings also invalidate L1.
      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertBufferWbinvl1Vol(MI);

      // The fence pseudo itself produces no machine code; queue it for
      // deletion once iteration over the function is finished.
      AtomicPseudoMIs.push_back(MI);
      return Changed;
    } else if (MOI.getSSID() == SyncScope::SingleThread ||
               MOI.getSSID() == MMI->getWorkgroupSSID() ||
               MOI.getSSID() == MMI->getWavefrontSSID()) {
      // No code needed at these scopes, but the pseudo must still be removed.
      AtomicPseudoMIs.push_back(MI);
      return Changed;
    } else {
      reportUnknownSynchScope(MI);
      return Changed;
    }
  }

  return Changed;
}

bool SIMemoryLegalizer::expandAtomicCmpxchg(const SIMemOpInfo &MOI,
                                            MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getSSID() == SyncScope::System ||
        MOI.getSSID() == MMI->getAgentSSID()) {
      // Release-containing success ordering, or seq_cst failure ordering:
      // wait for prior vector memory ops before the operation.
      if (MOI.getOrdering() == AtomicOrdering::Release ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
          MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertWaitcntVmcnt0(MI);

      // Acquire-containing success or failure ordering: after the operation,
      // wait for it and invalidate L1.
      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
          MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
          MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
        Changed |= insertWaitcntVmcnt0(MI, false);
        Changed |= insertBufferWbinvl1Vol(MI, false);
      }

      return Changed;
    } else if (MOI.getSSID() == SyncScope::SingleThread ||
               MOI.getSSID() == MMI->getWorkgroupSSID() ||
               MOI.getSSID() == MMI->getWavefrontSSID()) {
      // Narrower scopes only force the operation to bypass L1 (GLC).
      Changed |= setGLC(MI);
      return Changed;
    } else {
      reportUnknownSynchScope(MI);
      return Changed;
    }
  }

  return Changed;
}

bool SIMemoryLegalizer::expandAtomicRmw(const SIMemOpInfo &MOI,
                                        MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getSSID() == SyncScope::System ||
        MOI.getSSID() == MMI->getAgentSSID()) {
      // Release-containing ordering: wait before the operation.
      if (MOI.getOrdering() == AtomicOrdering::Release ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertWaitcntVmcnt0(MI);

      // Acquire-containing ordering: wait and invalidate L1 afterwards.
      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
        Changed |= insertWaitcntVmcnt0(MI, false);
        Changed |= insertBufferWbinvl1Vol(MI, false);
      }

      return Changed;
    } else if (MOI.getSSID() == SyncScope::SingleThread ||
               MOI.getSSID() == MMI->getWorkgroupSSID() ||
               MOI.getSSID() == MMI->getWavefrontSSID()) {
      // Narrower scopes only force the operation to bypass L1 (GLC).
      Changed |= setGLC(MI);
      return Changed;
    } else {
      reportUnknownSynchScope(MI);
      return Changed;
    }
  }

  return Changed;
}

bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits());

  CTX = &MF.getFunction()->getContext();
  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
  TII = ST.getInstrInfo();

  // Encode "vmcnt(0)" with expcnt/lgkmcnt left at their no-wait maximums for
  // this ISA version, so the inserted s_waitcnt waits only on vector memory.
  Vmcnt0Immediate =
      AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV));
  // SI has no BUFFER_WBINVL1_VOL; fall back to BUFFER_WBINVL1 there.
  Wbinvl1Opcode = ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ?
      AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL;

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      // Only instructions tagged maybeAtomic participate in the memory model.
      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

      // Classification order matters: the first factory returning a value
      // wins (cmpxchg is queried before rmw; see note in getAtomicCmpxchgInfo).
      if (const auto &MOI = SIMemOpInfo::getLoadInfo(MI))
        Changed |= expandLoad(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getStoreInfo(MI))
        Changed |= expandStore(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getAtomicCmpxchgInfo(MI))
        Changed |= expandAtomicCmpxchg(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getAtomicRmwInfo(MI))
        Changed |= expandAtomicRmw(MOI.getValue(), MI);
    }
  }

  // Erase expanded fence pseudos now that iteration is complete.
  Changed |= removeAtomicPseudoMIs();
  return Changed;
}

INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)

char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}