//===- SIMemoryLegalizer.cpp ----------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Memory legalizer - implements memory model. More information can be
/// found here:
/// http://llvm.org/docs/AMDGPUUsage.html#memory-model
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include <cassert>
#include <list>

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

namespace {

class SIMemOpInfo final {
private:
  SyncScope::ID SSID = SyncScope::System;
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  bool IsNonTemporal = false;

  SIMemOpInfo(SyncScope::ID SSID, AtomicOrdering Ordering)
      : SSID(SSID), Ordering(Ordering) {}

  SIMemOpInfo(SyncScope::ID SSID, AtomicOrdering Ordering,
              AtomicOrdering FailureOrdering, bool IsNonTemporal = false)
      : SSID(SSID), Ordering(Ordering), FailureOrdering(FailureOrdering),
        IsNonTemporal(IsNonTemporal) {}

  /// \returns Info constructed from \p MI, which has at least one machine
  /// memory operand.
  static Optional<SIMemOpInfo> constructFromMIWithMMO(
      const MachineBasicBlock::iterator &MI);

public:
  /// \returns Synchronization scope ID of the machine instruction used to
  /// create this SIMemOpInfo.
  SyncScope::ID getSSID() const {
    return SSID;
  }
  /// \returns Ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getOrdering() const {
    return Ordering;
  }
  /// \returns Failure ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getFailureOrdering() const {
    return FailureOrdering;
  }
  /// \returns True if memory access of the machine instruction used to
  /// create this SIMemOpInfo is non-temporal, false otherwise.
  bool isNonTemporal() const {
    return IsNonTemporal;
  }

  /// \returns True if ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo is unordered or higher, false otherwise.
  bool isAtomic() const {
    return Ordering != AtomicOrdering::NotAtomic;
  }

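  // The factory methods below classify \p MI and, when it is the named kind
  // of memory operation, build a SIMemOpInfo from its machine memory
  // operands. They return "None" when \p MI is not that kind of operation,
  // and fall back to the most conservative info (system scope, sequentially
  // consistent ordering) when \p MI carries no memory operands.
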
  /// \returns Load info if \p MI is a load operation, "None" otherwise.
  static Optional<SIMemOpInfo> getLoadInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Store info if \p MI is a store operation, "None" otherwise.
  static Optional<SIMemOpInfo> getStoreInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Atomic fence info if \p MI is an atomic fence operation,
  /// "None" otherwise.
  static Optional<SIMemOpInfo> getAtomicFenceInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or
  /// rmw operation, "None" otherwise.
  static Optional<SIMemOpInfo> getAtomicCmpxchgOrRmwInfo(
      const MachineBasicBlock::iterator &MI);

  /// Reports unknown synchronization scope used in \p MI to LLVM
  /// context.
  static void reportUnknownSyncScope(
      const MachineBasicBlock::iterator &MI);
};

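/// Rewrites atomic and non-temporal memory operations so that they respect
/// the AMDGPU memory model: the pass walks every instruction marked
/// "maybeAtomic", classifies it as a load, store, fence, or cmpxchg/rmw, and
/// then sets the glc/slc cache-policy bits and inserts "s_waitcnt vmcnt(0)"
/// and VMEM cache invalidate instructions as required by the operation's
/// synchronization scope and ordering.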
class SIMemoryLegalizer final : public MachineFunctionPass {
private:
  /// Machine module info.
  const AMDGPUMachineModuleInfo *MMI = nullptr;

  /// Instruction info.
  const SIInstrInfo *TII = nullptr;

  /// Immediate for "vmcnt(0)".
  unsigned Vmcnt0Immediate = 0;

  /// Opcode for cache invalidation instruction (L1).
  unsigned VmemSIMDCacheInvalidateOpc = 0;

  /// List of atomic pseudo instructions.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// Sets named bit (BitName) to "true" if present in \p MI. Returns
  /// true if \p MI is modified, false otherwise.
  template <uint16_t BitName>
  bool enableNamedBit(const MachineBasicBlock::iterator &MI) const {
    int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
    if (BitIdx == -1)
      return false;

    MachineOperand &Bit = MI->getOperand(BitIdx);
    if (Bit.getImm() != 0)
      return false;

    Bit.setImm(1);
    return true;
  }

  /// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit<AMDGPU::OpName::glc>(MI);
  }

  /// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit<AMDGPU::OpName::slc>(MI);
  }

  /// Inserts a VMEM SIMD cache invalidate instruction ("buffer_wbinvl1" or
  /// "buffer_wbinvl1_vol", depending on the subtarget) before or after \p MI,
  /// as selected by \p Before. Always returns true.
  bool insertVmemSIMDCacheInvalidate(MachineBasicBlock::iterator &MI,
                                     bool Before = true) const;
  /// Inserts "s_waitcnt vmcnt(0)" instruction before or after \p MI, as
  /// selected by \p Before. Always returns true.
  bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                           bool Before = true) const;

  /// Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if current function is modified, false otherwise.
  bool removeAtomicPseudoMIs();

  /// Expands load operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandLoad(const SIMemOpInfo &MOI,
                  MachineBasicBlock::iterator &MI);
  /// Expands store operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandStore(const SIMemOpInfo &MOI,
                   MachineBasicBlock::iterator &MI);
  /// Expands atomic fence operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);
  /// Expands atomic cmpxchg or rmw operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end anonymous namespace

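// Merge the information from all memory operands conservatively: keep the
// widest synchronization scope and the strongest success and failure
// orderings seen, and treat the access as non-temporal only if every memory
// operand is marked non-temporal.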
/* static */
Optional<SIMemOpInfo> SIMemOpInfo::constructFromMIWithMMO(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getNumMemOperands() > 0);

  const MachineFunction *MF = MI->getParent()->getParent();
  const AMDGPUMachineModuleInfo *MMI =
      &MF->getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();

  SyncScope::ID SSID = SyncScope::SingleThread;
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  bool IsNonTemporal = true;

  // Validator should check whether or not MMOs cover the entire set of
  // locations accessed by the memory instruction.
  for (const auto &MMO : MI->memoperands()) {
    const auto &IsSyncScopeInclusion =
        MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
    if (!IsSyncScopeInclusion) {
      reportUnknownSyncScope(MI);
      return None;
    }

    SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
    Ordering =
        isStrongerThan(Ordering, MMO->getOrdering()) ?
            Ordering : MMO->getOrdering();
    FailureOrdering =
        isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ?
            FailureOrdering : MMO->getFailureOrdering();

    if (!(MMO->getFlags() & MachineMemOperand::MONonTemporal))
      IsNonTemporal = false;
  }

  return SIMemOpInfo(SSID, Ordering, FailureOrdering, IsNonTemporal);
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getLoadInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && !MI->mayStore()))
    return None;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo(SyncScope::System,
                       AtomicOrdering::SequentiallyConsistent);

  return SIMemOpInfo::constructFromMIWithMMO(MI);
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getStoreInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(!MI->mayLoad() && MI->mayStore()))
    return None;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo(SyncScope::System,
                       AtomicOrdering::SequentiallyConsistent);

  return SIMemOpInfo::constructFromMIWithMMO(MI);
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getAtomicFenceInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return None;

  SyncScope::ID SSID =
      static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
  return SIMemOpInfo(SSID, Ordering);
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getAtomicCmpxchgOrRmwInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo(SyncScope::System,
                       AtomicOrdering::SequentiallyConsistent,
                       AtomicOrdering::SequentiallyConsistent);

  return SIMemOpInfo::constructFromMIWithMMO(MI);
}

/* static */
void SIMemOpInfo::reportUnknownSyncScope(
    const MachineBasicBlock::iterator &MI) {
  DiagnosticInfoUnsupported Diag(MI->getParent()->getParent()->getFunction(),
                                 "Unsupported synchronization scope");
  LLVMContext *CTX = &MI->getParent()->getParent()->getFunction().getContext();
  CTX->diagnose(Diag);
}

bool SIMemoryLegalizer::insertVmemSIMDCacheInvalidate(
    MachineBasicBlock::iterator &MI, bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(VmemSIMDCacheInvalidateOpc));

  if (!Before)
    --MI;

  return true;
}

bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                                            bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate);

  if (!Before)
    --MI;

  return true;
}

bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}

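// Atomic loads at system or agent scope get the glc bit set for monotonic
// and stronger orderings, an "s_waitcnt vmcnt(0)" inserted before the load
// for sequentially consistent orderings, and an "s_waitcnt vmcnt(0)" plus a
// VMEM cache invalidate inserted after the load for acquire and sequentially
// consistent orderings. Loads at wavefront, workgroup, or single-thread
// scope need no extra code, and non-atomic non-temporal loads get both the
// glc and slc bits set.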
bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getSSID() == SyncScope::System ||
        MOI.getSSID() == MMI->getAgentSSID()) {
      if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
          MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= enableGLCBit(MI);

      if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertWaitcntVmcnt0(MI);

      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
        Changed |= insertWaitcntVmcnt0(MI, false);
        Changed |= insertVmemSIMDCacheInvalidate(MI, false);
      }

      return Changed;
    }

    if (MOI.getSSID() == SyncScope::SingleThread ||
        MOI.getSSID() == MMI->getWorkgroupSSID() ||
        MOI.getSSID() == MMI->getWavefrontSSID()) {
      return Changed;
    }

    llvm_unreachable("Unsupported synchronization scope");
  }

  // Atomic instructions do not have the nontemporal attribute.
  if (MOI.isNonTemporal()) {
    Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);
    return Changed;
  }

  return Changed;
}

bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getSSID() == SyncScope::System ||
        MOI.getSSID() == MMI->getAgentSSID()) {
      if (MOI.getOrdering() == AtomicOrdering::Release ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertWaitcntVmcnt0(MI);

      return Changed;
    }

    if (MOI.getSSID() == SyncScope::SingleThread ||
        MOI.getSSID() == MMI->getWorkgroupSSID() ||
        MOI.getSSID() == MMI->getWavefrontSSID()) {
      return Changed;
    }

    llvm_unreachable("Unsupported synchronization scope");
  }

  // Atomic instructions do not have the nontemporal attribute.
  if (MOI.isNonTemporal()) {
    Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);
    return Changed;
  }

  return Changed;
}

bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getSSID() == SyncScope::System ||
        MOI.getSSID() == MMI->getAgentSSID()) {
      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::Release ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertWaitcntVmcnt0(MI);

      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertVmemSIMDCacheInvalidate(MI);

      AtomicPseudoMIs.push_back(MI);
      return Changed;
    }

    if (MOI.getSSID() == SyncScope::SingleThread ||
        MOI.getSSID() == MMI->getWorkgroupSSID() ||
        MOI.getSSID() == MMI->getWavefrontSSID()) {
      AtomicPseudoMIs.push_back(MI);
      return Changed;
    }

    SIMemOpInfo::reportUnknownSyncScope(MI);
  }

  return Changed;
}

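// Atomic cmpxchg and read-modify-write operations at system or agent scope
// get an "s_waitcnt vmcnt(0)" inserted before the instruction for release,
// acquire-release, and sequentially consistent orderings (or a sequentially
// consistent failure ordering), and an "s_waitcnt vmcnt(0)" plus a VMEM
// cache invalidate inserted after it for acquire, acquire-release, and
// sequentially consistent orderings (or an acquire or sequentially
// consistent failure ordering). At wavefront, workgroup, or single-thread
// scope only the glc bit is set.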
bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                                 MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getSSID() == SyncScope::System ||
        MOI.getSSID() == MMI->getAgentSSID()) {
      if (MOI.getOrdering() == AtomicOrdering::Release ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
          MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertWaitcntVmcnt0(MI);

      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
          MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
          MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
        Changed |= insertWaitcntVmcnt0(MI, false);
        Changed |= insertVmemSIMDCacheInvalidate(MI, false);
      }

      return Changed;
    }

    if (MOI.getSSID() == SyncScope::SingleThread ||
        MOI.getSSID() == MMI->getWorkgroupSSID() ||
        MOI.getSSID() == MMI->getWavefrontSSID()) {
      Changed |= enableGLCBit(MI);
      return Changed;
    }

    llvm_unreachable("Unsupported synchronization scope");
  }

  return Changed;
}

bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits());

  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
  TII = ST.getInstrInfo();

  Vmcnt0Immediate =
      AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV));
  VmemSIMDCacheInvalidateOpc =
      ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ?
          AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL;

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

      if (const auto &MOI = SIMemOpInfo::getLoadInfo(MI))
        Changed |= expandLoad(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getStoreInfo(MI))
        Changed |= expandStore(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getAtomicCmpxchgOrRmwInfo(MI))
        Changed |= expandAtomicCmpxchgOrRmw(MOI.getValue(), MI);
    }
  }

  Changed |= removeAtomicPseudoMIs();
  return Changed;
}

INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)

char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}