1 //===- SIMemoryLegalizer.cpp ----------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// Memory legalizer - implements memory model. More information can be 12 /// found here: 13 /// http://llvm.org/docs/AMDGPUUsage.html#memory-model 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "AMDGPU.h" 18 #include "AMDGPUMachineModuleInfo.h" 19 #include "AMDGPUSubtarget.h" 20 #include "SIDefines.h" 21 #include "SIInstrInfo.h" 22 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 23 #include "Utils/AMDGPUBaseInfo.h" 24 #include "llvm/ADT/None.h" 25 #include "llvm/ADT/Optional.h" 26 #include "llvm/CodeGen/MachineBasicBlock.h" 27 #include "llvm/CodeGen/MachineFunction.h" 28 #include "llvm/CodeGen/MachineFunctionPass.h" 29 #include "llvm/CodeGen/MachineInstrBuilder.h" 30 #include "llvm/CodeGen/MachineMemOperand.h" 31 #include "llvm/CodeGen/MachineModuleInfo.h" 32 #include "llvm/CodeGen/MachineOperand.h" 33 #include "llvm/IR/DebugLoc.h" 34 #include "llvm/IR/DiagnosticInfo.h" 35 #include "llvm/IR/Function.h" 36 #include "llvm/IR/LLVMContext.h" 37 #include "llvm/MC/MCInstrDesc.h" 38 #include "llvm/Pass.h" 39 #include "llvm/Support/AtomicOrdering.h" 40 #include <cassert> 41 #include <list> 42 43 using namespace llvm; 44 using namespace llvm::AMDGPU; 45 46 #define DEBUG_TYPE "si-memory-legalizer" 47 #define PASS_NAME "SI Memory Legalizer" 48 49 namespace { 50 51 class SIMemOpInfo final { 52 private: 53 SyncScope::ID SSID = SyncScope::System; 54 AtomicOrdering Ordering = AtomicOrdering::NotAtomic; 55 AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic; 56 bool IsNonTemporal = false; 57 58 SIMemOpInfo(SyncScope::ID SSID, AtomicOrdering Ordering) 59 : SSID(SSID), Ordering(Ordering) {} 60 61 SIMemOpInfo(SyncScope::ID SSID, AtomicOrdering Ordering, 62 AtomicOrdering FailureOrdering, bool IsNonTemporal = false) 63 : SSID(SSID), Ordering(Ordering), FailureOrdering(FailureOrdering), 64 IsNonTemporal(IsNonTemporal) {} 65 66 /// \returns Info constructed from \p MI, which has at least machine memory 67 /// operand. 68 static Optional<SIMemOpInfo> constructFromMIWithMMO( 69 const MachineBasicBlock::iterator &MI); 70 71 public: 72 /// \returns Synchronization scope ID of the machine instruction used to 73 /// create this SIMemOpInfo. 74 SyncScope::ID getSSID() const { 75 return SSID; 76 } 77 /// \returns Ordering constraint of the machine instruction used to 78 /// create this SIMemOpInfo. 79 AtomicOrdering getOrdering() const { 80 return Ordering; 81 } 82 /// \returns Failure ordering constraint of the machine instruction used to 83 /// create this SIMemOpInfo. 84 AtomicOrdering getFailureOrdering() const { 85 return FailureOrdering; 86 } 87 /// \returns True if memory access of the machine instruction used to 88 /// create this SIMemOpInfo is non-temporal, false otherwise. 89 bool isNonTemporal() const { 90 return IsNonTemporal; 91 } 92 93 /// \returns True if ordering constraint of the machine instruction used to 94 /// create this SIMemOpInfo is unordered or higher, false otherwise. 95 bool isAtomic() const { 96 return Ordering != AtomicOrdering::NotAtomic; 97 } 98 99 /// \returns Load info if \p MI is a load operation, "None" otherwise. 100 static Optional<SIMemOpInfo> getLoadInfo( 101 const MachineBasicBlock::iterator &MI); 102 /// \returns Store info if \p MI is a store operation, "None" otherwise. 103 static Optional<SIMemOpInfo> getStoreInfo( 104 const MachineBasicBlock::iterator &MI); 105 /// \returns Atomic fence info if \p MI is an atomic fence operation, 106 /// "None" otherwise. 107 static Optional<SIMemOpInfo> getAtomicFenceInfo( 108 const MachineBasicBlock::iterator &MI); 109 /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or 110 /// rmw operation, "None" otherwise. 111 static Optional<SIMemOpInfo> getAtomicCmpxchgOrRmwInfo( 112 const MachineBasicBlock::iterator &MI); 113 114 /// Reports unknown synchronization scope used in \p MI to LLVM 115 /// context. 116 static void reportUnknownSyncScope( 117 const MachineBasicBlock::iterator &MI); 118 }; 119 120 class SIMemoryLegalizer final : public MachineFunctionPass { 121 private: 122 /// Machine module info. 123 const AMDGPUMachineModuleInfo *MMI = nullptr; 124 125 /// Instruction info. 126 const SIInstrInfo *TII = nullptr; 127 128 /// Immediate for "vmcnt(0)". 129 unsigned Vmcnt0Immediate = 0; 130 131 /// Opcode for cache invalidation instruction (L1). 132 unsigned VmemSIMDCacheInvalidateOpc = 0; 133 134 /// List of atomic pseudo instructions. 135 std::list<MachineBasicBlock::iterator> AtomicPseudoMIs; 136 137 /// Sets named bit (BitName) to "true" if present in \p MI. Returns 138 /// true if \p MI is modified, false otherwise. 139 template <uint16_t BitName> 140 bool enableNamedBit(const MachineBasicBlock::iterator &MI) const { 141 int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName); 142 if (BitIdx == -1) 143 return false; 144 145 MachineOperand &Bit = MI->getOperand(BitIdx); 146 if (Bit.getImm() != 0) 147 return false; 148 149 Bit.setImm(1); 150 return true; 151 } 152 153 /// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI 154 /// is modified, false otherwise. 155 bool enableGLCBit(const MachineBasicBlock::iterator &MI) const { 156 return enableNamedBit<AMDGPU::OpName::glc>(MI); 157 } 158 159 /// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI 160 /// is modified, false otherwise. 161 bool enableSLCBit(const MachineBasicBlock::iterator &MI) const { 162 return enableNamedBit<AMDGPU::OpName::slc>(MI); 163 } 164 165 /// Inserts "buffer_wbinvl1_vol" instruction \p Before or after \p MI. 166 /// Always returns true. 167 bool insertVmemSIMDCacheInvalidate(MachineBasicBlock::iterator &MI, 168 bool Before = true) const; 169 /// Inserts "s_waitcnt vmcnt(0)" instruction \p Before or after \p MI. 170 /// Always returns true. 171 bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI, 172 bool Before = true) const; 173 174 /// Removes all processed atomic pseudo instructions from the current 175 /// function. Returns true if current function is modified, false otherwise. 176 bool removeAtomicPseudoMIs(); 177 178 /// Expands load operation \p MI. Returns true if instructions are 179 /// added/deleted or \p MI is modified, false otherwise. 180 bool expandLoad(const SIMemOpInfo &MOI, 181 MachineBasicBlock::iterator &MI); 182 /// Expands store operation \p MI. Returns true if instructions are 183 /// added/deleted or \p MI is modified, false otherwise. 184 bool expandStore(const SIMemOpInfo &MOI, 185 MachineBasicBlock::iterator &MI); 186 /// Expands atomic fence operation \p MI. Returns true if 187 /// instructions are added/deleted or \p MI is modified, false otherwise. 188 bool expandAtomicFence(const SIMemOpInfo &MOI, 189 MachineBasicBlock::iterator &MI); 190 /// Expands atomic cmpxchg or rmw operation \p MI. Returns true if 191 /// instructions are added/deleted or \p MI is modified, false otherwise. 192 bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI, 193 MachineBasicBlock::iterator &MI); 194 195 public: 196 static char ID; 197 198 SIMemoryLegalizer() : MachineFunctionPass(ID) {} 199 200 void getAnalysisUsage(AnalysisUsage &AU) const override { 201 AU.setPreservesCFG(); 202 MachineFunctionPass::getAnalysisUsage(AU); 203 } 204 205 StringRef getPassName() const override { 206 return PASS_NAME; 207 } 208 209 bool runOnMachineFunction(MachineFunction &MF) override; 210 }; 211 212 } // end namespace anonymous 213 214 /* static */ 215 Optional<SIMemOpInfo> SIMemOpInfo::constructFromMIWithMMO( 216 const MachineBasicBlock::iterator &MI) { 217 assert(MI->getNumMemOperands() > 0); 218 219 const MachineFunction *MF = MI->getParent()->getParent(); 220 const AMDGPUMachineModuleInfo *MMI = 221 &MF->getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>(); 222 223 SyncScope::ID SSID = SyncScope::SingleThread; 224 AtomicOrdering Ordering = AtomicOrdering::NotAtomic; 225 AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic; 226 bool IsNonTemporal = true; 227 228 // Validator should check whether or not MMOs cover the entire set of 229 // locations accessed by the memory instruction. 230 for (const auto &MMO : MI->memoperands()) { 231 const auto &IsSyncScopeInclusion = 232 MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID()); 233 if (!IsSyncScopeInclusion) { 234 reportUnknownSyncScope(MI); 235 return None; 236 } 237 238 SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID(); 239 Ordering = 240 isStrongerThan(Ordering, MMO->getOrdering()) ? 241 Ordering : MMO->getOrdering(); 242 FailureOrdering = 243 isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ? 244 FailureOrdering : MMO->getFailureOrdering(); 245 246 if (!(MMO->getFlags() & MachineMemOperand::MONonTemporal)) 247 IsNonTemporal = false; 248 } 249 250 return SIMemOpInfo(SSID, Ordering, FailureOrdering, IsNonTemporal); 251 } 252 253 /* static */ 254 Optional<SIMemOpInfo> SIMemOpInfo::getLoadInfo( 255 const MachineBasicBlock::iterator &MI) { 256 assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); 257 258 if (!(MI->mayLoad() && !MI->mayStore())) 259 return None; 260 261 // Be conservative if there are no memory operands. 262 if (MI->getNumMemOperands() == 0) 263 return SIMemOpInfo(SyncScope::System, 264 AtomicOrdering::SequentiallyConsistent); 265 266 return SIMemOpInfo::constructFromMIWithMMO(MI); 267 } 268 269 /* static */ 270 Optional<SIMemOpInfo> SIMemOpInfo::getStoreInfo( 271 const MachineBasicBlock::iterator &MI) { 272 assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); 273 274 if (!(!MI->mayLoad() && MI->mayStore())) 275 return None; 276 277 // Be conservative if there are no memory operands. 278 if (MI->getNumMemOperands() == 0) 279 return SIMemOpInfo(SyncScope::System, 280 AtomicOrdering::SequentiallyConsistent); 281 282 return SIMemOpInfo::constructFromMIWithMMO(MI); 283 } 284 285 /* static */ 286 Optional<SIMemOpInfo> SIMemOpInfo::getAtomicFenceInfo( 287 const MachineBasicBlock::iterator &MI) { 288 assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); 289 290 if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE) 291 return None; 292 293 SyncScope::ID SSID = 294 static_cast<SyncScope::ID>(MI->getOperand(1).getImm()); 295 AtomicOrdering Ordering = 296 static_cast<AtomicOrdering>(MI->getOperand(0).getImm()); 297 return SIMemOpInfo(SSID, Ordering); 298 } 299 300 /* static */ 301 Optional<SIMemOpInfo> SIMemOpInfo::getAtomicCmpxchgOrRmwInfo( 302 const MachineBasicBlock::iterator &MI) { 303 assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); 304 305 if (!(MI->mayLoad() && MI->mayStore())) 306 return None; 307 308 // Be conservative if there are no memory operands. 309 if (MI->getNumMemOperands() == 0) 310 return SIMemOpInfo(SyncScope::System, 311 AtomicOrdering::SequentiallyConsistent, 312 AtomicOrdering::SequentiallyConsistent); 313 314 return SIMemOpInfo::constructFromMIWithMMO(MI); 315 } 316 317 /* static */ 318 void SIMemOpInfo::reportUnknownSyncScope( 319 const MachineBasicBlock::iterator &MI) { 320 DiagnosticInfoUnsupported Diag(MI->getParent()->getParent()->getFunction(), 321 "Unsupported synchronization scope"); 322 LLVMContext *CTX = &MI->getParent()->getParent()->getFunction().getContext(); 323 CTX->diagnose(Diag); 324 } 325 326 bool SIMemoryLegalizer::insertVmemSIMDCacheInvalidate( 327 MachineBasicBlock::iterator &MI, bool Before) const { 328 MachineBasicBlock &MBB = *MI->getParent(); 329 DebugLoc DL = MI->getDebugLoc(); 330 331 if (!Before) 332 ++MI; 333 334 BuildMI(MBB, MI, DL, TII->get(VmemSIMDCacheInvalidateOpc)); 335 336 if (!Before) 337 --MI; 338 339 return true; 340 } 341 342 bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI, 343 bool Before) const { 344 MachineBasicBlock &MBB = *MI->getParent(); 345 DebugLoc DL = MI->getDebugLoc(); 346 347 if (!Before) 348 ++MI; 349 350 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate); 351 352 if (!Before) 353 --MI; 354 355 return true; 356 } 357 358 bool SIMemoryLegalizer::removeAtomicPseudoMIs() { 359 if (AtomicPseudoMIs.empty()) 360 return false; 361 362 for (auto &MI : AtomicPseudoMIs) 363 MI->eraseFromParent(); 364 365 AtomicPseudoMIs.clear(); 366 return true; 367 } 368 369 bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI, 370 MachineBasicBlock::iterator &MI) { 371 assert(MI->mayLoad() && !MI->mayStore()); 372 373 bool Changed = false; 374 375 if (MOI.isAtomic()) { 376 if (MOI.getSSID() == SyncScope::System || 377 MOI.getSSID() == MMI->getAgentSSID()) { 378 if (MOI.getOrdering() == AtomicOrdering::Monotonic || 379 MOI.getOrdering() == AtomicOrdering::Acquire || 380 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) 381 Changed |= enableGLCBit(MI); 382 383 if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) 384 Changed |= insertWaitcntVmcnt0(MI); 385 386 if (MOI.getOrdering() == AtomicOrdering::Acquire || 387 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) { 388 Changed |= insertWaitcntVmcnt0(MI, false); 389 Changed |= insertVmemSIMDCacheInvalidate(MI, false); 390 } 391 392 return Changed; 393 } 394 395 if (MOI.getSSID() == SyncScope::SingleThread || 396 MOI.getSSID() == MMI->getWorkgroupSSID() || 397 MOI.getSSID() == MMI->getWavefrontSSID()) { 398 return Changed; 399 } 400 401 llvm_unreachable("Unsupported synchronization scope"); 402 } 403 404 // Atomic instructions do not have the nontemporal attribute. 405 if (MOI.isNonTemporal()) { 406 Changed |= enableGLCBit(MI); 407 Changed |= enableSLCBit(MI); 408 return Changed; 409 } 410 411 return Changed; 412 } 413 414 bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI, 415 MachineBasicBlock::iterator &MI) { 416 assert(!MI->mayLoad() && MI->mayStore()); 417 418 bool Changed = false; 419 420 if (MOI.isAtomic()) { 421 if (MOI.getSSID() == SyncScope::System || 422 MOI.getSSID() == MMI->getAgentSSID()) { 423 if (MOI.getOrdering() == AtomicOrdering::Release || 424 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) 425 Changed |= insertWaitcntVmcnt0(MI); 426 427 return Changed; 428 } 429 430 if (MOI.getSSID() == SyncScope::SingleThread || 431 MOI.getSSID() == MMI->getWorkgroupSSID() || 432 MOI.getSSID() == MMI->getWavefrontSSID()) { 433 return Changed; 434 } 435 436 llvm_unreachable("Unsupported synchronization scope"); 437 } 438 439 // Atomic instructions do not have the nontemporal attribute. 440 if (MOI.isNonTemporal()) { 441 Changed |= enableGLCBit(MI); 442 Changed |= enableSLCBit(MI); 443 return Changed; 444 } 445 446 return Changed; 447 } 448 449 bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI, 450 MachineBasicBlock::iterator &MI) { 451 assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE); 452 453 bool Changed = false; 454 455 if (MOI.isAtomic()) { 456 if (MOI.getSSID() == SyncScope::System || 457 MOI.getSSID() == MMI->getAgentSSID()) { 458 if (MOI.getOrdering() == AtomicOrdering::Acquire || 459 MOI.getOrdering() == AtomicOrdering::Release || 460 MOI.getOrdering() == AtomicOrdering::AcquireRelease || 461 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) 462 Changed |= insertWaitcntVmcnt0(MI); 463 464 if (MOI.getOrdering() == AtomicOrdering::Acquire || 465 MOI.getOrdering() == AtomicOrdering::AcquireRelease || 466 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) 467 Changed |= insertVmemSIMDCacheInvalidate(MI); 468 469 AtomicPseudoMIs.push_back(MI); 470 return Changed; 471 } 472 473 if (MOI.getSSID() == SyncScope::SingleThread || 474 MOI.getSSID() == MMI->getWorkgroupSSID() || 475 MOI.getSSID() == MMI->getWavefrontSSID()) { 476 AtomicPseudoMIs.push_back(MI); 477 return Changed; 478 } 479 480 SIMemOpInfo::reportUnknownSyncScope(MI); 481 } 482 483 return Changed; 484 } 485 486 bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI, 487 MachineBasicBlock::iterator &MI) { 488 assert(MI->mayLoad() && MI->mayStore()); 489 490 bool Changed = false; 491 492 if (MOI.isAtomic()) { 493 if (MOI.getSSID() == SyncScope::System || 494 MOI.getSSID() == MMI->getAgentSSID()) { 495 if (MOI.getOrdering() == AtomicOrdering::Release || 496 MOI.getOrdering() == AtomicOrdering::AcquireRelease || 497 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent || 498 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) 499 Changed |= insertWaitcntVmcnt0(MI); 500 501 if (MOI.getOrdering() == AtomicOrdering::Acquire || 502 MOI.getOrdering() == AtomicOrdering::AcquireRelease || 503 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent || 504 MOI.getFailureOrdering() == AtomicOrdering::Acquire || 505 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) { 506 Changed |= insertWaitcntVmcnt0(MI, false); 507 Changed |= insertVmemSIMDCacheInvalidate(MI, false); 508 } 509 510 return Changed; 511 } 512 513 if (MOI.getSSID() == SyncScope::SingleThread || 514 MOI.getSSID() == MMI->getWorkgroupSSID() || 515 MOI.getSSID() == MMI->getWavefrontSSID()) { 516 Changed |= enableGLCBit(MI); 517 return Changed; 518 } 519 520 llvm_unreachable("Unsupported synchronization scope"); 521 } 522 523 return Changed; 524 } 525 526 bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) { 527 bool Changed = false; 528 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 529 const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits()); 530 531 MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>(); 532 TII = ST.getInstrInfo(); 533 534 Vmcnt0Immediate = 535 AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV)); 536 VmemSIMDCacheInvalidateOpc = 537 ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ? 538 AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL; 539 540 for (auto &MBB : MF) { 541 for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) { 542 if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic)) 543 continue; 544 545 if (const auto &MOI = SIMemOpInfo::getLoadInfo(MI)) 546 Changed |= expandLoad(MOI.getValue(), MI); 547 else if (const auto &MOI = SIMemOpInfo::getStoreInfo(MI)) 548 Changed |= expandStore(MOI.getValue(), MI); 549 else if (const auto &MOI = SIMemOpInfo::getAtomicFenceInfo(MI)) 550 Changed |= expandAtomicFence(MOI.getValue(), MI); 551 else if (const auto &MOI = SIMemOpInfo::getAtomicCmpxchgOrRmwInfo(MI)) 552 Changed |= expandAtomicCmpxchgOrRmw(MOI.getValue(), MI); 553 } 554 } 555 556 Changed |= removeAtomicPseudoMIs(); 557 return Changed; 558 } 559 560 INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false) 561 562 char SIMemoryLegalizer::ID = 0; 563 char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID; 564 565 FunctionPass *llvm::createSIMemoryLegalizerPass() { 566 return new SIMemoryLegalizer(); 567 } 568