xref: /llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (revision 44b30b453743e95d79ba69a7b9155e23ed4595e5)
1 //===- SIMemoryLegalizer.cpp ----------------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// Memory legalizer - implements memory model. More information can be
12 /// found here:
13 ///   http://llvm.org/docs/AMDGPUUsage.html#memory-model
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "AMDGPU.h"
18 #include "AMDGPUMachineModuleInfo.h"
19 #include "AMDGPUSubtarget.h"
20 #include "SIDefines.h"
21 #include "SIInstrInfo.h"
22 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
23 #include "Utils/AMDGPUBaseInfo.h"
24 #include "llvm/ADT/None.h"
25 #include "llvm/ADT/Optional.h"
26 #include "llvm/CodeGen/MachineBasicBlock.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/MachineMemOperand.h"
31 #include "llvm/CodeGen/MachineModuleInfo.h"
32 #include "llvm/CodeGen/MachineOperand.h"
33 #include "llvm/IR/DebugLoc.h"
34 #include "llvm/IR/DiagnosticInfo.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/LLVMContext.h"
37 #include "llvm/MC/MCInstrDesc.h"
38 #include "llvm/Pass.h"
39 #include "llvm/Support/AtomicOrdering.h"
40 #include <cassert>
41 #include <list>
42 
43 using namespace llvm;
44 using namespace llvm::AMDGPU;
45 
46 #define DEBUG_TYPE "si-memory-legalizer"
47 #define PASS_NAME "SI Memory Legalizer"
48 
49 namespace {
50 
51 class SIMemOpInfo final {
52 private:
53   SyncScope::ID SSID = SyncScope::System;
54   AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
55   AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
56   bool IsNonTemporal = false;
57 
58   SIMemOpInfo(SyncScope::ID SSID, AtomicOrdering Ordering)
59       : SSID(SSID), Ordering(Ordering) {}
60 
61   SIMemOpInfo(SyncScope::ID SSID, AtomicOrdering Ordering,
62               AtomicOrdering FailureOrdering, bool IsNonTemporal = false)
63       : SSID(SSID), Ordering(Ordering), FailureOrdering(FailureOrdering),
64         IsNonTemporal(IsNonTemporal) {}
65 
66   /// \returns Info constructed from \p MI, which has at least machine memory
67   /// operand.
68   static Optional<SIMemOpInfo> constructFromMIWithMMO(
69       const MachineBasicBlock::iterator &MI);
70 
71 public:
72   /// \returns Synchronization scope ID of the machine instruction used to
73   /// create this SIMemOpInfo.
74   SyncScope::ID getSSID() const {
75     return SSID;
76   }
77   /// \returns Ordering constraint of the machine instruction used to
78   /// create this SIMemOpInfo.
79   AtomicOrdering getOrdering() const {
80     return Ordering;
81   }
82   /// \returns Failure ordering constraint of the machine instruction used to
83   /// create this SIMemOpInfo.
84   AtomicOrdering getFailureOrdering() const {
85     return FailureOrdering;
86   }
87   /// \returns True if memory access of the machine instruction used to
88   /// create this SIMemOpInfo is non-temporal, false otherwise.
89   bool isNonTemporal() const {
90     return IsNonTemporal;
91   }
92 
93   /// \returns True if ordering constraint of the machine instruction used to
94   /// create this SIMemOpInfo is unordered or higher, false otherwise.
95   bool isAtomic() const {
96     return Ordering != AtomicOrdering::NotAtomic;
97   }
98 
99   /// \returns Load info if \p MI is a load operation, "None" otherwise.
100   static Optional<SIMemOpInfo> getLoadInfo(
101       const MachineBasicBlock::iterator &MI);
102   /// \returns Store info if \p MI is a store operation, "None" otherwise.
103   static Optional<SIMemOpInfo> getStoreInfo(
104       const MachineBasicBlock::iterator &MI);
105   /// \returns Atomic fence info if \p MI is an atomic fence operation,
106   /// "None" otherwise.
107   static Optional<SIMemOpInfo> getAtomicFenceInfo(
108       const MachineBasicBlock::iterator &MI);
109   /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or
110   /// rmw operation, "None" otherwise.
111   static Optional<SIMemOpInfo> getAtomicCmpxchgOrRmwInfo(
112       const MachineBasicBlock::iterator &MI);
113 
114   /// Reports unknown synchronization scope used in \p MI to LLVM
115   /// context.
116   static void reportUnknownSyncScope(
117       const MachineBasicBlock::iterator &MI);
118 };
119 
120 class SIMemoryLegalizer final : public MachineFunctionPass {
121 private:
122   /// Machine module info.
123   const AMDGPUMachineModuleInfo *MMI = nullptr;
124 
125   /// Instruction info.
126   const SIInstrInfo *TII = nullptr;
127 
128   /// Immediate for "vmcnt(0)".
129   unsigned Vmcnt0Immediate = 0;
130 
131   /// Opcode for cache invalidation instruction (L1).
132   unsigned VmemSIMDCacheInvalidateOpc = 0;
133 
134   /// List of atomic pseudo instructions.
135   std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
136 
137   /// Sets named bit (BitName) to "true" if present in \p MI. Returns
138   /// true if \p MI is modified, false otherwise.
139   template <uint16_t BitName>
140   bool enableNamedBit(const MachineBasicBlock::iterator &MI) const {
141     int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
142     if (BitIdx == -1)
143       return false;
144 
145     MachineOperand &Bit = MI->getOperand(BitIdx);
146     if (Bit.getImm() != 0)
147       return false;
148 
149     Bit.setImm(1);
150     return true;
151   }
152 
153   /// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
154   /// is modified, false otherwise.
155   bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
156     return enableNamedBit<AMDGPU::OpName::glc>(MI);
157   }
158 
159   /// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
160   /// is modified, false otherwise.
161   bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
162     return enableNamedBit<AMDGPU::OpName::slc>(MI);
163   }
164 
165   /// Inserts "buffer_wbinvl1_vol" instruction \p Before or after \p MI.
166   /// Always returns true.
167   bool insertVmemSIMDCacheInvalidate(MachineBasicBlock::iterator &MI,
168                                      bool Before = true) const;
169   /// Inserts "s_waitcnt vmcnt(0)" instruction \p Before or after \p MI.
170   /// Always returns true.
171   bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
172                            bool Before = true) const;
173 
174   /// Removes all processed atomic pseudo instructions from the current
175   /// function. Returns true if current function is modified, false otherwise.
176   bool removeAtomicPseudoMIs();
177 
178   /// Expands load operation \p MI. Returns true if instructions are
179   /// added/deleted or \p MI is modified, false otherwise.
180   bool expandLoad(const SIMemOpInfo &MOI,
181                   MachineBasicBlock::iterator &MI);
182   /// Expands store operation \p MI. Returns true if instructions are
183   /// added/deleted or \p MI is modified, false otherwise.
184   bool expandStore(const SIMemOpInfo &MOI,
185                    MachineBasicBlock::iterator &MI);
186   /// Expands atomic fence operation \p MI. Returns true if
187   /// instructions are added/deleted or \p MI is modified, false otherwise.
188   bool expandAtomicFence(const SIMemOpInfo &MOI,
189                          MachineBasicBlock::iterator &MI);
190   /// Expands atomic cmpxchg or rmw operation \p MI. Returns true if
191   /// instructions are added/deleted or \p MI is modified, false otherwise.
192   bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
193                                 MachineBasicBlock::iterator &MI);
194 
195 public:
196   static char ID;
197 
198   SIMemoryLegalizer() : MachineFunctionPass(ID) {}
199 
200   void getAnalysisUsage(AnalysisUsage &AU) const override {
201     AU.setPreservesCFG();
202     MachineFunctionPass::getAnalysisUsage(AU);
203   }
204 
205   StringRef getPassName() const override {
206     return PASS_NAME;
207   }
208 
209   bool runOnMachineFunction(MachineFunction &MF) override;
210 };
211 
212 } // end namespace anonymous
213 
214 /* static */
215 Optional<SIMemOpInfo> SIMemOpInfo::constructFromMIWithMMO(
216     const MachineBasicBlock::iterator &MI) {
217   assert(MI->getNumMemOperands() > 0);
218 
219   const MachineFunction *MF = MI->getParent()->getParent();
220   const AMDGPUMachineModuleInfo *MMI =
221       &MF->getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
222 
223   SyncScope::ID SSID = SyncScope::SingleThread;
224   AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
225   AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
226   bool IsNonTemporal = true;
227 
228   // Validator should check whether or not MMOs cover the entire set of
229   // locations accessed by the memory instruction.
230   for (const auto &MMO : MI->memoperands()) {
231     const auto &IsSyncScopeInclusion =
232         MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
233     if (!IsSyncScopeInclusion) {
234       reportUnknownSyncScope(MI);
235       return None;
236     }
237 
238     SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
239     Ordering =
240         isStrongerThan(Ordering, MMO->getOrdering()) ?
241             Ordering : MMO->getOrdering();
242     FailureOrdering =
243         isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ?
244             FailureOrdering : MMO->getFailureOrdering();
245 
246     if (!(MMO->getFlags() & MachineMemOperand::MONonTemporal))
247       IsNonTemporal = false;
248   }
249 
250   return SIMemOpInfo(SSID, Ordering, FailureOrdering, IsNonTemporal);
251 }
252 
253 /* static */
254 Optional<SIMemOpInfo> SIMemOpInfo::getLoadInfo(
255     const MachineBasicBlock::iterator &MI) {
256   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
257 
258   if (!(MI->mayLoad() && !MI->mayStore()))
259     return None;
260 
261   // Be conservative if there are no memory operands.
262   if (MI->getNumMemOperands() == 0)
263     return SIMemOpInfo(SyncScope::System,
264                        AtomicOrdering::SequentiallyConsistent);
265 
266   return SIMemOpInfo::constructFromMIWithMMO(MI);
267 }
268 
269 /* static */
270 Optional<SIMemOpInfo> SIMemOpInfo::getStoreInfo(
271     const MachineBasicBlock::iterator &MI) {
272   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
273 
274   if (!(!MI->mayLoad() && MI->mayStore()))
275     return None;
276 
277   // Be conservative if there are no memory operands.
278   if (MI->getNumMemOperands() == 0)
279     return SIMemOpInfo(SyncScope::System,
280                        AtomicOrdering::SequentiallyConsistent);
281 
282   return SIMemOpInfo::constructFromMIWithMMO(MI);
283 }
284 
285 /* static */
286 Optional<SIMemOpInfo> SIMemOpInfo::getAtomicFenceInfo(
287     const MachineBasicBlock::iterator &MI) {
288   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
289 
290   if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
291     return None;
292 
293   SyncScope::ID SSID =
294       static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
295   AtomicOrdering Ordering =
296       static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
297   return SIMemOpInfo(SSID, Ordering);
298 }
299 
300 /* static */
301 Optional<SIMemOpInfo> SIMemOpInfo::getAtomicCmpxchgOrRmwInfo(
302     const MachineBasicBlock::iterator &MI) {
303   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
304 
305   if (!(MI->mayLoad() && MI->mayStore()))
306     return None;
307 
308   // Be conservative if there are no memory operands.
309   if (MI->getNumMemOperands() == 0)
310     return SIMemOpInfo(SyncScope::System,
311                        AtomicOrdering::SequentiallyConsistent,
312                        AtomicOrdering::SequentiallyConsistent);
313 
314   return SIMemOpInfo::constructFromMIWithMMO(MI);
315 }
316 
317 /* static */
318 void SIMemOpInfo::reportUnknownSyncScope(
319     const MachineBasicBlock::iterator &MI) {
320   DiagnosticInfoUnsupported Diag(MI->getParent()->getParent()->getFunction(),
321                                  "Unsupported synchronization scope");
322   LLVMContext *CTX = &MI->getParent()->getParent()->getFunction().getContext();
323   CTX->diagnose(Diag);
324 }
325 
326 bool SIMemoryLegalizer::insertVmemSIMDCacheInvalidate(
327   MachineBasicBlock::iterator &MI, bool Before) const {
328   MachineBasicBlock &MBB = *MI->getParent();
329   DebugLoc DL = MI->getDebugLoc();
330 
331   if (!Before)
332     ++MI;
333 
334   BuildMI(MBB, MI, DL, TII->get(VmemSIMDCacheInvalidateOpc));
335 
336   if (!Before)
337     --MI;
338 
339   return true;
340 }
341 
342 bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
343                                             bool Before) const {
344   MachineBasicBlock &MBB = *MI->getParent();
345   DebugLoc DL = MI->getDebugLoc();
346 
347   if (!Before)
348     ++MI;
349 
350   BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate);
351 
352   if (!Before)
353     --MI;
354 
355   return true;
356 }
357 
358 bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
359   if (AtomicPseudoMIs.empty())
360     return false;
361 
362   for (auto &MI : AtomicPseudoMIs)
363     MI->eraseFromParent();
364 
365   AtomicPseudoMIs.clear();
366   return true;
367 }
368 
369 bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
370                                    MachineBasicBlock::iterator &MI) {
371   assert(MI->mayLoad() && !MI->mayStore());
372 
373   bool Changed = false;
374 
375   if (MOI.isAtomic()) {
376     if (MOI.getSSID() == SyncScope::System ||
377         MOI.getSSID() == MMI->getAgentSSID()) {
378       if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
379           MOI.getOrdering() == AtomicOrdering::Acquire ||
380           MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
381         Changed |= enableGLCBit(MI);
382 
383       if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
384         Changed |= insertWaitcntVmcnt0(MI);
385 
386       if (MOI.getOrdering() == AtomicOrdering::Acquire ||
387           MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
388         Changed |= insertWaitcntVmcnt0(MI, false);
389         Changed |= insertVmemSIMDCacheInvalidate(MI, false);
390       }
391 
392       return Changed;
393     }
394 
395     if (MOI.getSSID() == SyncScope::SingleThread ||
396         MOI.getSSID() == MMI->getWorkgroupSSID() ||
397         MOI.getSSID() == MMI->getWavefrontSSID()) {
398       return Changed;
399     }
400 
401     llvm_unreachable("Unsupported synchronization scope");
402   }
403 
404   // Atomic instructions do not have the nontemporal attribute.
405   if (MOI.isNonTemporal()) {
406     Changed |= enableGLCBit(MI);
407     Changed |= enableSLCBit(MI);
408     return Changed;
409   }
410 
411   return Changed;
412 }
413 
414 bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
415                                     MachineBasicBlock::iterator &MI) {
416   assert(!MI->mayLoad() && MI->mayStore());
417 
418   bool Changed = false;
419 
420   if (MOI.isAtomic()) {
421     if (MOI.getSSID() == SyncScope::System ||
422         MOI.getSSID() == MMI->getAgentSSID()) {
423       if (MOI.getOrdering() == AtomicOrdering::Release ||
424           MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
425         Changed |= insertWaitcntVmcnt0(MI);
426 
427       return Changed;
428     }
429 
430     if (MOI.getSSID() == SyncScope::SingleThread ||
431         MOI.getSSID() == MMI->getWorkgroupSSID() ||
432         MOI.getSSID() == MMI->getWavefrontSSID()) {
433       return Changed;
434     }
435 
436     llvm_unreachable("Unsupported synchronization scope");
437   }
438 
439   // Atomic instructions do not have the nontemporal attribute.
440   if (MOI.isNonTemporal()) {
441     Changed |= enableGLCBit(MI);
442     Changed |= enableSLCBit(MI);
443     return Changed;
444   }
445 
446   return Changed;
447 }
448 
449 bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
450                                           MachineBasicBlock::iterator &MI) {
451   assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
452 
453   bool Changed = false;
454 
455   if (MOI.isAtomic()) {
456     if (MOI.getSSID() == SyncScope::System ||
457         MOI.getSSID() == MMI->getAgentSSID()) {
458       if (MOI.getOrdering() == AtomicOrdering::Acquire ||
459           MOI.getOrdering() == AtomicOrdering::Release ||
460           MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
461           MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
462         Changed |= insertWaitcntVmcnt0(MI);
463 
464       if (MOI.getOrdering() == AtomicOrdering::Acquire ||
465           MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
466           MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
467         Changed |= insertVmemSIMDCacheInvalidate(MI);
468 
469       AtomicPseudoMIs.push_back(MI);
470       return Changed;
471     }
472 
473     if (MOI.getSSID() == SyncScope::SingleThread ||
474         MOI.getSSID() == MMI->getWorkgroupSSID() ||
475         MOI.getSSID() == MMI->getWavefrontSSID()) {
476       AtomicPseudoMIs.push_back(MI);
477       return Changed;
478     }
479 
480     SIMemOpInfo::reportUnknownSyncScope(MI);
481   }
482 
483   return Changed;
484 }
485 
486 bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
487   MachineBasicBlock::iterator &MI) {
488   assert(MI->mayLoad() && MI->mayStore());
489 
490   bool Changed = false;
491 
492   if (MOI.isAtomic()) {
493     if (MOI.getSSID() == SyncScope::System ||
494         MOI.getSSID() == MMI->getAgentSSID()) {
495       if (MOI.getOrdering() == AtomicOrdering::Release ||
496           MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
497           MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
498           MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
499         Changed |= insertWaitcntVmcnt0(MI);
500 
501       if (MOI.getOrdering() == AtomicOrdering::Acquire ||
502           MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
503           MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
504           MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
505           MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
506         Changed |= insertWaitcntVmcnt0(MI, false);
507         Changed |= insertVmemSIMDCacheInvalidate(MI, false);
508       }
509 
510       return Changed;
511     }
512 
513     if (MOI.getSSID() == SyncScope::SingleThread ||
514         MOI.getSSID() == MMI->getWorkgroupSSID() ||
515         MOI.getSSID() == MMI->getWavefrontSSID()) {
516       Changed |= enableGLCBit(MI);
517       return Changed;
518     }
519 
520     llvm_unreachable("Unsupported synchronization scope");
521   }
522 
523   return Changed;
524 }
525 
526 bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
527   bool Changed = false;
528   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
529   const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
530 
531   MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
532   TII = ST.getInstrInfo();
533 
534   Vmcnt0Immediate =
535       AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV));
536   VmemSIMDCacheInvalidateOpc =
537      ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ?
538        AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL;
539 
540   for (auto &MBB : MF) {
541     for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
542       if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
543         continue;
544 
545       if (const auto &MOI = SIMemOpInfo::getLoadInfo(MI))
546         Changed |= expandLoad(MOI.getValue(), MI);
547       else if (const auto &MOI = SIMemOpInfo::getStoreInfo(MI))
548         Changed |= expandStore(MOI.getValue(), MI);
549       else if (const auto &MOI = SIMemOpInfo::getAtomicFenceInfo(MI))
550         Changed |= expandAtomicFence(MOI.getValue(), MI);
551       else if (const auto &MOI = SIMemOpInfo::getAtomicCmpxchgOrRmwInfo(MI))
552         Changed |= expandAtomicCmpxchgOrRmw(MOI.getValue(), MI);
553     }
554   }
555 
556   Changed |= removeAtomicPseudoMIs();
557   return Changed;
558 }
559 
// Register the pass with the PassRegistry under DEBUG_TYPE/PASS_NAME;
// it neither depends on nor preserves CFG-only analyses beyond what
// getAnalysisUsage declares.
INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)

// Unique pass identity; the exported ID lets other code refer to this
// pass (e.g. in pass ordering) without including this file.
char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;

/// Factory used by the AMDGPU target machine to create this pass.
FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}
568