//===- SIMemoryLegalizer.cpp ----------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Memory legalizer - implements the memory model. More information can
/// be found here:
///   http://llvm.org/docs/AMDGPUUsage.html#memory-model
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include <cassert>
#include <list>

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

namespace {

class SIMemoryLegalizer final : public MachineFunctionPass {
private:
  struct MemOpInfo final {
    SyncScope::ID SSID = SyncScope::System;
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent;
    AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent;

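    // The defaults above are the conservative worst case (system scope,
    // sequentially consistent ordering); a default-constructed MemOpInfo is
    // returned when an instruction's memory operands are missing.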
    MemOpInfo() = default;

    MemOpInfo(SyncScope::ID SSID,
              AtomicOrdering Ordering,
              AtomicOrdering FailureOrdering)
        : SSID(SSID),
          Ordering(Ordering),
          FailureOrdering(FailureOrdering) {}

    MemOpInfo(const MachineMemOperand *MMO)
        : SSID(MMO->getSyncScopeID()),
          Ordering(MMO->getOrdering()),
          FailureOrdering(MMO->getFailureOrdering()) {}
  };

  /// \brief LLVM context.
  LLVMContext *CTX = nullptr;

  /// \brief Machine module info.
  const AMDGPUMachineModuleInfo *MMI = nullptr;

  /// \brief Instruction info.
  const SIInstrInfo *TII = nullptr;

  /// \brief Immediate for "vmcnt(0)".
  unsigned Vmcnt0Immediate = 0;

  /// \brief Opcode for cache invalidation instruction (L1).
  unsigned Wbinvl1Opcode = 0;

  /// \brief List of atomic pseudo instructions.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// \brief Inserts "buffer_wbinvl1_vol" instruction \p Before or after \p MI.
  /// Always returns true.
  bool insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                              bool Before = true) const;
  /// \brief Inserts "s_waitcnt vmcnt(0)" instruction \p Before or after \p MI.
  /// Always returns true.
  bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                           bool Before = true) const;

  /// \brief Sets GLC bit if present in \p MI. Returns true if \p MI is
  /// modified, false otherwise.
  bool setGLC(const MachineBasicBlock::iterator &MI) const;

  /// \brief Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if current function is modified, false otherwise.
  bool removeAtomicPseudoMIs();

  /// \brief Reports unknown synchronization scope used in \p MI to LLVM
  /// context.
  void reportUnknownSynchScope(const MachineBasicBlock::iterator &MI);

  /// \returns Load info if \p MI is a load operation, "None" otherwise.
  Optional<MemOpInfo> getLoadInfo(const MachineBasicBlock::iterator &MI) const;
  /// \returns Store info if \p MI is a store operation, "None" otherwise.
  Optional<MemOpInfo> getStoreInfo(const MachineBasicBlock::iterator &MI) const;
  /// \returns Atomic fence info if \p MI is an atomic fence operation,
  /// "None" otherwise.
  Optional<MemOpInfo> getAtomicFenceInfo(
      const MachineBasicBlock::iterator &MI) const;
  /// \returns Atomic cmpxchg info if \p MI is an atomic cmpxchg operation,
  /// "None" otherwise.
  Optional<MemOpInfo> getAtomicCmpxchgInfo(
      const MachineBasicBlock::iterator &MI) const;
  /// \returns Atomic rmw info if \p MI is an atomic rmw operation,
  /// "None" otherwise.
  Optional<MemOpInfo> getAtomicRmwInfo(
      const MachineBasicBlock::iterator &MI) const;

  /// \brief Expands load operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandLoad(const MemOpInfo &MOI, MachineBasicBlock::iterator &MI);
  /// \brief Expands store operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandStore(const MemOpInfo &MOI, MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic fence operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicFence(const MemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic cmpxchg operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicCmpxchg(const MemOpInfo &MOI,
                           MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic rmw operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicRmw(const MemOpInfo &MOI,
                       MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end namespace anonymous

bool SIMemoryLegalizer::insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                                               bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

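  // BuildMI inserts before the given iterator. To insert after \p MI instead,
  // step the iterator past it for the insertion and then step back, so the
  // caller's iterator still refers to the original instruction.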
  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(Wbinvl1Opcode));

  if (!Before)
    --MI;

  return true;
}

bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                                            bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

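  // "s_waitcnt vmcnt(0)" stalls the wave until all of its outstanding vector
  // memory operations have completed. The iterator dance mirrors
  // insertBufferWbinvl1Vol above.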
  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate);

  if (!Before)
    --MI;

  return true;
}

bool SIMemoryLegalizer::setGLC(const MachineBasicBlock::iterator &MI) const {
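  // Not every memory instruction has a "glc" operand; if it is absent, or the
  // bit is already set, there is nothing to do. Setting glc requests a
  // globally coherent access (a load bypasses the L1 cache; an atomic returns
  // the pre-operation value).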
  int GLCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::glc);
  if (GLCIdx == -1)
    return false;

  MachineOperand &GLC = MI->getOperand(GLCIdx);
  if (GLC.getImm() == 1)
    return false;

  GLC.setImm(1);
  return true;
}

bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}

void SIMemoryLegalizer::reportUnknownSynchScope(
    const MachineBasicBlock::iterator &MI) {
  DiagnosticInfoUnsupported Diag(*MI->getParent()->getParent()->getFunction(),
                                 "Unsupported synchronization scope");
  CTX->diagnose(Diag);
}

Optional<SIMemoryLegalizer::MemOpInfo> SIMemoryLegalizer::getLoadInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && !MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return MemOpInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;

  return MemOpInfo(MMO);
}

Optional<SIMemoryLegalizer::MemOpInfo> SIMemoryLegalizer::getStoreInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(!MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return MemOpInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;

  return MemOpInfo(MMO);
}

Optional<SIMemoryLegalizer::MemOpInfo> SIMemoryLegalizer::getAtomicFenceInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return None;

  SyncScope::ID SSID =
      static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
  return MemOpInfo(SSID, Ordering, AtomicOrdering::NotAtomic);
}

Optional<SIMemoryLegalizer::MemOpInfo> SIMemoryLegalizer::getAtomicCmpxchgInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return MemOpInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;
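  // Only cmpxchg carries a meaningful failure ordering; a plain RMW atomic
  // leaves it as NotAtomic, so use that to tell the two apart.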
  if (MMO->getFailureOrdering() == AtomicOrdering::NotAtomic)
    return None;

  return MemOpInfo(MMO);
}

Optional<SIMemoryLegalizer::MemOpInfo> SIMemoryLegalizer::getAtomicRmwInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return MemOpInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;
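  // An atomic with a failure ordering is a cmpxchg, handled above.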
  if (MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return None;

  return MemOpInfo(MMO);
}

bool SIMemoryLegalizer::expandLoad(const MemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;
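  // System and agent scope: a seq_cst load first waits for all prior vector
  // memory operations to complete; an acquire or seq_cst load bypasses L1 via
  // glc, then after the load waits for it to complete and invalidates L1 so
  // that subsequent accesses observe up-to-date data. Narrower scopes need no
  // extra code.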
  if (MOI.SSID == SyncScope::System ||
      MOI.SSID == MMI->getAgentSSID()) {
    if (MOI.Ordering == AtomicOrdering::Acquire ||
        MOI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= setGLC(MI);

    if (MOI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (MOI.Ordering == AtomicOrdering::Acquire ||
        MOI.Ordering == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (MOI.SSID == SyncScope::SingleThread ||
             MOI.SSID == MMI->getWorkgroupSSID() ||
             MOI.SSID == MMI->getWavefrontSSID()) {
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandStore(const MemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;
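  // System and agent scope: a release or seq_cst store must not become
  // visible before prior memory operations have completed, so wait on
  // vmcnt(0) in front of it. Narrower scopes need no extra code.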
  if (MOI.SSID == SyncScope::System ||
      MOI.SSID == MMI->getAgentSSID()) {
    if (MOI.Ordering == AtomicOrdering::Release ||
        MOI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    return Changed;
  } else if (MOI.SSID == SyncScope::SingleThread ||
             MOI.SSID == MMI->getWorkgroupSSID() ||
             MOI.SSID == MMI->getWavefrontSSID()) {
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandAtomicFence(const MemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  bool Changed = false;
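  // ATOMIC_FENCE is a pseudo that emits no code by itself: the required wait
  // and L1 invalidate are inserted in its place, and the pseudo is queued in
  // AtomicPseudoMIs to be erased at the end of the pass.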
  if (MOI.SSID == SyncScope::System ||
      MOI.SSID == MMI->getAgentSSID()) {
    if (MOI.Ordering == AtomicOrdering::Acquire ||
        MOI.Ordering == AtomicOrdering::Release ||
        MOI.Ordering == AtomicOrdering::AcquireRelease ||
        MOI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (MOI.Ordering == AtomicOrdering::Acquire ||
        MOI.Ordering == AtomicOrdering::AcquireRelease ||
        MOI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertBufferWbinvl1Vol(MI);

    AtomicPseudoMIs.push_back(MI);
    return Changed;
  } else if (MOI.SSID == SyncScope::SingleThread ||
             MOI.SSID == MMI->getWorkgroupSSID() ||
             MOI.SSID == MMI->getWavefrontSSID()) {
    AtomicPseudoMIs.push_back(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandAtomicCmpxchg(const MemOpInfo &MOI,
                                            MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
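  // System and agent scope: orderings with release semantics (or a seq_cst
  // failure ordering) get a vmcnt(0) wait in front of the atomic; orderings
  // with acquire semantics on either the success or failure path get a wait
  // plus an L1 invalidate behind it. At narrower scopes only the glc bit is
  // set so the atomic returns the old value.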
  if (MOI.SSID == SyncScope::System ||
      MOI.SSID == MMI->getAgentSSID()) {
    if (MOI.Ordering == AtomicOrdering::Release ||
        MOI.Ordering == AtomicOrdering::AcquireRelease ||
        MOI.Ordering == AtomicOrdering::SequentiallyConsistent ||
        MOI.FailureOrdering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (MOI.Ordering == AtomicOrdering::Acquire ||
        MOI.Ordering == AtomicOrdering::AcquireRelease ||
        MOI.Ordering == AtomicOrdering::SequentiallyConsistent ||
        MOI.FailureOrdering == AtomicOrdering::Acquire ||
        MOI.FailureOrdering == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (MOI.SSID == SyncScope::SingleThread ||
             MOI.SSID == MMI->getWorkgroupSSID() ||
             MOI.SSID == MMI->getWavefrontSSID()) {
    Changed |= setGLC(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandAtomicRmw(const MemOpInfo &MOI,
                                        MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
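  // Same scheme as cmpxchg above, except only the single (success) ordering
  // needs to be considered.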
  if (MOI.SSID == SyncScope::System ||
      MOI.SSID == MMI->getAgentSSID()) {
    if (MOI.Ordering == AtomicOrdering::Release ||
        MOI.Ordering == AtomicOrdering::AcquireRelease ||
        MOI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (MOI.Ordering == AtomicOrdering::Acquire ||
        MOI.Ordering == AtomicOrdering::AcquireRelease ||
        MOI.Ordering == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (MOI.SSID == SyncScope::SingleThread ||
             MOI.SSID == MMI->getWorkgroupSSID() ||
             MOI.SSID == MMI->getWavefrontSSID()) {
    Changed |= setGLC(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits());

  CTX = &MF.getFunction()->getContext();
  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
  TII = ST.getInstrInfo();

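  // Precompute the "s_waitcnt vmcnt(0)" immediate: vmcnt is 0 while the
  // expcnt and lgkmcnt fields stay at their all-ones "no wait" values.
  // Southern Islands has no BUFFER_WBINVL1_VOL, so fall back to
  // BUFFER_WBINVL1 there.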
  Vmcnt0Immediate =
      AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV));
  Wbinvl1Opcode = ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ?
      AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL;

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
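      // Cheap filter: only instructions flagged maybeAtomic can need
      // legalization.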
      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

      if (const auto &MOI = getLoadInfo(MI))
        Changed |= expandLoad(MOI.getValue(), MI);
      else if (const auto &MOI = getStoreInfo(MI))
        Changed |= expandStore(MOI.getValue(), MI);
      else if (const auto &MOI = getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(MOI.getValue(), MI);
      else if (const auto &MOI = getAtomicCmpxchgInfo(MI))
        Changed |= expandAtomicCmpxchg(MOI.getValue(), MI);
      else if (const auto &MOI = getAtomicRmwInfo(MI))
        Changed |= expandAtomicRmw(MOI.getValue(), MI);
    }
  }

  Changed |= removeAtomicPseudoMIs();
  return Changed;
}

INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)

char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}