//===- SIMemoryLegalizer.cpp ----------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Memory legalizer - implements memory model. More information can be
/// found here:
///   http://llvm.org/docs/AMDGPUUsage.html#memory-model
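///
/// Atomic operations and fences are legalized by inserting "s_waitcnt
/// vmcnt(0)" and "buffer_wbinvl1(_vol)" instructions and by setting the GLC
/// bit on memory instructions, as dictated by the atomic ordering and
/// synchronization scope of each operation.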
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include <cassert>
#include <list>

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

namespace {

class SIMemoryLegalizer final : public MachineFunctionPass {
private:
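  /// \brief Synchronization scope and atomic orderings of a memory operation.
  /// The defaults (system scope, sequentially consistent) are the most
  /// conservative assumption and are used when an instruction does not carry
  /// a single memory operand to inspect.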
  struct AtomicInfo final {
    SyncScope::ID SSID = SyncScope::System;
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent;
    AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent;

    AtomicInfo() = default;

    AtomicInfo(SyncScope::ID SSID,
               AtomicOrdering Ordering,
               AtomicOrdering FailureOrdering)
        : SSID(SSID),
          Ordering(Ordering),
          FailureOrdering(FailureOrdering) {}

    AtomicInfo(const MachineMemOperand *MMO)
        : SSID(MMO->getSyncScopeID()),
          Ordering(MMO->getOrdering()),
          FailureOrdering(MMO->getFailureOrdering()) {}
  };

  /// \brief LLVM context.
  LLVMContext *CTX = nullptr;

  /// \brief Machine module info.
  const AMDGPUMachineModuleInfo *MMI = nullptr;

  /// \brief Instruction info.
  const SIInstrInfo *TII = nullptr;

  /// \brief Immediate for "vmcnt(0)".
  unsigned Vmcnt0Immediate = 0;

  /// \brief Opcode for cache invalidation instruction (L1).
  unsigned Wbinvl1Opcode = 0;
  /// \brief List of atomic pseudo instructions that have been processed and
  /// are queued for removal at the end of the run.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// \brief Inserts "buffer_wbinvl1(_vol)" instruction before or after \p MI,
  /// depending on \p Before. Always returns true.
  bool insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                              bool Before = true) const;
  /// \brief Inserts "s_waitcnt vmcnt(0)" instruction before or after \p MI,
  /// depending on \p Before. Always returns true.
  bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                           bool Before = true) const;

  /// \brief Sets GLC bit if present in \p MI. Returns true if \p MI is
  /// modified, false otherwise.
  bool setGLC(const MachineBasicBlock::iterator &MI) const;

  /// \brief Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if current function is modified, false otherwise.
  bool removeAtomicPseudoMIs();

  /// \brief Reports unknown synchronization scope used in \p MI to LLVM
  /// context.
  void reportUnknownSynchScope(const MachineBasicBlock::iterator &MI);

  /// \returns Atomic fence info if \p MI is an atomic fence operation,
  /// "None" otherwise.
  Optional<AtomicInfo> getAtomicFenceInfo(
      const MachineBasicBlock::iterator &MI) const;
  /// \returns Atomic load info if \p MI is an atomic load operation,
  /// "None" otherwise.
  Optional<AtomicInfo> getAtomicLoadInfo(
      const MachineBasicBlock::iterator &MI) const;
  /// \returns Atomic store info if \p MI is an atomic store operation,
  /// "None" otherwise.
  Optional<AtomicInfo> getAtomicStoreInfo(
      const MachineBasicBlock::iterator &MI) const;
  /// \returns Atomic cmpxchg info if \p MI is an atomic cmpxchg operation,
  /// "None" otherwise.
  Optional<AtomicInfo> getAtomicCmpxchgInfo(
      const MachineBasicBlock::iterator &MI) const;
  /// \returns Atomic rmw info if \p MI is an atomic rmw operation,
  /// "None" otherwise.
  Optional<AtomicInfo> getAtomicRmwInfo(
      const MachineBasicBlock::iterator &MI) const;

  /// \brief Expands atomic fence operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicFence(const AtomicInfo &AI,
                         MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic load operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicLoad(const AtomicInfo &AI,
                        MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic store operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicStore(const AtomicInfo &AI,
                         MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic cmpxchg operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicCmpxchg(const AtomicInfo &AI,
                           MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic rmw operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicRmw(const AtomicInfo &AI,
                       MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end anonymous namespace

bool SIMemoryLegalizer::insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                                               bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

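  // BuildMI inserts before the iterator, so to insert after MI temporarily
  // step the iterator past MI, then step back afterwards.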
  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(Wbinvl1Opcode));

  if (!Before)
    --MI;

  return true;
}

bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                                            bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate);

  if (!Before)
    --MI;

  return true;
}

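// Note that on GCN targets the GLC bit requests globally coherent behaviour:
// loads bypass the L1 cache and atomic operations return the pre-op value.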
bool SIMemoryLegalizer::setGLC(const MachineBasicBlock::iterator &MI) const {
  int GLCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::glc);
  if (GLCIdx == -1)
    return false;

  MachineOperand &GLC = MI->getOperand(GLCIdx);
  if (GLC.getImm() == 1)
    return false;

  GLC.setImm(1);
  return true;
}

bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}

void SIMemoryLegalizer::reportUnknownSynchScope(
    const MachineBasicBlock::iterator &MI) {
  DiagnosticInfoUnsupported Diag(*MI->getParent()->getParent()->getFunction(),
                                 "Unsupported synchronization scope");
  CTX->diagnose(Diag);
}

Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicFenceInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return None;

  SyncScope::ID SSID =
      static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
  return AtomicInfo(SSID, Ordering, AtomicOrdering::NotAtomic);
}

Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicLoadInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && !MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return AtomicInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;

  return AtomicInfo(MMO);
}

Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicStoreInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(!MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return AtomicInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;

  return AtomicInfo(MMO);
}

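// A cmpxchg is distinguished from other read-modify-write atomics below by
// the presence of a failure ordering on its memory operand.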
Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicCmpxchgInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return AtomicInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;
  if (MMO->getFailureOrdering() == AtomicOrdering::NotAtomic)
    return None;

  return AtomicInfo(MMO);
}

Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicRmwInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return AtomicInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;
  if (MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return None;

  return AtomicInfo(MMO);
}

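// For system and agent scope, an acquire, release, acq_rel or seq_cst fence
// is preceded by "s_waitcnt vmcnt(0)"; orderings with acquire semantics are
// additionally preceded by an L1 cache invalidate. The ATOMIC_FENCE pseudo
// itself is queued for removal. Narrower scopes need no cache operations, so
// only the pseudo is removed.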
bool SIMemoryLegalizer::expandAtomicFence(const AtomicInfo &AI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  bool Changed = false;
  if (AI.SSID == SyncScope::System ||
      AI.SSID == MMI->getAgentSSID()) {
    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::Release ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertBufferWbinvl1Vol(MI);

    AtomicPseudoMIs.push_back(MI);
    return Changed;
  } else if (AI.SSID == SyncScope::SingleThread ||
             AI.SSID == MMI->getWorkgroupSSID() ||
             AI.SSID == MMI->getWavefrontSSID()) {
    AtomicPseudoMIs.push_back(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

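// For system and agent scope, acquire and seq_cst loads have their GLC bit
// set; seq_cst loads are also preceded by "s_waitcnt vmcnt(0)"; and acquire
// and seq_cst loads are followed by "s_waitcnt vmcnt(0)" and an L1 cache
// invalidate. Narrower scopes need no changes.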
bool SIMemoryLegalizer::expandAtomicLoad(const AtomicInfo &AI,
                                         MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;
  if (AI.SSID == SyncScope::System ||
      AI.SSID == MMI->getAgentSSID()) {
    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= setGLC(MI);

    if (AI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (AI.SSID == SyncScope::SingleThread ||
             AI.SSID == MMI->getWorkgroupSSID() ||
             AI.SSID == MMI->getWavefrontSSID()) {
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

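// For system and agent scope, release and seq_cst stores are preceded by
// "s_waitcnt vmcnt(0)" so that all outstanding vector memory operations have
// completed first.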
bool SIMemoryLegalizer::expandAtomicStore(const AtomicInfo &AI,
                                          MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (AI.SSID == SyncScope::System ||
      AI.SSID == MMI->getAgentSSID()) {
    if (AI.Ordering == AtomicOrdering::Release ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    return Changed;
  } else if (AI.SSID == SyncScope::SingleThread ||
             AI.SSID == MMI->getWorkgroupSSID() ||
             AI.SSID == MMI->getWavefrontSSID()) {
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

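// For system and agent scope, a cmpxchg is preceded by "s_waitcnt vmcnt(0)"
// when its ordering is release, acq_rel or seq_cst, or its failure ordering
// is seq_cst; it is followed by "s_waitcnt vmcnt(0)" and an L1 cache
// invalidate when either ordering has acquire semantics. At narrower scopes
// only the GLC bit is set.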
bool SIMemoryLegalizer::expandAtomicCmpxchg(const AtomicInfo &AI,
                                            MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (AI.SSID == SyncScope::System ||
      AI.SSID == MMI->getAgentSSID()) {
    if (AI.Ordering == AtomicOrdering::Release ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent ||
        AI.FailureOrdering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent ||
        AI.FailureOrdering == AtomicOrdering::Acquire ||
        AI.FailureOrdering == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (AI.SSID == SyncScope::SingleThread ||
             AI.SSID == MMI->getWorkgroupSSID() ||
             AI.SSID == MMI->getWavefrontSSID()) {
    Changed |= setGLC(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

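// Read-modify-write atomics follow the same pattern as cmpxchg, except that
// only the single (success) ordering is considered.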
bool SIMemoryLegalizer::expandAtomicRmw(const AtomicInfo &AI,
                                        MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (AI.SSID == SyncScope::System ||
      AI.SSID == MMI->getAgentSSID()) {
    if (AI.Ordering == AtomicOrdering::Release ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (AI.SSID == SyncScope::SingleThread ||
             AI.SSID == MMI->getWorkgroupSSID() ||
             AI.SSID == MMI->getWavefrontSSID()) {
    Changed |= setGLC(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

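// Walk every instruction that may be atomic, classify it as a fence, load,
// store, cmpxchg or rmw, expand it accordingly, and finally erase any
// ATOMIC_FENCE pseudos that were queued for removal.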
bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits());

  CTX = &MF.getFunction()->getContext();
  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
  TII = ST.getInstrInfo();

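  // Encode a waitcnt immediate that waits for all vector memory operations
  // (vmcnt(0)) while leaving expcnt and lgkmcnt at their "no wait" maximums.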
  Vmcnt0Immediate =
      AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV));
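  // BUFFER_WBINVL1_VOL is not available on Southern Islands, so the plain
  // BUFFER_WBINVL1 form is used there instead.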
  Wbinvl1Opcode = ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ?
      AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL;

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

      if (const auto &AI = getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(AI.getValue(), MI);
      else if (const auto &AI = getAtomicLoadInfo(MI))
        Changed |= expandAtomicLoad(AI.getValue(), MI);
      else if (const auto &AI = getAtomicStoreInfo(MI))
        Changed |= expandAtomicStore(AI.getValue(), MI);
      else if (const auto &AI = getAtomicCmpxchgInfo(MI))
        Changed |= expandAtomicCmpxchg(AI.getValue(), MI);
      else if (const auto &AI = getAtomicRmwInfo(MI))
        Changed |= expandAtomicRmw(AI.getValue(), MI);
    }
  }

  Changed |= removeAtomicPseudoMIs();
  return Changed;
}

INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)

char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}