xref: /llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (revision e9a5a77ee3e861ec2b1a5fefc55d86fbb1252fcb)
1 //===--- SIMemoryLegalizer.cpp ----------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Memory legalizer - implements memory model. More information can be
12 /// found here:
13 ///   http://llvm.org/docs/AMDGPUUsage.html#memory-model
14 ///
15 //
16 //===----------------------------------------------------------------------===//
17 
#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/DiagnosticInfo.h"
#include <list>
25 
26 using namespace llvm;
27 using namespace llvm::AMDGPU;
28 
29 #define DEBUG_TYPE "si-memory-legalizer"
30 #define PASS_NAME "SI Memory Legalizer"
31 
32 namespace {
33 
34 class SIMemoryLegalizer final : public MachineFunctionPass {
35 private:
36   struct AtomicInfo final {
37     SyncScope::ID SSID = SyncScope::System;
38     AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent;
39     AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent;
40 
41     AtomicInfo() {}
42 
43     AtomicInfo(SyncScope::ID SSID,
44                AtomicOrdering Ordering,
45                AtomicOrdering FailureOrdering)
46         : SSID(SSID),
47           Ordering(Ordering),
48           FailureOrdering(FailureOrdering) {}
49 
50     AtomicInfo(const MachineMemOperand *MMO)
51         : SSID(MMO->getSyncScopeID()),
52           Ordering(MMO->getOrdering()),
53           FailureOrdering(MMO->getFailureOrdering()) {}
54   };
55 
56   /// \brief LLVM context.
57   LLVMContext *CTX = nullptr;
58   /// \brief Machine module info.
59   const AMDGPUMachineModuleInfo *MMI = nullptr;
60   /// \brief Instruction info.
61   const SIInstrInfo *TII = nullptr;
62 
63   /// \brief Immediate for "vmcnt(0)".
64   unsigned Vmcnt0Immediate = 0;
65   /// \brief Opcode for cache invalidation instruction (L1).
66   unsigned Wbinvl1Opcode = 0;
67 
68   /// \brief List of atomic pseudo instructions.
69   std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
70 
71   /// \brief Inserts "buffer_wbinvl1_vol" instruction \p Before or after \p MI.
72   /// Always returns true.
73   bool insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
74                               bool Before = true) const;
75   /// \brief Inserts "s_waitcnt vmcnt(0)" instruction \p Before or after \p MI.
76   /// Always returns true.
77   bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
78                            bool Before = true) const;
79 
80   /// \brief Sets GLC bit if present in \p MI. Returns true if \p MI is
81   /// modified, false otherwise.
82   bool setGLC(const MachineBasicBlock::iterator &MI) const;
83 
84   /// \brief Removes all processed atomic pseudo instructions from the current
85   /// function. Returns true if current function is modified, false otherwise.
86   bool removeAtomicPseudoMIs();
87 
88   /// \brief Reports unknown synchronization scope used in \p MI to LLVM
89   /// context.
90   void reportUnknownSynchScope(const MachineBasicBlock::iterator &MI);
91 
92   /// \returns Atomic fence info if \p MI is an atomic fence operation,
93   /// "None" otherwise.
94   Optional<AtomicInfo> getAtomicFenceInfo(
95       const MachineBasicBlock::iterator &MI) const;
96   /// \returns Atomic load info if \p MI is an atomic load operation,
97   /// "None" otherwise.
98   Optional<AtomicInfo> getAtomicLoadInfo(
99       const MachineBasicBlock::iterator &MI) const;
100   /// \returns Atomic store info if \p MI is an atomic store operation,
101   /// "None" otherwise.
102   Optional<AtomicInfo> getAtomicStoreInfo(
103       const MachineBasicBlock::iterator &MI) const;
104   /// \returns Atomic cmpxchg info if \p MI is an atomic cmpxchg operation,
105   /// "None" otherwise.
106   Optional<AtomicInfo> getAtomicCmpxchgInfo(
107       const MachineBasicBlock::iterator &MI) const;
108   /// \returns Atomic rmw info if \p MI is an atomic rmw operation,
109   /// "None" otherwise.
110   Optional<AtomicInfo> getAtomicRmwInfo(
111       const MachineBasicBlock::iterator &MI) const;
112 
113   /// \brief Expands atomic fence operation \p MI. Returns true if
114   /// instructions are added/deleted or \p MI is modified, false otherwise.
115   bool expandAtomicFence(const AtomicInfo &AI,
116                          MachineBasicBlock::iterator &MI);
117   /// \brief Expands atomic load operation \p MI. Returns true if
118   /// instructions are added/deleted or \p MI is modified, false otherwise.
119   bool expandAtomicLoad(const AtomicInfo &AI,
120                         MachineBasicBlock::iterator &MI);
121   /// \brief Expands atomic store operation \p MI. Returns true if
122   /// instructions are added/deleted or \p MI is modified, false otherwise.
123   bool expandAtomicStore(const AtomicInfo &AI,
124                          MachineBasicBlock::iterator &MI);
125   /// \brief Expands atomic cmpxchg operation \p MI. Returns true if
126   /// instructions are added/deleted or \p MI is modified, false otherwise.
127   bool expandAtomicCmpxchg(const AtomicInfo &AI,
128                            MachineBasicBlock::iterator &MI);
129   /// \brief Expands atomic rmw operation \p MI. Returns true if
130   /// instructions are added/deleted or \p MI is modified, false otherwise.
131   bool expandAtomicRmw(const AtomicInfo &AI,
132                        MachineBasicBlock::iterator &MI);
133 
134 public:
135   static char ID;
136 
137   SIMemoryLegalizer()
138       : MachineFunctionPass(ID) {}
139 
140   void getAnalysisUsage(AnalysisUsage &AU) const override {
141     AU.setPreservesCFG();
142     MachineFunctionPass::getAnalysisUsage(AU);
143   }
144 
145   StringRef getPassName() const override {
146     return PASS_NAME;
147   }
148 
149   bool runOnMachineFunction(MachineFunction &MF) override;
150 };
151 
152 } // end namespace anonymous
153 
bool SIMemoryLegalizer::insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                                               bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // To insert after \p MI, advance the iterator so BuildMI (which inserts
  // before its iterator) places the new instruction just past \p MI.
  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(Wbinvl1Opcode));

  // NOTE: stepping back lands on the instruction just inserted, so in the
  // "after" case \p MI is deliberately repositioned onto the new
  // instruction. Callers rely on this: consecutive "after" insertions end
  // up in call order, and the main scan loop does not re-visit them.
  if (!Before)
    --MI;

  return true;
}
169 
bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                                            bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // To insert after \p MI, advance the iterator so BuildMI (which inserts
  // before its iterator) places the new instruction just past \p MI.
  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate);

  // NOTE: stepping back lands on the instruction just inserted, so in the
  // "after" case \p MI is deliberately repositioned onto the new
  // instruction. Callers rely on this: consecutive "after" insertions end
  // up in call order, and the main scan loop does not re-visit them.
  if (!Before)
    --MI;

  return true;
}
185 
186 bool SIMemoryLegalizer::setGLC(const MachineBasicBlock::iterator &MI) const {
187   int GLCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::glc);
188   if (GLCIdx == -1)
189     return false;
190 
191   MachineOperand &GLC = MI->getOperand(GLCIdx);
192   if (GLC.getImm() == 1)
193     return false;
194 
195   GLC.setImm(1);
196   return true;
197 }
198 
199 bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
200   if (AtomicPseudoMIs.empty())
201     return false;
202 
203   for (auto &MI : AtomicPseudoMIs)
204     MI->eraseFromParent();
205 
206   AtomicPseudoMIs.clear();
207   return true;
208 }
209 
210 void SIMemoryLegalizer::reportUnknownSynchScope(
211     const MachineBasicBlock::iterator &MI) {
212   DiagnosticInfoUnsupported Diag(*MI->getParent()->getParent()->getFunction(),
213                                  "Unsupported synchronization scope");
214   CTX->diagnose(Diag);
215 }
216 
217 Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicFenceInfo(
218     const MachineBasicBlock::iterator &MI) const {
219   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
220 
221   if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
222     return None;
223 
224   SyncScope::ID SSID =
225       static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
226   AtomicOrdering Ordering =
227       static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
228   return AtomicInfo(SSID, Ordering, AtomicOrdering::NotAtomic);
229 }
230 
231 Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicLoadInfo(
232     const MachineBasicBlock::iterator &MI) const {
233   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
234 
235   if (!(MI->mayLoad() && !MI->mayStore()))
236     return None;
237   if (!MI->hasOneMemOperand())
238     return AtomicInfo();
239 
240   const MachineMemOperand *MMO = *MI->memoperands_begin();
241   if (!MMO->isAtomic())
242     return None;
243 
244   return AtomicInfo(MMO);
245 }
246 
247 Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicStoreInfo(
248     const MachineBasicBlock::iterator &MI) const {
249   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
250 
251   if (!(!MI->mayLoad() && MI->mayStore()))
252     return None;
253   if (!MI->hasOneMemOperand())
254     return AtomicInfo();
255 
256   const MachineMemOperand *MMO = *MI->memoperands_begin();
257   if (!MMO->isAtomic())
258     return None;
259 
260   return AtomicInfo(MMO);
261 }
262 
263 Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicCmpxchgInfo(
264     const MachineBasicBlock::iterator &MI) const {
265   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
266 
267   if (!(MI->mayLoad() && MI->mayStore()))
268     return None;
269   if (!MI->hasOneMemOperand())
270     return AtomicInfo();
271 
272   const MachineMemOperand *MMO = *MI->memoperands_begin();
273   if (!MMO->isAtomic())
274     return None;
275   if (MMO->getFailureOrdering() == AtomicOrdering::NotAtomic)
276     return None;
277 
278   return AtomicInfo(MMO);
279 }
280 
281 Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicRmwInfo(
282     const MachineBasicBlock::iterator &MI) const {
283   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
284 
285   if (!(MI->mayLoad() && MI->mayStore()))
286     return None;
287   if (!MI->hasOneMemOperand())
288     return AtomicInfo();
289 
290   const MachineMemOperand *MMO = *MI->memoperands_begin();
291   if (!MMO->isAtomic())
292     return None;
293   if (MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
294     return None;
295 
296   return AtomicInfo(MMO);
297 }
298 
299 bool SIMemoryLegalizer::expandAtomicFence(const AtomicInfo &AI,
300                                           MachineBasicBlock::iterator &MI) {
301   assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
302 
303   bool Changed = false;
304   if (AI.SSID == SyncScope::System ||
305       AI.SSID == MMI->getAgentSSID()) {
306     if (AI.Ordering == AtomicOrdering::Acquire ||
307         AI.Ordering == AtomicOrdering::Release ||
308         AI.Ordering == AtomicOrdering::AcquireRelease ||
309         AI.Ordering == AtomicOrdering::SequentiallyConsistent)
310       Changed |= insertWaitcntVmcnt0(MI);
311 
312     if (AI.Ordering == AtomicOrdering::Acquire ||
313         AI.Ordering == AtomicOrdering::AcquireRelease ||
314         AI.Ordering == AtomicOrdering::SequentiallyConsistent)
315       Changed |= insertBufferWbinvl1Vol(MI);
316 
317     AtomicPseudoMIs.push_back(MI);
318     return Changed;
319   } else if (AI.SSID == SyncScope::SingleThread ||
320              AI.SSID == MMI->getWorkgroupSSID() ||
321              AI.SSID == MMI->getWavefrontSSID()) {
322     AtomicPseudoMIs.push_back(MI);
323     return Changed;
324   } else {
325     reportUnknownSynchScope(MI);
326     return Changed;
327   }
328 }
329 
330 bool SIMemoryLegalizer::expandAtomicLoad(const AtomicInfo &AI,
331                                          MachineBasicBlock::iterator &MI) {
332   assert(MI->mayLoad() && !MI->mayStore());
333 
334   bool Changed = false;
335   if (AI.SSID == SyncScope::System ||
336       AI.SSID == MMI->getAgentSSID()) {
337     if (AI.Ordering == AtomicOrdering::Acquire ||
338         AI.Ordering == AtomicOrdering::SequentiallyConsistent)
339       Changed |= setGLC(MI);
340 
341     if (AI.Ordering == AtomicOrdering::SequentiallyConsistent)
342       Changed |= insertWaitcntVmcnt0(MI);
343 
344     if (AI.Ordering == AtomicOrdering::Acquire ||
345         AI.Ordering == AtomicOrdering::SequentiallyConsistent) {
346       Changed |= insertWaitcntVmcnt0(MI, false);
347       Changed |= insertBufferWbinvl1Vol(MI, false);
348     }
349 
350     return Changed;
351   } else if (AI.SSID == SyncScope::SingleThread ||
352              AI.SSID == MMI->getWorkgroupSSID() ||
353              AI.SSID == MMI->getWavefrontSSID()) {
354     return Changed;
355   } else {
356     reportUnknownSynchScope(MI);
357     return Changed;
358   }
359 }
360 
361 bool SIMemoryLegalizer::expandAtomicStore(const AtomicInfo &AI,
362                                           MachineBasicBlock::iterator &MI) {
363   assert(!MI->mayLoad() && MI->mayStore());
364 
365   bool Changed = false;
366   if (AI.SSID == SyncScope::System ||
367       AI.SSID == MMI->getAgentSSID()) {
368     if (AI.Ordering == AtomicOrdering::Release ||
369         AI.Ordering == AtomicOrdering::SequentiallyConsistent)
370       Changed |= insertWaitcntVmcnt0(MI);
371 
372     return Changed;
373   } else if (AI.SSID == SyncScope::SingleThread ||
374              AI.SSID == MMI->getWorkgroupSSID() ||
375              AI.SSID == MMI->getWavefrontSSID()) {
376     return Changed;
377   } else {
378     reportUnknownSynchScope(MI);
379     return Changed;
380   }
381 }
382 
bool SIMemoryLegalizer::expandAtomicCmpxchg(const AtomicInfo &AI,
                                            MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (AI.SSID == SyncScope::System ||
      AI.SSID == MMI->getAgentSSID()) {
    // Release half: drain outstanding vector memory operations before the
    // cmpxchg executes. A seq_cst *failure* ordering also requires this,
    // even when the success ordering alone would not.
    if (AI.Ordering == AtomicOrdering::Release ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent ||
        AI.FailureOrdering == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    // Acquire half: after the cmpxchg, wait for it to complete and then
    // invalidate L1 so subsequent loads observe other agents' writes.
    // Either the success or the failure ordering can demand this.
    if (AI.Ordering == AtomicOrdering::Acquire ||
        AI.Ordering == AtomicOrdering::AcquireRelease ||
        AI.Ordering == AtomicOrdering::SequentiallyConsistent ||
        AI.FailureOrdering == AtomicOrdering::Acquire ||
        AI.FailureOrdering == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (AI.SSID == SyncScope::SingleThread ||
             AI.SSID == MMI->getWorkgroupSSID() ||
             AI.SSID == MMI->getWavefrontSSID()) {
    // No cache maintenance at these scopes, but GLC is still forced —
    // presumably so the atomic returns the old value; confirm against the
    // ISA documentation.
    Changed |= setGLC(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}
416 
417 bool SIMemoryLegalizer::expandAtomicRmw(const AtomicInfo &AI,
418                                         MachineBasicBlock::iterator &MI) {
419   assert(MI->mayLoad() && MI->mayStore());
420 
421   bool Changed = false;
422   if (AI.SSID == SyncScope::System ||
423       AI.SSID == MMI->getAgentSSID()) {
424     if (AI.Ordering == AtomicOrdering::Release ||
425         AI.Ordering == AtomicOrdering::AcquireRelease ||
426         AI.Ordering == AtomicOrdering::SequentiallyConsistent)
427       Changed |= insertWaitcntVmcnt0(MI);
428 
429     if (AI.Ordering == AtomicOrdering::Acquire ||
430         AI.Ordering == AtomicOrdering::AcquireRelease ||
431         AI.Ordering == AtomicOrdering::SequentiallyConsistent) {
432       Changed |= insertWaitcntVmcnt0(MI, false);
433       Changed |= insertBufferWbinvl1Vol(MI, false);
434     }
435 
436     return Changed;
437   } else if (AI.SSID == SyncScope::SingleThread ||
438              AI.SSID == MMI->getWorkgroupSSID() ||
439              AI.SSID == MMI->getWavefrontSSID()) {
440     Changed |= setGLC(MI);
441     return Changed;
442   } else {
443     reportUnknownSynchScope(MI);
444     return Changed;
445   }
446 }
447 
448 bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
449   bool Changed = false;
450   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
451   const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
452 
453   CTX = &MF.getFunction()->getContext();
454   MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
455   TII = ST.getInstrInfo();
456 
457   Vmcnt0Immediate =
458       AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV));
459   Wbinvl1Opcode = ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ?
460       AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL;
461 
462   for (auto &MBB : MF) {
463     for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
464       if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
465         continue;
466 
467       if (const auto &AI = getAtomicFenceInfo(MI))
468         Changed |= expandAtomicFence(AI.getValue(), MI);
469       else if (const auto &AI = getAtomicLoadInfo(MI))
470         Changed |= expandAtomicLoad(AI.getValue(), MI);
471       else if (const auto &AI = getAtomicStoreInfo(MI))
472         Changed |= expandAtomicStore(AI.getValue(), MI);
473       else if (const auto &AI = getAtomicCmpxchgInfo(MI))
474         Changed |= expandAtomicCmpxchg(AI.getValue(), MI);
475       else if (const auto &AI = getAtomicRmwInfo(MI))
476         Changed |= expandAtomicRmw(AI.getValue(), MI);
477     }
478   }
479 
480   Changed |= removeAtomicPseudoMIs();
481   return Changed;
482 }
483 
INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)

// The address of ID uniquely identifies this pass to the pass machinery;
// SIMemoryLegalizerID exposes it to other AMDGPU components.
char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;

/// \brief Creates a new SI memory legalizer pass instance.
FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}
492