//===- SIMemoryLegalizer.cpp ----------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Memory legalizer - implements the memory model. More information
/// can be found here:
///   http://llvm.org/docs/AMDGPUUsage.html#memory-model
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include <cassert>
#include <list>

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

namespace {

class SIMemOpInfo final {
private:
  SyncScope::ID SSID = SyncScope::System;
  AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent;
  AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent;

  SIMemOpInfo() = default;

  SIMemOpInfo(SyncScope::ID SSID,
              AtomicOrdering Ordering,
              AtomicOrdering FailureOrdering)
      : SSID(SSID),
        Ordering(Ordering),
        FailureOrdering(FailureOrdering) {}

  SIMemOpInfo(const MachineMemOperand *MMO)
      : SSID(MMO->getSyncScopeID()),
        Ordering(MMO->getOrdering()),
        FailureOrdering(MMO->getFailureOrdering()) {}

public:
  /// \returns Synchronization scope ID of the machine instruction used to
  /// create this SIMemOpInfo.
  SyncScope::ID getSSID() const {
    return SSID;
  }
  /// \returns Ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getOrdering() const {
    return Ordering;
  }
  /// \returns Failure ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getFailureOrdering() const {
    return FailureOrdering;
  }

  /// \returns Load info if \p MI is a load operation, "None" otherwise.
  static Optional<SIMemOpInfo> getLoadInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Store info if \p MI is a store operation, "None" otherwise.
  static Optional<SIMemOpInfo> getStoreInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Atomic fence info if \p MI is an atomic fence operation,
  /// "None" otherwise.
  static Optional<SIMemOpInfo> getAtomicFenceInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Atomic cmpxchg info if \p MI is an atomic cmpxchg operation,
  /// "None" otherwise.
  static Optional<SIMemOpInfo> getAtomicCmpxchgInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Atomic rmw info if \p MI is an atomic rmw operation,
  /// "None" otherwise.
  static Optional<SIMemOpInfo> getAtomicRmwInfo(
      const MachineBasicBlock::iterator &MI);
};

class SIMemoryLegalizer final : public MachineFunctionPass {
private:
  /// \brief LLVM context.
  LLVMContext *CTX = nullptr;

  /// \brief Machine module info.
  const AMDGPUMachineModuleInfo *MMI = nullptr;

  /// \brief Instruction info.
  const SIInstrInfo *TII = nullptr;

  /// \brief Immediate for "vmcnt(0)".
  unsigned Vmcnt0Immediate = 0;

  /// \brief Opcode for cache invalidation instruction (L1).
  unsigned Wbinvl1Opcode = 0;

  /// \brief List of atomic pseudo instructions.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// \brief Inserts "buffer_wbinvl1_vol" instruction \p Before or after \p MI.
  /// Always returns true.
  bool insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                              bool Before = true) const;
  /// \brief Inserts "s_waitcnt vmcnt(0)" instruction \p Before or after \p MI.
  /// Always returns true.
  bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                           bool Before = true) const;

  /// \brief Sets GLC bit if present in \p MI. Returns true if \p MI is
  /// modified, false otherwise.
  bool setGLC(const MachineBasicBlock::iterator &MI) const;

  /// \brief Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if current function is modified, false otherwise.
  bool removeAtomicPseudoMIs();

  /// \brief Reports unknown synchronization scope used in \p MI to LLVM
  /// context.
  void reportUnknownSynchScope(const MachineBasicBlock::iterator &MI);

  /// \brief Expands load operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandLoad(const SIMemOpInfo &MOI,
                  MachineBasicBlock::iterator &MI);
  /// \brief Expands store operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandStore(const SIMemOpInfo &MOI,
                   MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic fence operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic cmpxchg operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicCmpxchg(const SIMemOpInfo &MOI,
                           MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic rmw operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicRmw(const SIMemOpInfo &MOI,
                       MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end anonymous namespace

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getLoadInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && !MI->mayStore()))
    return None;
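  // Without a single memory operand to inspect, be conservative and assume
  // the strongest requirements (the default SIMemOpInfo: system scope,
  // sequentially consistent).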
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;

  return SIMemOpInfo(MMO);
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getStoreInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(!MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;

  return SIMemOpInfo(MMO);
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getAtomicFenceInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return None;

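  // The ATOMIC_FENCE pseudo carries the ordering as operand 0 and the
  // synchronization scope as operand 1.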
  SyncScope::ID SSID =
      static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
  return SIMemOpInfo(SSID, Ordering, AtomicOrdering::NotAtomic);
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getAtomicCmpxchgInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;
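  // A cmpxchg always carries a failure ordering; an atomic without one is a
  // plain RMW and is handled by getAtomicRmwInfo().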
  if (MMO->getFailureOrdering() == AtomicOrdering::NotAtomic)
    return None;

  return SIMemOpInfo(MMO);
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getAtomicRmwInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo();

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (!MMO->isAtomic())
    return None;
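  // An atomic RMW has no failure ordering; operations that do carry one are
  // cmpxchg and are handled by getAtomicCmpxchgInfo().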
  if (MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return None;

  return SIMemOpInfo(MMO);
}

bool SIMemoryLegalizer::insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                                               bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

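  // To insert after MI, temporarily advance the iterator so BuildMI places the
  // new instruction following MI, then step back onto the inserted instruction.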
  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(Wbinvl1Opcode));

  if (!Before)
    --MI;

  return true;
}

bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                                            bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

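  // Same iterator handling as insertBufferWbinvl1Vol() when inserting after MI.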
  if (!Before)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate);

  if (!Before)
    --MI;

  return true;
}

bool SIMemoryLegalizer::setGLC(const MachineBasicBlock::iterator &MI) const {
  int GLCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::glc);
  if (GLCIdx == -1)
    return false;

  MachineOperand &GLC = MI->getOperand(GLCIdx);
  if (GLC.getImm() == 1)
    return false;

  GLC.setImm(1);
  return true;
}

bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}

void SIMemoryLegalizer::reportUnknownSynchScope(
    const MachineBasicBlock::iterator &MI) {
  DiagnosticInfoUnsupported Diag(*MI->getParent()->getParent()->getFunction(),
                                 "Unsupported synchronization scope");
  CTX->diagnose(Diag);
}

bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;
  if (MOI.getSSID() == SyncScope::System ||
      MOI.getSSID() == MMI->getAgentSSID()) {
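    // GLC forces the load to bypass the non-coherent L1 cache so it observes
    // data made visible at agent/system scope.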
    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= setGLC(MI);

    if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (MOI.getSSID() == SyncScope::SingleThread ||
             MOI.getSSID() == MMI->getWorkgroupSSID() ||
             MOI.getSSID() == MMI->getWavefrontSSID()) {
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (MOI.getSSID() == SyncScope::System ||
      MOI.getSSID() == MMI->getAgentSSID()) {
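    // A release store must first wait for all outstanding vector memory
    // operations to complete (s_waitcnt vmcnt(0)).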
    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    return Changed;
  } else if (MOI.getSSID() == SyncScope::SingleThread ||
             MOI.getSSID() == MMI->getWorkgroupSSID() ||
             MOI.getSSID() == MMI->getWavefrontSSID()) {
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  bool Changed = false;
  if (MOI.getSSID() == SyncScope::System ||
      MOI.getSSID() == MMI->getAgentSSID()) {
    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertBufferWbinvl1Vol(MI);

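    // ATOMIC_FENCE is a pseudo that emits no code by itself; remember it so
    // removeAtomicPseudoMIs() can erase it after expansion.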
    AtomicPseudoMIs.push_back(MI);
    return Changed;
  } else if (MOI.getSSID() == SyncScope::SingleThread ||
             MOI.getSSID() == MMI->getWorkgroupSSID() ||
             MOI.getSSID() == MMI->getWavefrontSSID()) {
    AtomicPseudoMIs.push_back(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandAtomicCmpxchg(const SIMemOpInfo &MOI,
                                            MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (MOI.getSSID() == SyncScope::System ||
      MOI.getSSID() == MMI->getAgentSSID()) {
    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (MOI.getSSID() == SyncScope::SingleThread ||
             MOI.getSSID() == MMI->getWorkgroupSSID() ||
             MOI.getSSID() == MMI->getWavefrontSSID()) {
    Changed |= setGLC(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::expandAtomicRmw(const SIMemOpInfo &MOI,
                                        MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  if (MOI.getSSID() == SyncScope::System ||
      MOI.getSSID() == MMI->getAgentSSID()) {
    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= insertWaitcntVmcnt0(MI);

    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= insertWaitcntVmcnt0(MI, false);
      Changed |= insertBufferWbinvl1Vol(MI, false);
    }

    return Changed;
  } else if (MOI.getSSID() == SyncScope::SingleThread ||
             MOI.getSSID() == MMI->getWorkgroupSSID() ||
             MOI.getSSID() == MMI->getWavefrontSSID()) {
    Changed |= setGLC(MI);
    return Changed;
  } else {
    reportUnknownSynchScope(MI);
    return Changed;
  }
}

bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits());

  CTX = &MF.getFunction()->getContext();
  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
  TII = ST.getInstrInfo();

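  // Encode an s_waitcnt immediate that waits only on vmcnt(0); expcnt and
  // lgkmcnt are set to their full bit masks, i.e. "no wait".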
  Vmcnt0Immediate =
      AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV));
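  // BUFFER_WBINVL1_VOL is not available on Southern Islands, so fall back to
  // BUFFER_WBINVL1 there.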
  Wbinvl1Opcode = ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ?
      AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL;

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

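      // Dispatch on the kind of memory operation; the get*Info() helpers
      // return None when MI is not of the corresponding kind.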
      if (const auto &MOI = SIMemOpInfo::getLoadInfo(MI))
        Changed |= expandLoad(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getStoreInfo(MI))
        Changed |= expandStore(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getAtomicCmpxchgInfo(MI))
        Changed |= expandAtomicCmpxchg(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getAtomicRmwInfo(MI))
        Changed |= expandAtomicRmw(MOI.getValue(), MI);
    }
  }

  Changed |= removeAtomicPseudoMIs();
  return Changed;
}

INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)

char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}