//===- SIMemoryLegalizer.cpp ----------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Memory legalizer - implements the memory model. More information
/// can be found here:
///   http://llvm.org/docs/AMDGPUUsage.html#memory-model
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include <cassert>
#include <list>

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

namespace {

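/// \brief Synchronization scope and atomic ordering information extracted from
/// a single memory-accessing machine instruction, used to decide how that
/// instruction must be expanded.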
class SIMemOpInfo final {
private:
  SyncScope::ID SSID = SyncScope::System;
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;

  SIMemOpInfo(SyncScope::ID SSID, AtomicOrdering Ordering)
      : SSID(SSID), Ordering(Ordering) {}

  SIMemOpInfo(SyncScope::ID SSID, AtomicOrdering Ordering,
              AtomicOrdering FailureOrdering)
      : SSID(SSID), Ordering(Ordering), FailureOrdering(FailureOrdering) {}

public:
  /// \returns Synchronization scope ID of the machine instruction used to
  /// create this SIMemOpInfo.
  SyncScope::ID getSSID() const {
    return SSID;
  }
  /// \returns Ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getOrdering() const {
    return Ordering;
  }
  /// \returns Failure ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getFailureOrdering() const {
    return FailureOrdering;
  }

  /// \returns True if the ordering constraint of the machine instruction used
  /// to create this SIMemOpInfo is unordered or stronger, false otherwise.
  bool isAtomic() const {
    return Ordering != AtomicOrdering::NotAtomic;
  }

  /// \returns Load info if \p MI is a load operation, "None" otherwise.
  static Optional<SIMemOpInfo> getLoadInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Store info if \p MI is a store operation, "None" otherwise.
  static Optional<SIMemOpInfo> getStoreInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Atomic fence info if \p MI is an atomic fence operation,
  /// "None" otherwise.
  static Optional<SIMemOpInfo> getAtomicFenceInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Atomic cmpxchg info if \p MI is an atomic cmpxchg operation,
  /// "None" otherwise.
  static Optional<SIMemOpInfo> getAtomicCmpxchgInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Atomic rmw info if \p MI is an atomic rmw operation,
  /// "None" otherwise.
  static Optional<SIMemOpInfo> getAtomicRmwInfo(
      const MachineBasicBlock::iterator &MI);
};

class SIMemoryLegalizer final : public MachineFunctionPass {
private:
  /// \brief LLVM context.
  LLVMContext *CTX = nullptr;

  /// \brief Machine module info.
  const AMDGPUMachineModuleInfo *MMI = nullptr;

  /// \brief Instruction info.
  const SIInstrInfo *TII = nullptr;

  /// \brief Immediate for "vmcnt(0)".
  unsigned Vmcnt0Immediate = 0;

  /// \brief Opcode for cache invalidation instruction (L1).
  unsigned Wbinvl1Opcode = 0;

  /// \brief List of atomic pseudo instructions.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// \brief Inserts the L1 cache invalidating instruction ("buffer_wbinvl1" or
  /// "buffer_wbinvl1_vol", depending on the subtarget) before or after \p MI,
  /// as selected by \p Before. Always returns true.
  bool insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                              bool Before = true) const;
  /// \brief Inserts an "s_waitcnt vmcnt(0)" instruction before or after \p MI,
  /// as selected by \p Before. Always returns true.
  bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                           bool Before = true) const;

  /// \brief Sets GLC bit if present in \p MI. Returns true if \p MI is
  /// modified, false otherwise.
  bool setGLC(const MachineBasicBlock::iterator &MI) const;

  /// \brief Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if current function is modified, false otherwise.
  bool removeAtomicPseudoMIs();

  /// \brief Reports unknown synchronization scope used in \p MI to LLVM
  /// context.
  void reportUnknownSynchScope(const MachineBasicBlock::iterator &MI);

  /// \brief Expands load operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandLoad(const SIMemOpInfo &MOI,
                  MachineBasicBlock::iterator &MI);
  /// \brief Expands store operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandStore(const SIMemOpInfo &MOI,
                   MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic fence operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic cmpxchg operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicCmpxchg(const SIMemOpInfo &MOI,
                           MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic rmw operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicRmw(const SIMemOpInfo &MOI,
                       MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end anonymous namespace

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getLoadInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && !MI->mayStore()))
    return None;
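  // Without exactly one memory operand the scope and ordering are unknown, so
  // conservatively assume system scope and sequentially consistent ordering.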
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo(SyncScope::System,
                       AtomicOrdering::SequentiallyConsistent);

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  return SIMemOpInfo(MMO->getSyncScopeID(), MMO->getOrdering());
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getStoreInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(!MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo(SyncScope::System,
                       AtomicOrdering::SequentiallyConsistent);

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  return SIMemOpInfo(MMO->getSyncScopeID(), MMO->getOrdering());
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getAtomicFenceInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return None;

  SyncScope::ID SSID =
      static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
  return SIMemOpInfo(SSID, Ordering);
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getAtomicCmpxchgInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo(SyncScope::System,
                       AtomicOrdering::SequentiallyConsistent,
                       AtomicOrdering::SequentiallyConsistent);

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  return SIMemOpInfo(MMO->getSyncScopeID(), MMO->getOrdering(),
                     MMO->getFailureOrdering());
}

/* static */
Optional<SIMemOpInfo> SIMemOpInfo::getAtomicRmwInfo(
    const MachineBasicBlock::iterator &MI) {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;
  if (!MI->hasOneMemOperand())
    return SIMemOpInfo(SyncScope::System,
                       AtomicOrdering::SequentiallyConsistent);

  const MachineMemOperand *MMO = *MI->memoperands_begin();
  return SIMemOpInfo(MMO->getSyncScopeID(), MMO->getOrdering());
}

bool SIMemoryLegalizer::insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
                                               bool Before) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

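  // BuildMI inserts before the given iterator, so to insert after MI step past
  // it first and step back once the new instruction has been added, leaving
  // the caller's iterator pointing at the original instruction.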
270     ++MI;
271 
272   BuildMI(MBB, MI, DL, TII->get(Wbinvl1Opcode));
273 
274   if (!Before)
275     --MI;
276 
277   return true;
278 }
279 
280 bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
281                                             bool Before) const {
282   MachineBasicBlock &MBB = *MI->getParent();
283   DebugLoc DL = MI->getDebugLoc();
284 
285   if (!Before)
286     ++MI;
287 
288   BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate);
289 
290   if (!Before)
291     --MI;
292 
293   return true;
294 }
295 
296 bool SIMemoryLegalizer::setGLC(const MachineBasicBlock::iterator &MI) const {
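  // The GLC bit requests a globally coherent access; on GCN this makes vector
  // memory loads bypass the per-CU L1 cache, which acquire operations at agent
  // or system scope rely on. Not every memory instruction has a GLC operand,
  // so bail out when it is absent.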
  int GLCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::glc);
  if (GLCIdx == -1)
    return false;

  MachineOperand &GLC = MI->getOperand(GLCIdx);
  if (GLC.getImm() == 1)
    return false;

  GLC.setImm(1);
  return true;
}

bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}

void SIMemoryLegalizer::reportUnknownSynchScope(
    const MachineBasicBlock::iterator &MI) {
  DiagnosticInfoUnsupported Diag(*MI->getParent()->getParent()->getFunction(),
                                 "Unsupported synchronization scope");
  CTX->diagnose(Diag);
}

bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getSSID() == SyncScope::System ||
        MOI.getSSID() == MMI->getAgentSSID()) {
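      // Acquire and seq_cst loads must bypass the L1 cache (GLC). A seq_cst
      // load additionally waits for prior vector memory operations to complete
      // first, and both orderings are followed by a wait and an L1 invalidate
      // so that subsequent accesses do not read stale cached data.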
      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= setGLC(MI);

      if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertWaitcntVmcnt0(MI);

      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
        Changed |= insertWaitcntVmcnt0(MI, false);
        Changed |= insertBufferWbinvl1Vol(MI, false);
      }

      return Changed;
    } else if (MOI.getSSID() == SyncScope::SingleThread ||
               MOI.getSSID() == MMI->getWorkgroupSSID() ||
               MOI.getSSID() == MMI->getWavefrontSSID()) {
      return Changed;
    } else {
      reportUnknownSynchScope(MI);
      return Changed;
    }
  }

  return Changed;
}

bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getSSID() == SyncScope::System ||
        MOI.getSSID() == MMI->getAgentSSID()) {
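      // A release or seq_cst store must wait for all prior vector memory
      // operations to complete before the store itself is performed.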
      if (MOI.getOrdering() == AtomicOrdering::Release ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertWaitcntVmcnt0(MI);

      return Changed;
    } else if (MOI.getSSID() == SyncScope::SingleThread ||
               MOI.getSSID() == MMI->getWorkgroupSSID() ||
               MOI.getSSID() == MMI->getWavefrontSSID()) {
      return Changed;
    } else {
      reportUnknownSynchScope(MI);
      return Changed;
    }
  }

  return Changed;
}

bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getSSID() == SyncScope::System ||
        MOI.getSSID() == MMI->getAgentSSID()) {
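      // The s_waitcnt provides the release half of the fence (prior vector
      // memory operations complete), and the L1 invalidate provides the
      // acquire half (later loads do not read stale cached data).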
      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::Release ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertWaitcntVmcnt0(MI);

      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertBufferWbinvl1Vol(MI);
      AtomicPseudoMIs.push_back(MI);
      return Changed;
    } else if (MOI.getSSID() == SyncScope::SingleThread ||
               MOI.getSSID() == MMI->getWorkgroupSSID() ||
               MOI.getSSID() == MMI->getWavefrontSSID()) {
      AtomicPseudoMIs.push_back(MI);
      return Changed;
    } else {
      reportUnknownSynchScope(MI);
      return Changed;
    }
  }

  return Changed;
}

bool SIMemoryLegalizer::expandAtomicCmpxchg(const SIMemOpInfo &MOI,
                                            MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getSSID() == SyncScope::System ||
        MOI.getSSID() == MMI->getAgentSSID()) {
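      // The failure ordering participates as well: a seq_cst failure ordering
      // also requires the leading wait, and an acquire or seq_cst failure
      // ordering also requires the trailing wait and L1 invalidate.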
      if (MOI.getOrdering() == AtomicOrdering::Release ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
          MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertWaitcntVmcnt0(MI);

      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
          MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
          MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
        Changed |= insertWaitcntVmcnt0(MI, false);
        Changed |= insertBufferWbinvl1Vol(MI, false);
      }

      return Changed;
    } else if (MOI.getSSID() == SyncScope::SingleThread ||
               MOI.getSSID() == MMI->getWorkgroupSSID() ||
               MOI.getSSID() == MMI->getWavefrontSSID()) {
      Changed |= setGLC(MI);
      return Changed;
    } else {
      reportUnknownSynchScope(MI);
      return Changed;
    }
  }

  return Changed;
}

bool SIMemoryLegalizer::expandAtomicRmw(const SIMemOpInfo &MOI,
                                        MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getSSID() == SyncScope::System ||
        MOI.getSSID() == MMI->getAgentSSID()) {
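      // A read-modify-write atomic combines the store and load rules: wait
      // before the operation for release semantics, and wait plus invalidate
      // after it for acquire semantics.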
      if (MOI.getOrdering() == AtomicOrdering::Release ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertWaitcntVmcnt0(MI);

      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
        Changed |= insertWaitcntVmcnt0(MI, false);
        Changed |= insertBufferWbinvl1Vol(MI, false);
      }

      return Changed;
    } else if (MOI.getSSID() == SyncScope::SingleThread ||
               MOI.getSSID() == MMI->getWorkgroupSSID() ||
               MOI.getSSID() == MMI->getWavefrontSSID()) {
      Changed |= setGLC(MI);
      return Changed;
    } else {
      reportUnknownSynchScope(MI);
      return Changed;
    }
  }

  return Changed;
}

bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits());

  CTX = &MF.getFunction()->getContext();
  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
  TII = ST.getInstrInfo();

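  // Encode an S_WAITCNT immediate that waits only on vmcnt: expcnt and lgkmcnt
  // are left at their maximum (no-wait) values.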
  Vmcnt0Immediate =
      AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV));
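  // BUFFER_WBINVL1_VOL is not available on Southern Islands, so fall back to
  // BUFFER_WBINVL1 on that generation.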
  Wbinvl1Opcode = ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ?
      AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL;

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

      if (const auto &MOI = SIMemOpInfo::getLoadInfo(MI))
        Changed |= expandLoad(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getStoreInfo(MI))
        Changed |= expandStore(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getAtomicCmpxchgInfo(MI))
        Changed |= expandAtomicCmpxchg(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getAtomicRmwInfo(MI))
        Changed |= expandAtomicRmw(MOI.getValue(), MI);
    }
  }

  Changed |= removeAtomicPseudoMIs();
  return Changed;
}

INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)

char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}