xref: /llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (revision 9c6cd0458b68c0d2c399ccf7592331227f02878f)
1 //===- SIMemoryLegalizer.cpp ----------------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Memory legalizer - implements memory model. More information can be
12 /// found here:
13 ///   http://llvm.org/docs/AMDGPUUsage.html#memory-model
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "AMDGPU.h"
18 #include "AMDGPUMachineModuleInfo.h"
19 #include "AMDGPUSubtarget.h"
20 #include "SIDefines.h"
21 #include "SIInstrInfo.h"
22 #include "Utils/AMDGPUBaseInfo.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/CodeGen/MachineBasicBlock.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineFunctionPass.h"
28 #include "llvm/CodeGen/MachineInstrBuilder.h"
29 #include "llvm/CodeGen/MachineMemOperand.h"
30 #include "llvm/CodeGen/MachineModuleInfo.h"
31 #include "llvm/CodeGen/MachineOperand.h"
32 #include "llvm/IR/DebugLoc.h"
33 #include "llvm/IR/DiagnosticInfo.h"
34 #include "llvm/IR/Function.h"
35 #include "llvm/IR/LLVMContext.h"
36 #include "llvm/MC/MCInstrDesc.h"
37 #include "llvm/Pass.h"
38 #include "llvm/Support/AtomicOrdering.h"
39 #include <cassert>
40 #include <list>
41 
42 using namespace llvm;
43 using namespace llvm::AMDGPU;
44 
45 #define DEBUG_TYPE "si-memory-legalizer"
46 #define PASS_NAME "SI Memory Legalizer"
47 
48 namespace {
49 
/// \brief Bundle of synchronization scope, ordering constraints and
/// non-temporal hint extracted from a memory (or atomic fence) machine
/// instruction. Instances are only produced by the static get*Info
/// factories below; an empty Optional means "not this kind of operation"
/// or "unsupported scope".
class SIMemOpInfo final {
private:
  // Synchronization scope; initialized to the most conservative scope.
  SyncScope::ID SSID = SyncScope::System;
  // Success ordering constraint of the operation.
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  // Failure ordering constraint (meaningful for cmpxchg-style operations).
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  // True only when every memory operand carries the non-temporal flag.
  bool IsNonTemporal = false;

  // Constructor for operations without a failure ordering (loads, stores,
  // fences).
  SIMemOpInfo(SyncScope::ID SSID, AtomicOrdering Ordering)
      : SSID(SSID), Ordering(Ordering) {}

  // Full constructor used when merging machine memory operands.
  SIMemOpInfo(SyncScope::ID SSID, AtomicOrdering Ordering,
              AtomicOrdering FailureOrdering, bool IsNonTemporal = false)
      : SSID(SSID), Ordering(Ordering), FailureOrdering(FailureOrdering),
        IsNonTemporal(IsNonTemporal) {}

  /// \returns Info constructed from \p MI, which has at least machine memory
  /// operand. Merges all memory operands into the widest scope / strongest
  /// ordering; returns None (after diagnosing) if a scope cannot be merged.
  static Optional<SIMemOpInfo> constructFromMIWithMMO(
      const MachineBasicBlock::iterator &MI);

public:
  /// \returns Synchronization scope ID of the machine instruction used to
  /// create this SIMemOpInfo.
  SyncScope::ID getSSID() const {
    return SSID;
  }
  /// \returns Ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getOrdering() const {
    return Ordering;
  }
  /// \returns Failure ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getFailureOrdering() const {
    return FailureOrdering;
  }
  /// \returns True if memory access of the machine instruction used to
  /// create this SIMemOpInfo is non-temporal, false otherwise.
  bool isNonTemporal() const {
    return IsNonTemporal;
  }

  /// \returns True if ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo is unordered or higher, false otherwise.
  bool isAtomic() const {
    return Ordering != AtomicOrdering::NotAtomic;
  }

  /// \returns Load info if \p MI is a load operation, "None" otherwise.
  static Optional<SIMemOpInfo> getLoadInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Store info if \p MI is a store operation, "None" otherwise.
  static Optional<SIMemOpInfo> getStoreInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Atomic fence info if \p MI is an atomic fence operation,
  /// "None" otherwise.
  static Optional<SIMemOpInfo> getAtomicFenceInfo(
      const MachineBasicBlock::iterator &MI);
  /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or
  /// rmw operation, "None" otherwise.
  static Optional<SIMemOpInfo> getAtomicCmpxchgOrRmwInfo(
      const MachineBasicBlock::iterator &MI);

  /// \brief Reports unknown synchronization scope used in \p MI to LLVM
  /// context.
  static void reportUnknownSyncScope(
      const MachineBasicBlock::iterator &MI);
};
118 
/// \brief Machine function pass that rewrites memory/atomic instructions so
/// the generated code honors the AMDGPU memory model (cache-bit toggling,
/// waitcnt insertion, cache invalidation, fence pseudo removal).
class SIMemoryLegalizer final : public MachineFunctionPass {
private:
  /// \brief Machine module info.
  const AMDGPUMachineModuleInfo *MMI = nullptr;

  /// \brief Instruction info.
  const SIInstrInfo *TII = nullptr;

  /// \brief Immediate for "vmcnt(0)".
  unsigned Vmcnt0Immediate = 0;

  /// \brief Opcode for cache invalidation instruction (L1).
  unsigned VmemSIMDCacheInvalidateOpc = 0;

  /// \brief List of atomic pseudo instructions. Collected during expansion
  /// and erased in bulk by removeAtomicPseudoMIs().
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// \brief Sets named bit (BitName) to "true" if present in \p MI. Returns
  /// true if \p MI is modified, false otherwise.
  /// NOTE(review): assumes the operand at BitIdx is an immediate (getImm is
  /// called unconditionally) — holds for glc/slc operands; confirm for any
  /// new BitName instantiation.
  template <uint16_t BitName>
  bool enableNamedBit(const MachineBasicBlock::iterator &MI) const {
    int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
    if (BitIdx == -1)
      return false;

    MachineOperand &Bit = MI->getOperand(BitIdx);
    // Already set: report "not modified" so Changed stays accurate.
    if (Bit.getImm() != 0)
      return false;

    Bit.setImm(1);
    return true;
  }

  /// \brief Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit<AMDGPU::OpName::glc>(MI);
  }

  /// \brief Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit<AMDGPU::OpName::slc>(MI);
  }

  /// \brief Inserts "buffer_wbinvl1_vol" instruction \p Before or after \p MI.
  /// Always returns true. When inserting after, \p MI is left pointing at the
  /// newly inserted instruction.
  bool insertVmemSIMDCacheInvalidate(MachineBasicBlock::iterator &MI,
                                     bool Before = true) const;
  /// \brief Inserts "s_waitcnt vmcnt(0)" instruction \p Before or after \p MI.
  /// Always returns true. When inserting after, \p MI is left pointing at the
  /// newly inserted instruction.
  bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                           bool Before = true) const;

  /// \brief Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if current function is modified, false otherwise.
  bool removeAtomicPseudoMIs();

  /// \brief Expands load operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandLoad(const SIMemOpInfo &MOI,
                  MachineBasicBlock::iterator &MI);
  /// \brief Expands store operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandStore(const SIMemOpInfo &MOI,
                   MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic fence operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);
  /// \brief Expands atomic cmpxchg or rmw operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Only inserts/removes/modifies instructions within blocks.
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};
210 
211 } // end namespace anonymous
212 
213 /* static */
214 Optional<SIMemOpInfo> SIMemOpInfo::constructFromMIWithMMO(
215     const MachineBasicBlock::iterator &MI) {
216   assert(MI->getNumMemOperands() > 0);
217 
218   const MachineFunction *MF = MI->getParent()->getParent();
219   const AMDGPUMachineModuleInfo *MMI =
220       &MF->getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
221 
222   SyncScope::ID SSID = SyncScope::SingleThread;
223   AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
224   AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
225   bool IsNonTemporal = true;
226 
227   // Validator should check whether or not MMOs cover the entire set of
228   // locations accessed by the memory instruction.
229   for (const auto &MMO : MI->memoperands()) {
230     const auto &IsSyncScopeInclusion =
231         MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
232     if (!IsSyncScopeInclusion) {
233       reportUnknownSyncScope(MI);
234       return None;
235     }
236 
237     SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
238     Ordering =
239         isStrongerThan(Ordering, MMO->getOrdering()) ?
240             Ordering : MMO->getOrdering();
241     FailureOrdering =
242         isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ?
243             FailureOrdering : MMO->getFailureOrdering();
244 
245     if (!(MMO->getFlags() & MachineMemOperand::MONonTemporal))
246       IsNonTemporal = false;
247   }
248 
249   return SIMemOpInfo(SSID, Ordering, FailureOrdering, IsNonTemporal);
250 }
251 
252 /* static */
253 Optional<SIMemOpInfo> SIMemOpInfo::getLoadInfo(
254     const MachineBasicBlock::iterator &MI) {
255   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
256 
257   if (!(MI->mayLoad() && !MI->mayStore()))
258     return None;
259 
260   // Be conservative if there are no memory operands.
261   if (MI->getNumMemOperands() == 0)
262     return SIMemOpInfo(SyncScope::System,
263                        AtomicOrdering::SequentiallyConsistent);
264 
265   return SIMemOpInfo::constructFromMIWithMMO(MI);
266 }
267 
268 /* static */
269 Optional<SIMemOpInfo> SIMemOpInfo::getStoreInfo(
270     const MachineBasicBlock::iterator &MI) {
271   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
272 
273   if (!(!MI->mayLoad() && MI->mayStore()))
274     return None;
275 
276   // Be conservative if there are no memory operands.
277   if (MI->getNumMemOperands() == 0)
278     return SIMemOpInfo(SyncScope::System,
279                        AtomicOrdering::SequentiallyConsistent);
280 
281   return SIMemOpInfo::constructFromMIWithMMO(MI);
282 }
283 
284 /* static */
285 Optional<SIMemOpInfo> SIMemOpInfo::getAtomicFenceInfo(
286     const MachineBasicBlock::iterator &MI) {
287   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
288 
289   if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
290     return None;
291 
292   SyncScope::ID SSID =
293       static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
294   AtomicOrdering Ordering =
295       static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
296   return SIMemOpInfo(SSID, Ordering);
297 }
298 
299 /* static */
300 Optional<SIMemOpInfo> SIMemOpInfo::getAtomicCmpxchgOrRmwInfo(
301     const MachineBasicBlock::iterator &MI) {
302   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
303 
304   if (!(MI->mayLoad() && MI->mayStore()))
305     return None;
306 
307   // Be conservative if there are no memory operands.
308   if (MI->getNumMemOperands() == 0)
309     return SIMemOpInfo(SyncScope::System,
310                        AtomicOrdering::SequentiallyConsistent,
311                        AtomicOrdering::SequentiallyConsistent);
312 
313   return SIMemOpInfo::constructFromMIWithMMO(MI);
314 }
315 
316 /* static */
317 void SIMemOpInfo::reportUnknownSyncScope(
318     const MachineBasicBlock::iterator &MI) {
319   DiagnosticInfoUnsupported Diag(MI->getParent()->getParent()->getFunction(),
320                                  "Unsupported synchronization scope");
321   LLVMContext *CTX = &MI->getParent()->getParent()->getFunction().getContext();
322   CTX->diagnose(Diag);
323 }
324 
325 bool SIMemoryLegalizer::insertVmemSIMDCacheInvalidate(
326   MachineBasicBlock::iterator &MI, bool Before) const {
327   MachineBasicBlock &MBB = *MI->getParent();
328   DebugLoc DL = MI->getDebugLoc();
329 
330   if (!Before)
331     ++MI;
332 
333   BuildMI(MBB, MI, DL, TII->get(VmemSIMDCacheInvalidateOpc));
334 
335   if (!Before)
336     --MI;
337 
338   return true;
339 }
340 
341 bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
342                                             bool Before) const {
343   MachineBasicBlock &MBB = *MI->getParent();
344   DebugLoc DL = MI->getDebugLoc();
345 
346   if (!Before)
347     ++MI;
348 
349   BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate);
350 
351   if (!Before)
352     --MI;
353 
354   return true;
355 }
356 
357 bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
358   if (AtomicPseudoMIs.empty())
359     return false;
360 
361   for (auto &MI : AtomicPseudoMIs)
362     MI->eraseFromParent();
363 
364   AtomicPseudoMIs.clear();
365   return true;
366 }
367 
// Expand an atomic (or non-temporal) load so it honors the memory model:
// GLC-bit toggling for cache bypass, waitcnt insertion for ordering, and
// L1 invalidation for acquire semantics.
bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    // System/agent scope needs explicit cache control; narrower scopes
    // (handled below) need nothing.
    if (MOI.getSSID() == SyncScope::System ||
        MOI.getSSID() == MMI->getAgentSSID()) {
      // Set GLC on the load for monotonic-or-stronger orderings.
      if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
          MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= enableGLCBit(MI);

      // seq_cst: drain prior vector memory operations before the load.
      if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertWaitcntVmcnt0(MI);

      // Acquire-flavored orderings: wait for the load itself to complete,
      // then invalidate the vector L1 cache. Note insertWaitcntVmcnt0(MI,
      // false) leaves MI on the inserted s_waitcnt, so the invalidate is
      // placed after it.
      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
        Changed |= insertWaitcntVmcnt0(MI, false);
        Changed |= insertVmemSIMDCacheInvalidate(MI, false);
      }

      return Changed;
    }

    // Workgroup scope and narrower: no code is required.
    if (MOI.getSSID() == SyncScope::SingleThread ||
        MOI.getSSID() == MMI->getWorkgroupSSID() ||
        MOI.getSSID() == MMI->getWavefrontSSID()) {
      return Changed;
    }

    llvm_unreachable("Unsupported synchronization scope");
  }

  // Atomic instructions do not have the nontemporal attribute.
  if (MOI.isNonTemporal()) {
    // Mark the access GLC+SLC so it bypasses/streams through the caches.
    Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);
    return Changed;
  }

  return Changed;
}
412 
413 bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
414                                     MachineBasicBlock::iterator &MI) {
415   assert(!MI->mayLoad() && MI->mayStore());
416 
417   bool Changed = false;
418 
419   if (MOI.isAtomic()) {
420     if (MOI.getSSID() == SyncScope::System ||
421         MOI.getSSID() == MMI->getAgentSSID()) {
422       if (MOI.getOrdering() == AtomicOrdering::Release ||
423           MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
424         Changed |= insertWaitcntVmcnt0(MI);
425 
426       return Changed;
427     }
428 
429     if (MOI.getSSID() == SyncScope::SingleThread ||
430         MOI.getSSID() == MMI->getWorkgroupSSID() ||
431         MOI.getSSID() == MMI->getWavefrontSSID()) {
432       return Changed;
433     }
434 
435     llvm_unreachable("Unsupported synchronization scope");
436   }
437 
438   // Atomic instructions do not have the nontemporal attribute.
439   if (MOI.isNonTemporal()) {
440     Changed |= enableGLCBit(MI);
441     Changed |= enableSLCBit(MI);
442     return Changed;
443   }
444 
445   return Changed;
446 }
447 
// Expand the ATOMIC_FENCE pseudo into the required waitcnt/invalidation
// sequence; the pseudo itself is queued for later removal.
bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getSSID() == SyncScope::System ||
        MOI.getSSID() == MMI->getAgentSSID()) {
      // Any acquire- or release-flavored fence waits for outstanding
      // vector memory operations to complete.
      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::Release ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertWaitcntVmcnt0(MI);

      // Acquire-flavored fences additionally invalidate the vector L1
      // cache so later loads cannot see stale data.
      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertVmemSIMDCacheInvalidate(MI);

      // The fence pseudo generates no code of its own; queue it so
      // removeAtomicPseudoMIs() erases it after the whole function is
      // processed.
      AtomicPseudoMIs.push_back(MI);
      return Changed;
    }

    // Workgroup scope and narrower: the fence needs no code, but the
    // pseudo must still be removed.
    if (MOI.getSSID() == SyncScope::SingleThread ||
        MOI.getSSID() == MMI->getWorkgroupSSID() ||
        MOI.getSSID() == MMI->getWavefrontSSID()) {
      AtomicPseudoMIs.push_back(MI);
      return Changed;
    }

    // Unlike the other expanders (which llvm_unreachable), an unknown
    // fence scope is diagnosed and the pseudo is deliberately left in
    // place.
    SIMemOpInfo::reportUnknownSyncScope(MI);
  }

  return Changed;
}
484 
// Expand an atomic cmpxchg/rmw; both the success ordering and (for
// cmpxchg) the failure ordering contribute to the required fencing.
bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
  MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getSSID() == SyncScope::System ||
        MOI.getSSID() == MMI->getAgentSSID()) {
      // Release-flavored success ordering (or seq_cst failure ordering)
      // requires draining prior vector memory operations first.
      if (MOI.getOrdering() == AtomicOrdering::Release ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
          MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
        Changed |= insertWaitcntVmcnt0(MI);

      // Acquire-flavored success or failure ordering: wait for the atomic
      // itself to complete, then invalidate the vector L1 cache.
      // insertWaitcntVmcnt0(MI, false) leaves MI on the inserted
      // s_waitcnt, so the invalidate is placed after it.
      if (MOI.getOrdering() == AtomicOrdering::Acquire ||
          MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
          MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
          MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
          MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
        Changed |= insertWaitcntVmcnt0(MI, false);
        Changed |= insertVmemSIMDCacheInvalidate(MI, false);
      }

      return Changed;
    }

    if (MOI.getSSID() == SyncScope::SingleThread ||
        MOI.getSSID() == MMI->getWorkgroupSSID() ||
        MOI.getSSID() == MMI->getWavefrontSSID()) {
      // NOTE(review): presumably GLC is set so the atomic returns the
      // original value — confirm against the ISA documentation.
      Changed |= enableGLCBit(MI);
      return Changed;
    }

    llvm_unreachable("Unsupported synchronization scope");
  }

  return Changed;
}
524 
// Pass entry point: walk every instruction, classify those flagged as
// maybeAtomic, and expand them per the AMDGPU memory model.
bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits());

  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
  TII = ST.getInstrInfo();

  // vmcnt(0) with expcnt/lgkmcnt left at their maximum (don't-wait)
  // values, encoded for this ISA version.
  Vmcnt0Immediate =
      AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV));
  // SI lacks BUFFER_WBINVL1_VOL; fall back to BUFFER_WBINVL1 there.
  VmemSIMDCacheInvalidateOpc =
     ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ?
       AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL;

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      // Fast reject: only instructions flagged maybeAtomic can need work.
      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

      // Classify and expand; the categories below are mutually exclusive
      // (load-only, store-only, fence pseudo, load+store). The expanders
      // may advance MI past instructions they insert after it.
      if (const auto &MOI = SIMemOpInfo::getLoadInfo(MI))
        Changed |= expandLoad(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getStoreInfo(MI))
        Changed |= expandStore(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(MOI.getValue(), MI);
      else if (const auto &MOI = SIMemOpInfo::getAtomicCmpxchgOrRmwInfo(MI))
        Changed |= expandAtomicCmpxchgOrRmw(MOI.getValue(), MI);
    }
  }

  // Erase the fence pseudos queued by expandAtomicFence.
  Changed |= removeAtomicPseudoMIs();
  return Changed;
}
558 
// Register the pass with the legacy pass manager.
INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)

// Pass identification: the *address* of ID uniquely identifies the pass.
char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;

// Factory used by the AMDGPU target machine to add this pass to the
// codegen pipeline.
FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}
567