xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
15ffd83dbSDimitry Andric //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
25ffd83dbSDimitry Andric //
35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric //
75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric //
95ffd83dbSDimitry Andric /// \file
105ffd83dbSDimitry Andric /// Insert s_clause instructions to form hard clauses.
115ffd83dbSDimitry Andric ///
125ffd83dbSDimitry Andric /// Clausing load instructions can give cache coherency benefits. Before gfx10,
135ffd83dbSDimitry Andric /// the hardware automatically detected "soft clauses", which were sequences of
145ffd83dbSDimitry Andric /// memory instructions of the same type. In gfx10 this detection was removed,
155ffd83dbSDimitry Andric /// and the s_clause instruction was introduced to explicitly mark "hard
165ffd83dbSDimitry Andric /// clauses".
175ffd83dbSDimitry Andric ///
185ffd83dbSDimitry Andric /// It's the scheduler's job to form the clauses by putting similar memory
195ffd83dbSDimitry Andric /// instructions next to each other. Our job is just to insert an s_clause
205ffd83dbSDimitry Andric /// instruction to mark the start of each clause.
215ffd83dbSDimitry Andric ///
225ffd83dbSDimitry Andric /// Note that hard clauses are very similar to, but logically distinct from, the
235ffd83dbSDimitry Andric /// groups of instructions that have to be restartable when XNACK is enabled.
245ffd83dbSDimitry Andric /// The rules are slightly different in each case. For example an s_nop
255ffd83dbSDimitry Andric /// instruction breaks a restartable group, but can appear in the middle of a
265ffd83dbSDimitry Andric /// hard clause. (Before gfx10 there wasn't a distinction, and both were called
275ffd83dbSDimitry Andric /// "soft clauses" or just "clauses".)
285ffd83dbSDimitry Andric ///
295ffd83dbSDimitry Andric /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
305ffd83dbSDimitry Andric /// groups, not hard clauses.
315ffd83dbSDimitry Andric //
325ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
335ffd83dbSDimitry Andric 
34e8d8bef9SDimitry Andric #include "AMDGPU.h"
35e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
36e8d8bef9SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
375ffd83dbSDimitry Andric #include "llvm/ADT/SmallVector.h"
3881ad6265SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
395ffd83dbSDimitry Andric 
405ffd83dbSDimitry Andric using namespace llvm;
415ffd83dbSDimitry Andric 
425ffd83dbSDimitry Andric #define DEBUG_TYPE "si-insert-hard-clauses"
435ffd83dbSDimitry Andric 
445ffd83dbSDimitry Andric namespace {
455ffd83dbSDimitry Andric 
/// Category assigned to each machine instruction when deciding whether it may
/// take part in a hard clause. Every enumerator up to and including
/// LAST_REAL_HARDCLAUSE_TYPE names a kind of clause; the enumerators after it
/// are bookkeeping categories.
enum HardClauseType {
  //
  // Categories used for GFX10.
  //

  /// Texture, buffer, global or scratch memory instructions.
  HARDCLAUSE_VMEM,
  /// Flat memory instructions that are neither global nor scratch.
  HARDCLAUSE_FLAT,

  //
  // Categories used for GFX11.
  //

  /// Texture memory instructions, split by access kind.
  HARDCLAUSE_MIMG_LOAD,
  HARDCLAUSE_MIMG_STORE,
  HARDCLAUSE_MIMG_ATOMIC,
  HARDCLAUSE_MIMG_SAMPLE,
  /// Buffer, global or scratch memory instructions, split by access kind.
  HARDCLAUSE_VMEM_LOAD,
  HARDCLAUSE_VMEM_STORE,
  HARDCLAUSE_VMEM_ATOMIC,
  /// Flat (neither global nor scratch) memory instructions, split by access
  /// kind.
  HARDCLAUSE_FLAT_LOAD,
  HARDCLAUSE_FLAT_STORE,
  HARDCLAUSE_FLAT_ATOMIC,
  /// BVH instructions.
  HARDCLAUSE_BVH,

  //
  // Categories common to all generations.
  //

  /// Instructions that access LDS.
  HARDCLAUSE_LDS,
  /// Scalar memory instructions.
  HARDCLAUSE_SMEM,
  /// VALU instructions.
  HARDCLAUSE_VALU,
  /// Alias for the highest-valued category that names a real clause kind.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  /// Internal instructions. Apart from s_waitcnt, these may sit in the middle
  /// of a hard clause.
  HARDCLAUSE_INTERNAL,
  /// Meta instructions, such as KILL, that produce no ISA at all.
  HARDCLAUSE_IGNORE,
  /// Everything that must not appear in a hard clause: SALU, export, branch,
  /// message, GDS, s_waitcnt and any instruction not covered above.
  HARDCLAUSE_ILLEGAL,
};
915ffd83dbSDimitry Andric 
92fe6060f1SDimitry Andric class SIInsertHardClauses : public MachineFunctionPass {
93fe6060f1SDimitry Andric public:
94fe6060f1SDimitry Andric   static char ID;
95fe6060f1SDimitry Andric   const GCNSubtarget *ST = nullptr;
96fe6060f1SDimitry Andric 
97fe6060f1SDimitry Andric   SIInsertHardClauses() : MachineFunctionPass(ID) {}
98fe6060f1SDimitry Andric 
99fe6060f1SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
100fe6060f1SDimitry Andric     AU.setPreservesCFG();
101fe6060f1SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
102fe6060f1SDimitry Andric   }
103fe6060f1SDimitry Andric 
1045ffd83dbSDimitry Andric   HardClauseType getHardClauseType(const MachineInstr &MI) {
10581ad6265SDimitry Andric     if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
10681ad6265SDimitry Andric       if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
107fe6060f1SDimitry Andric         if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
108fe6060f1SDimitry Andric           if (ST->hasNSAClauseBug()) {
109fe6060f1SDimitry Andric             const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
110fe6060f1SDimitry Andric             if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
111fe6060f1SDimitry Andric               return HARDCLAUSE_ILLEGAL;
112fe6060f1SDimitry Andric           }
1135ffd83dbSDimitry Andric           return HARDCLAUSE_VMEM;
114fe6060f1SDimitry Andric         }
1155ffd83dbSDimitry Andric         if (SIInstrInfo::isFLAT(MI))
1165ffd83dbSDimitry Andric           return HARDCLAUSE_FLAT;
11781ad6265SDimitry Andric       } else {
11881ad6265SDimitry Andric         assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11);
11981ad6265SDimitry Andric         if (SIInstrInfo::isMIMG(MI)) {
12081ad6265SDimitry Andric           const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
12181ad6265SDimitry Andric           const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
12281ad6265SDimitry Andric               AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
12381ad6265SDimitry Andric           if (BaseInfo->BVH)
12481ad6265SDimitry Andric             return HARDCLAUSE_BVH;
12581ad6265SDimitry Andric           if (BaseInfo->Sampler)
12681ad6265SDimitry Andric             return HARDCLAUSE_MIMG_SAMPLE;
12781ad6265SDimitry Andric           return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC
12881ad6265SDimitry Andric                                               : HARDCLAUSE_MIMG_LOAD
12981ad6265SDimitry Andric                               : HARDCLAUSE_MIMG_STORE;
13081ad6265SDimitry Andric         }
13181ad6265SDimitry Andric         if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
13281ad6265SDimitry Andric           return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
13381ad6265SDimitry Andric                                               : HARDCLAUSE_VMEM_LOAD
13481ad6265SDimitry Andric                               : HARDCLAUSE_VMEM_STORE;
13581ad6265SDimitry Andric         }
13681ad6265SDimitry Andric         if (SIInstrInfo::isFLAT(MI)) {
13781ad6265SDimitry Andric           return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC
13881ad6265SDimitry Andric                                               : HARDCLAUSE_FLAT_LOAD
13981ad6265SDimitry Andric                               : HARDCLAUSE_FLAT_STORE;
14081ad6265SDimitry Andric         }
14181ad6265SDimitry Andric       }
1425ffd83dbSDimitry Andric       // TODO: LDS
1435ffd83dbSDimitry Andric       if (SIInstrInfo::isSMRD(MI))
1445ffd83dbSDimitry Andric         return HARDCLAUSE_SMEM;
1455ffd83dbSDimitry Andric     }
1465ffd83dbSDimitry Andric 
1475ffd83dbSDimitry Andric     // Don't form VALU clauses. It's not clear what benefit they give, if any.
1485ffd83dbSDimitry Andric 
1495ffd83dbSDimitry Andric     // In practice s_nop is the only internal instruction we're likely to see.
1505ffd83dbSDimitry Andric     // It's safe to treat the rest as illegal.
1515ffd83dbSDimitry Andric     if (MI.getOpcode() == AMDGPU::S_NOP)
1525ffd83dbSDimitry Andric       return HARDCLAUSE_INTERNAL;
153349cc55cSDimitry Andric     if (MI.isMetaInstruction())
154349cc55cSDimitry Andric       return HARDCLAUSE_IGNORE;
1555ffd83dbSDimitry Andric     return HARDCLAUSE_ILLEGAL;
1565ffd83dbSDimitry Andric   }
1575ffd83dbSDimitry Andric 
1585ffd83dbSDimitry Andric   // Track information about a clause as we discover it.
1595ffd83dbSDimitry Andric   struct ClauseInfo {
1605ffd83dbSDimitry Andric     // The type of all (non-internal) instructions in the clause.
1615ffd83dbSDimitry Andric     HardClauseType Type = HARDCLAUSE_ILLEGAL;
1625ffd83dbSDimitry Andric     // The first (necessarily non-internal) instruction in the clause.
1635ffd83dbSDimitry Andric     MachineInstr *First = nullptr;
1645ffd83dbSDimitry Andric     // The last non-internal instruction in the clause.
1655ffd83dbSDimitry Andric     MachineInstr *Last = nullptr;
1665ffd83dbSDimitry Andric     // The length of the clause including any internal instructions in the
167349cc55cSDimitry Andric     // middle (but not at the end) of the clause.
1685ffd83dbSDimitry Andric     unsigned Length = 0;
169349cc55cSDimitry Andric     // Internal instructions at the and of a clause should not be included in
170349cc55cSDimitry Andric     // the clause. Count them in TrailingInternalLength until a new memory
171349cc55cSDimitry Andric     // instruction is added.
172349cc55cSDimitry Andric     unsigned TrailingInternalLength = 0;
1735ffd83dbSDimitry Andric     // The base operands of *Last.
1745ffd83dbSDimitry Andric     SmallVector<const MachineOperand *, 4> BaseOps;
1755ffd83dbSDimitry Andric   };
1765ffd83dbSDimitry Andric 
1775ffd83dbSDimitry Andric   bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
178349cc55cSDimitry Andric     if (CI.First == CI.Last)
1795ffd83dbSDimitry Andric       return false;
180*0fca6ea1SDimitry Andric     assert(CI.Length <= ST->maxHardClauseLength() &&
181*0fca6ea1SDimitry Andric            "Hard clause is too long!");
1825ffd83dbSDimitry Andric 
1835ffd83dbSDimitry Andric     auto &MBB = *CI.First->getParent();
1845ffd83dbSDimitry Andric     auto ClauseMI =
1855ffd83dbSDimitry Andric         BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
186349cc55cSDimitry Andric             .addImm(CI.Length - 1);
1875ffd83dbSDimitry Andric     finalizeBundle(MBB, ClauseMI->getIterator(),
1885ffd83dbSDimitry Andric                    std::next(CI.Last->getIterator()));
1895ffd83dbSDimitry Andric     return true;
1905ffd83dbSDimitry Andric   }
1915ffd83dbSDimitry Andric 
1925ffd83dbSDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override {
1935ffd83dbSDimitry Andric     if (skipFunction(MF.getFunction()))
1945ffd83dbSDimitry Andric       return false;
1955ffd83dbSDimitry Andric 
196fe6060f1SDimitry Andric     ST = &MF.getSubtarget<GCNSubtarget>();
197fe6060f1SDimitry Andric     if (!ST->hasHardClauses())
1985ffd83dbSDimitry Andric       return false;
1995ffd83dbSDimitry Andric 
200fe6060f1SDimitry Andric     const SIInstrInfo *SII = ST->getInstrInfo();
201fe6060f1SDimitry Andric     const TargetRegisterInfo *TRI = ST->getRegisterInfo();
2025ffd83dbSDimitry Andric 
2035ffd83dbSDimitry Andric     bool Changed = false;
2045ffd83dbSDimitry Andric     for (auto &MBB : MF) {
2055ffd83dbSDimitry Andric       ClauseInfo CI;
2065ffd83dbSDimitry Andric       for (auto &MI : MBB) {
2075ffd83dbSDimitry Andric         HardClauseType Type = getHardClauseType(MI);
2085ffd83dbSDimitry Andric 
2095ffd83dbSDimitry Andric         int64_t Dummy1;
2105ffd83dbSDimitry Andric         bool Dummy2;
211*0fca6ea1SDimitry Andric         LocationSize Dummy3 = 0;
2125ffd83dbSDimitry Andric         SmallVector<const MachineOperand *, 4> BaseOps;
2135ffd83dbSDimitry Andric         if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
2145ffd83dbSDimitry Andric           if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
2155ffd83dbSDimitry Andric                                                   Dummy3, TRI)) {
2165ffd83dbSDimitry Andric             // We failed to get the base operands, so we'll never clause this
2175ffd83dbSDimitry Andric             // instruction with any other, so pretend it's illegal.
2185ffd83dbSDimitry Andric             Type = HARDCLAUSE_ILLEGAL;
2195ffd83dbSDimitry Andric           }
2205ffd83dbSDimitry Andric         }
2215ffd83dbSDimitry Andric 
222*0fca6ea1SDimitry Andric         if (CI.Length == ST->maxHardClauseLength() ||
2235ffd83dbSDimitry Andric             (CI.Length && Type != HARDCLAUSE_INTERNAL &&
224349cc55cSDimitry Andric              Type != HARDCLAUSE_IGNORE &&
2255ffd83dbSDimitry Andric              (Type != CI.Type ||
2265ffd83dbSDimitry Andric               // Note that we lie to shouldClusterMemOps about the size of the
2275ffd83dbSDimitry Andric               // cluster. When shouldClusterMemOps is called from the machine
2285ffd83dbSDimitry Andric               // scheduler it limits the size of the cluster to avoid increasing
2295ffd83dbSDimitry Andric               // register pressure too much, but this pass runs after register
2305ffd83dbSDimitry Andric               // allocation so there is no need for that kind of limit.
2315f757f3fSDimitry Andric               // We also lie about the Offset and OffsetIsScalable parameters,
2325f757f3fSDimitry Andric               // as they aren't used in the SIInstrInfo implementation.
2335f757f3fSDimitry Andric               !SII->shouldClusterMemOps(CI.BaseOps, 0, false, BaseOps, 0, false,
2345f757f3fSDimitry Andric                                         2, 2)))) {
2355ffd83dbSDimitry Andric           // Finish the current clause.
2365ffd83dbSDimitry Andric           Changed |= emitClause(CI, SII);
2375ffd83dbSDimitry Andric           CI = ClauseInfo();
2385ffd83dbSDimitry Andric         }
2395ffd83dbSDimitry Andric 
2405ffd83dbSDimitry Andric         if (CI.Length) {
2415ffd83dbSDimitry Andric           // Extend the current clause.
242349cc55cSDimitry Andric           if (Type != HARDCLAUSE_IGNORE) {
243349cc55cSDimitry Andric             if (Type == HARDCLAUSE_INTERNAL) {
244349cc55cSDimitry Andric               ++CI.TrailingInternalLength;
245349cc55cSDimitry Andric             } else {
2465ffd83dbSDimitry Andric               ++CI.Length;
247349cc55cSDimitry Andric               CI.Length += CI.TrailingInternalLength;
248349cc55cSDimitry Andric               CI.TrailingInternalLength = 0;
2495ffd83dbSDimitry Andric               CI.Last = &MI;
2505ffd83dbSDimitry Andric               CI.BaseOps = std::move(BaseOps);
2515ffd83dbSDimitry Andric             }
252349cc55cSDimitry Andric           }
2535ffd83dbSDimitry Andric         } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
2545ffd83dbSDimitry Andric           // Start a new clause.
255349cc55cSDimitry Andric           CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};
2565ffd83dbSDimitry Andric         }
2575ffd83dbSDimitry Andric       }
2585ffd83dbSDimitry Andric 
2595ffd83dbSDimitry Andric       // Finish the last clause in the basic block if any.
2605ffd83dbSDimitry Andric       if (CI.Length)
2615ffd83dbSDimitry Andric         Changed |= emitClause(CI, SII);
2625ffd83dbSDimitry Andric     }
2635ffd83dbSDimitry Andric 
2645ffd83dbSDimitry Andric     return Changed;
2655ffd83dbSDimitry Andric   }
2665ffd83dbSDimitry Andric };
2675ffd83dbSDimitry Andric 
2685ffd83dbSDimitry Andric } // namespace
2695ffd83dbSDimitry Andric 
2705ffd83dbSDimitry Andric char SIInsertHardClauses::ID = 0;
2715ffd83dbSDimitry Andric 
2725ffd83dbSDimitry Andric char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;
2735ffd83dbSDimitry Andric 
2745ffd83dbSDimitry Andric INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
2755ffd83dbSDimitry Andric                 false, false)
276