15ffd83dbSDimitry Andric //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric // 95ffd83dbSDimitry Andric /// \file 105ffd83dbSDimitry Andric /// Insert s_clause instructions to form hard clauses. 115ffd83dbSDimitry Andric /// 125ffd83dbSDimitry Andric /// Clausing load instructions can give cache coherency benefits. Before gfx10, 135ffd83dbSDimitry Andric /// the hardware automatically detected "soft clauses", which were sequences of 145ffd83dbSDimitry Andric /// memory instructions of the same type. In gfx10 this detection was removed, 155ffd83dbSDimitry Andric /// and the s_clause instruction was introduced to explicitly mark "hard 165ffd83dbSDimitry Andric /// clauses". 175ffd83dbSDimitry Andric /// 185ffd83dbSDimitry Andric /// It's the scheduler's job to form the clauses by putting similar memory 195ffd83dbSDimitry Andric /// instructions next to each other. Our job is just to insert an s_clause 205ffd83dbSDimitry Andric /// instruction to mark the start of each clause. 215ffd83dbSDimitry Andric /// 225ffd83dbSDimitry Andric /// Note that hard clauses are very similar to, but logically distinct from, the 235ffd83dbSDimitry Andric /// groups of instructions that have to be restartable when XNACK is enabled. 245ffd83dbSDimitry Andric /// The rules are slightly different in each case. For example an s_nop 255ffd83dbSDimitry Andric /// instruction breaks a restartable group, but can appear in the middle of a 265ffd83dbSDimitry Andric /// hard clause. (Before gfx10 there wasn't a distinction, and both were called 275ffd83dbSDimitry Andric /// "soft clauses" or just "clauses".) 285ffd83dbSDimitry Andric /// 295ffd83dbSDimitry Andric /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable 305ffd83dbSDimitry Andric /// groups, not hard clauses. 315ffd83dbSDimitry Andric // 325ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 335ffd83dbSDimitry Andric 34e8d8bef9SDimitry Andric #include "AMDGPU.h" 35e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 36e8d8bef9SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 375ffd83dbSDimitry Andric #include "llvm/ADT/SmallVector.h" 3881ad6265SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 395ffd83dbSDimitry Andric 405ffd83dbSDimitry Andric using namespace llvm; 415ffd83dbSDimitry Andric 425ffd83dbSDimitry Andric #define DEBUG_TYPE "si-insert-hard-clauses" 435ffd83dbSDimitry Andric 445ffd83dbSDimitry Andric namespace { 455ffd83dbSDimitry Andric 465ffd83dbSDimitry Andric enum HardClauseType { 4781ad6265SDimitry Andric // For GFX10: 4881ad6265SDimitry Andric 495ffd83dbSDimitry Andric // Texture, buffer, global or scratch memory instructions. 505ffd83dbSDimitry Andric HARDCLAUSE_VMEM, 515ffd83dbSDimitry Andric // Flat (not global or scratch) memory instructions. 525ffd83dbSDimitry Andric HARDCLAUSE_FLAT, 5381ad6265SDimitry Andric 5481ad6265SDimitry Andric // For GFX11: 5581ad6265SDimitry Andric 5681ad6265SDimitry Andric // Texture memory instructions. 5781ad6265SDimitry Andric HARDCLAUSE_MIMG_LOAD, 5881ad6265SDimitry Andric HARDCLAUSE_MIMG_STORE, 5981ad6265SDimitry Andric HARDCLAUSE_MIMG_ATOMIC, 6081ad6265SDimitry Andric HARDCLAUSE_MIMG_SAMPLE, 6181ad6265SDimitry Andric // Buffer, global or scratch memory instructions. 6281ad6265SDimitry Andric HARDCLAUSE_VMEM_LOAD, 6381ad6265SDimitry Andric HARDCLAUSE_VMEM_STORE, 6481ad6265SDimitry Andric HARDCLAUSE_VMEM_ATOMIC, 6581ad6265SDimitry Andric // Flat (not global or scratch) memory instructions. 6681ad6265SDimitry Andric HARDCLAUSE_FLAT_LOAD, 6781ad6265SDimitry Andric HARDCLAUSE_FLAT_STORE, 6881ad6265SDimitry Andric HARDCLAUSE_FLAT_ATOMIC, 6981ad6265SDimitry Andric // BVH instructions. 7081ad6265SDimitry Andric HARDCLAUSE_BVH, 7181ad6265SDimitry Andric 7281ad6265SDimitry Andric // Common: 7381ad6265SDimitry Andric 745ffd83dbSDimitry Andric // Instructions that access LDS. 755ffd83dbSDimitry Andric HARDCLAUSE_LDS, 765ffd83dbSDimitry Andric // Scalar memory instructions. 775ffd83dbSDimitry Andric HARDCLAUSE_SMEM, 785ffd83dbSDimitry Andric // VALU instructions. 795ffd83dbSDimitry Andric HARDCLAUSE_VALU, 805ffd83dbSDimitry Andric LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU, 815ffd83dbSDimitry Andric 825ffd83dbSDimitry Andric // Internal instructions, which are allowed in the middle of a hard clause, 835ffd83dbSDimitry Andric // except for s_waitcnt. 845ffd83dbSDimitry Andric HARDCLAUSE_INTERNAL, 85349cc55cSDimitry Andric // Meta instructions that do not result in any ISA like KILL. 86349cc55cSDimitry Andric HARDCLAUSE_IGNORE, 875ffd83dbSDimitry Andric // Instructions that are not allowed in a hard clause: SALU, export, branch, 885ffd83dbSDimitry Andric // message, GDS, s_waitcnt and anything else not mentioned above. 895ffd83dbSDimitry Andric HARDCLAUSE_ILLEGAL, 905ffd83dbSDimitry Andric }; 915ffd83dbSDimitry Andric 92fe6060f1SDimitry Andric class SIInsertHardClauses : public MachineFunctionPass { 93fe6060f1SDimitry Andric public: 94fe6060f1SDimitry Andric static char ID; 95fe6060f1SDimitry Andric const GCNSubtarget *ST = nullptr; 96fe6060f1SDimitry Andric 97fe6060f1SDimitry Andric SIInsertHardClauses() : MachineFunctionPass(ID) {} 98fe6060f1SDimitry Andric 99fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 100fe6060f1SDimitry Andric AU.setPreservesCFG(); 101fe6060f1SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 102fe6060f1SDimitry Andric } 103fe6060f1SDimitry Andric 1045ffd83dbSDimitry Andric HardClauseType getHardClauseType(const MachineInstr &MI) { 10581ad6265SDimitry Andric if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) { 10681ad6265SDimitry Andric if (ST->getGeneration() == AMDGPUSubtarget::GFX10) { 107fe6060f1SDimitry Andric if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) { 108fe6060f1SDimitry Andric if (ST->hasNSAClauseBug()) { 109fe6060f1SDimitry Andric const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); 110fe6060f1SDimitry Andric if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA) 111fe6060f1SDimitry Andric return HARDCLAUSE_ILLEGAL; 112fe6060f1SDimitry Andric } 1135ffd83dbSDimitry Andric return HARDCLAUSE_VMEM; 114fe6060f1SDimitry Andric } 1155ffd83dbSDimitry Andric if (SIInstrInfo::isFLAT(MI)) 1165ffd83dbSDimitry Andric return HARDCLAUSE_FLAT; 11781ad6265SDimitry Andric } else { 11881ad6265SDimitry Andric assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11); 11981ad6265SDimitry Andric if (SIInstrInfo::isMIMG(MI)) { 12081ad6265SDimitry Andric const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); 12181ad6265SDimitry Andric const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo = 12281ad6265SDimitry Andric AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 12381ad6265SDimitry Andric if (BaseInfo->BVH) 12481ad6265SDimitry Andric return HARDCLAUSE_BVH; 12581ad6265SDimitry Andric if (BaseInfo->Sampler) 12681ad6265SDimitry Andric return HARDCLAUSE_MIMG_SAMPLE; 12781ad6265SDimitry Andric return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC 12881ad6265SDimitry Andric : HARDCLAUSE_MIMG_LOAD 12981ad6265SDimitry Andric : HARDCLAUSE_MIMG_STORE; 13081ad6265SDimitry Andric } 13181ad6265SDimitry Andric if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) { 13281ad6265SDimitry Andric return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC 13381ad6265SDimitry Andric : HARDCLAUSE_VMEM_LOAD 13481ad6265SDimitry Andric : HARDCLAUSE_VMEM_STORE; 13581ad6265SDimitry Andric } 13681ad6265SDimitry Andric if (SIInstrInfo::isFLAT(MI)) { 13781ad6265SDimitry Andric return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC 13881ad6265SDimitry Andric : HARDCLAUSE_FLAT_LOAD 13981ad6265SDimitry Andric : HARDCLAUSE_FLAT_STORE; 14081ad6265SDimitry Andric } 14181ad6265SDimitry Andric } 1425ffd83dbSDimitry Andric // TODO: LDS 1435ffd83dbSDimitry Andric if (SIInstrInfo::isSMRD(MI)) 1445ffd83dbSDimitry Andric return HARDCLAUSE_SMEM; 1455ffd83dbSDimitry Andric } 1465ffd83dbSDimitry Andric 1475ffd83dbSDimitry Andric // Don't form VALU clauses. It's not clear what benefit they give, if any. 1485ffd83dbSDimitry Andric 1495ffd83dbSDimitry Andric // In practice s_nop is the only internal instruction we're likely to see. 1505ffd83dbSDimitry Andric // It's safe to treat the rest as illegal. 1515ffd83dbSDimitry Andric if (MI.getOpcode() == AMDGPU::S_NOP) 1525ffd83dbSDimitry Andric return HARDCLAUSE_INTERNAL; 153349cc55cSDimitry Andric if (MI.isMetaInstruction()) 154349cc55cSDimitry Andric return HARDCLAUSE_IGNORE; 1555ffd83dbSDimitry Andric return HARDCLAUSE_ILLEGAL; 1565ffd83dbSDimitry Andric } 1575ffd83dbSDimitry Andric 1585ffd83dbSDimitry Andric // Track information about a clause as we discover it. 1595ffd83dbSDimitry Andric struct ClauseInfo { 1605ffd83dbSDimitry Andric // The type of all (non-internal) instructions in the clause. 1615ffd83dbSDimitry Andric HardClauseType Type = HARDCLAUSE_ILLEGAL; 1625ffd83dbSDimitry Andric // The first (necessarily non-internal) instruction in the clause. 1635ffd83dbSDimitry Andric MachineInstr *First = nullptr; 1645ffd83dbSDimitry Andric // The last non-internal instruction in the clause. 1655ffd83dbSDimitry Andric MachineInstr *Last = nullptr; 1665ffd83dbSDimitry Andric // The length of the clause including any internal instructions in the 167349cc55cSDimitry Andric // middle (but not at the end) of the clause. 1685ffd83dbSDimitry Andric unsigned Length = 0; 169349cc55cSDimitry Andric // Internal instructions at the and of a clause should not be included in 170349cc55cSDimitry Andric // the clause. Count them in TrailingInternalLength until a new memory 171349cc55cSDimitry Andric // instruction is added. 172349cc55cSDimitry Andric unsigned TrailingInternalLength = 0; 1735ffd83dbSDimitry Andric // The base operands of *Last. 1745ffd83dbSDimitry Andric SmallVector<const MachineOperand *, 4> BaseOps; 1755ffd83dbSDimitry Andric }; 1765ffd83dbSDimitry Andric 1775ffd83dbSDimitry Andric bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) { 178349cc55cSDimitry Andric if (CI.First == CI.Last) 1795ffd83dbSDimitry Andric return false; 180*0fca6ea1SDimitry Andric assert(CI.Length <= ST->maxHardClauseLength() && 181*0fca6ea1SDimitry Andric "Hard clause is too long!"); 1825ffd83dbSDimitry Andric 1835ffd83dbSDimitry Andric auto &MBB = *CI.First->getParent(); 1845ffd83dbSDimitry Andric auto ClauseMI = 1855ffd83dbSDimitry Andric BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE)) 186349cc55cSDimitry Andric .addImm(CI.Length - 1); 1875ffd83dbSDimitry Andric finalizeBundle(MBB, ClauseMI->getIterator(), 1885ffd83dbSDimitry Andric std::next(CI.Last->getIterator())); 1895ffd83dbSDimitry Andric return true; 1905ffd83dbSDimitry Andric } 1915ffd83dbSDimitry Andric 1925ffd83dbSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override { 1935ffd83dbSDimitry Andric if (skipFunction(MF.getFunction())) 1945ffd83dbSDimitry Andric return false; 1955ffd83dbSDimitry Andric 196fe6060f1SDimitry Andric ST = &MF.getSubtarget<GCNSubtarget>(); 197fe6060f1SDimitry Andric if (!ST->hasHardClauses()) 1985ffd83dbSDimitry Andric return false; 1995ffd83dbSDimitry Andric 200fe6060f1SDimitry Andric const SIInstrInfo *SII = ST->getInstrInfo(); 201fe6060f1SDimitry Andric const TargetRegisterInfo *TRI = ST->getRegisterInfo(); 2025ffd83dbSDimitry Andric 2035ffd83dbSDimitry Andric bool Changed = false; 2045ffd83dbSDimitry Andric for (auto &MBB : MF) { 2055ffd83dbSDimitry Andric ClauseInfo CI; 2065ffd83dbSDimitry Andric for (auto &MI : MBB) { 2075ffd83dbSDimitry Andric HardClauseType Type = getHardClauseType(MI); 2085ffd83dbSDimitry Andric 2095ffd83dbSDimitry Andric int64_t Dummy1; 2105ffd83dbSDimitry Andric bool Dummy2; 211*0fca6ea1SDimitry Andric LocationSize Dummy3 = 0; 2125ffd83dbSDimitry Andric SmallVector<const MachineOperand *, 4> BaseOps; 2135ffd83dbSDimitry Andric if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 2145ffd83dbSDimitry Andric if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2, 2155ffd83dbSDimitry Andric Dummy3, TRI)) { 2165ffd83dbSDimitry Andric // We failed to get the base operands, so we'll never clause this 2175ffd83dbSDimitry Andric // instruction with any other, so pretend it's illegal. 2185ffd83dbSDimitry Andric Type = HARDCLAUSE_ILLEGAL; 2195ffd83dbSDimitry Andric } 2205ffd83dbSDimitry Andric } 2215ffd83dbSDimitry Andric 222*0fca6ea1SDimitry Andric if (CI.Length == ST->maxHardClauseLength() || 2235ffd83dbSDimitry Andric (CI.Length && Type != HARDCLAUSE_INTERNAL && 224349cc55cSDimitry Andric Type != HARDCLAUSE_IGNORE && 2255ffd83dbSDimitry Andric (Type != CI.Type || 2265ffd83dbSDimitry Andric // Note that we lie to shouldClusterMemOps about the size of the 2275ffd83dbSDimitry Andric // cluster. When shouldClusterMemOps is called from the machine 2285ffd83dbSDimitry Andric // scheduler it limits the size of the cluster to avoid increasing 2295ffd83dbSDimitry Andric // register pressure too much, but this pass runs after register 2305ffd83dbSDimitry Andric // allocation so there is no need for that kind of limit. 2315f757f3fSDimitry Andric // We also lie about the Offset and OffsetIsScalable parameters, 2325f757f3fSDimitry Andric // as they aren't used in the SIInstrInfo implementation. 2335f757f3fSDimitry Andric !SII->shouldClusterMemOps(CI.BaseOps, 0, false, BaseOps, 0, false, 2345f757f3fSDimitry Andric 2, 2)))) { 2355ffd83dbSDimitry Andric // Finish the current clause. 2365ffd83dbSDimitry Andric Changed |= emitClause(CI, SII); 2375ffd83dbSDimitry Andric CI = ClauseInfo(); 2385ffd83dbSDimitry Andric } 2395ffd83dbSDimitry Andric 2405ffd83dbSDimitry Andric if (CI.Length) { 2415ffd83dbSDimitry Andric // Extend the current clause. 242349cc55cSDimitry Andric if (Type != HARDCLAUSE_IGNORE) { 243349cc55cSDimitry Andric if (Type == HARDCLAUSE_INTERNAL) { 244349cc55cSDimitry Andric ++CI.TrailingInternalLength; 245349cc55cSDimitry Andric } else { 2465ffd83dbSDimitry Andric ++CI.Length; 247349cc55cSDimitry Andric CI.Length += CI.TrailingInternalLength; 248349cc55cSDimitry Andric CI.TrailingInternalLength = 0; 2495ffd83dbSDimitry Andric CI.Last = &MI; 2505ffd83dbSDimitry Andric CI.BaseOps = std::move(BaseOps); 2515ffd83dbSDimitry Andric } 252349cc55cSDimitry Andric } 2535ffd83dbSDimitry Andric } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 2545ffd83dbSDimitry Andric // Start a new clause. 255349cc55cSDimitry Andric CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)}; 2565ffd83dbSDimitry Andric } 2575ffd83dbSDimitry Andric } 2585ffd83dbSDimitry Andric 2595ffd83dbSDimitry Andric // Finish the last clause in the basic block if any. 2605ffd83dbSDimitry Andric if (CI.Length) 2615ffd83dbSDimitry Andric Changed |= emitClause(CI, SII); 2625ffd83dbSDimitry Andric } 2635ffd83dbSDimitry Andric 2645ffd83dbSDimitry Andric return Changed; 2655ffd83dbSDimitry Andric } 2665ffd83dbSDimitry Andric }; 2675ffd83dbSDimitry Andric 2685ffd83dbSDimitry Andric } // namespace 2695ffd83dbSDimitry Andric 2705ffd83dbSDimitry Andric char SIInsertHardClauses::ID = 0; 2715ffd83dbSDimitry Andric 2725ffd83dbSDimitry Andric char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID; 2735ffd83dbSDimitry Andric 2745ffd83dbSDimitry Andric INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses", 2755ffd83dbSDimitry Andric false, false) 276