1*5ffd83dbSDimitry Andric //===--- AMDGPUExportClusting.cpp - AMDGPU Export Clustering -------------===// 2*5ffd83dbSDimitry Andric // 3*5ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*5ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*5ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*5ffd83dbSDimitry Andric // 7*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 8*5ffd83dbSDimitry Andric // 9*5ffd83dbSDimitry Andric /// \file This file contains a DAG scheduling mutation to cluster shader 10*5ffd83dbSDimitry Andric /// exports. 11*5ffd83dbSDimitry Andric // 12*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 13*5ffd83dbSDimitry Andric 14*5ffd83dbSDimitry Andric #include "AMDGPUExportClustering.h" 15*5ffd83dbSDimitry Andric #include "AMDGPUSubtarget.h" 16*5ffd83dbSDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 17*5ffd83dbSDimitry Andric #include "SIInstrInfo.h" 18*5ffd83dbSDimitry Andric 19*5ffd83dbSDimitry Andric using namespace llvm; 20*5ffd83dbSDimitry Andric 21*5ffd83dbSDimitry Andric namespace { 22*5ffd83dbSDimitry Andric 23*5ffd83dbSDimitry Andric class ExportClustering : public ScheduleDAGMutation { 24*5ffd83dbSDimitry Andric public: 25*5ffd83dbSDimitry Andric ExportClustering() {} 26*5ffd83dbSDimitry Andric void apply(ScheduleDAGInstrs *DAG) override; 27*5ffd83dbSDimitry Andric }; 28*5ffd83dbSDimitry Andric 29*5ffd83dbSDimitry Andric static bool isExport(const SUnit &SU) { 30*5ffd83dbSDimitry Andric const MachineInstr *MI = SU.getInstr(); 31*5ffd83dbSDimitry Andric return MI->getOpcode() == AMDGPU::EXP || 32*5ffd83dbSDimitry Andric MI->getOpcode() == AMDGPU::EXP_DONE; 33*5ffd83dbSDimitry Andric } 34*5ffd83dbSDimitry Andric 35*5ffd83dbSDimitry Andric static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) { 36*5ffd83dbSDimitry Andric const MachineInstr *MI = SU->getInstr(); 37*5ffd83dbSDimitry Andric int Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm(); 38*5ffd83dbSDimitry Andric return Imm >= 12 && Imm <= 15; 39*5ffd83dbSDimitry Andric } 40*5ffd83dbSDimitry Andric 41*5ffd83dbSDimitry Andric static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain, 42*5ffd83dbSDimitry Andric unsigned PosCount) { 43*5ffd83dbSDimitry Andric if (!PosCount || PosCount == Chain.size()) 44*5ffd83dbSDimitry Andric return; 45*5ffd83dbSDimitry Andric 46*5ffd83dbSDimitry Andric // Position exports should occur as soon as possible in the shader 47*5ffd83dbSDimitry Andric // for optimal performance. This moves position exports before 48*5ffd83dbSDimitry Andric // other exports while preserving the order within different export 49*5ffd83dbSDimitry Andric // types (pos or other). 50*5ffd83dbSDimitry Andric SmallVector<SUnit *, 8> Copy(Chain); 51*5ffd83dbSDimitry Andric unsigned PosIdx = 0; 52*5ffd83dbSDimitry Andric unsigned OtherIdx = PosCount; 53*5ffd83dbSDimitry Andric for (SUnit *SU : Copy) { 54*5ffd83dbSDimitry Andric if (isPositionExport(TII, SU)) 55*5ffd83dbSDimitry Andric Chain[PosIdx++] = SU; 56*5ffd83dbSDimitry Andric else 57*5ffd83dbSDimitry Andric Chain[OtherIdx++] = SU; 58*5ffd83dbSDimitry Andric } 59*5ffd83dbSDimitry Andric } 60*5ffd83dbSDimitry Andric 61*5ffd83dbSDimitry Andric static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) { 62*5ffd83dbSDimitry Andric SUnit *ChainHead = Exports.front(); 63*5ffd83dbSDimitry Andric 64*5ffd83dbSDimitry Andric // Now construct cluster from chain by adding new edges. 65*5ffd83dbSDimitry Andric for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) { 66*5ffd83dbSDimitry Andric SUnit *SUa = Exports[Idx]; 67*5ffd83dbSDimitry Andric SUnit *SUb = Exports[Idx + 1]; 68*5ffd83dbSDimitry Andric 69*5ffd83dbSDimitry Andric // Copy all dependencies to the head of the chain to avoid any 70*5ffd83dbSDimitry Andric // computation being inserted into the chain. 71*5ffd83dbSDimitry Andric for (const SDep &Pred : SUb->Preds) { 72*5ffd83dbSDimitry Andric SUnit *PredSU = Pred.getSUnit(); 73*5ffd83dbSDimitry Andric if (!isExport(*PredSU) && !Pred.isWeak()) 74*5ffd83dbSDimitry Andric DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial)); 75*5ffd83dbSDimitry Andric } 76*5ffd83dbSDimitry Andric 77*5ffd83dbSDimitry Andric // New barrier edge ordering exports 78*5ffd83dbSDimitry Andric DAG->addEdge(SUb, SDep(SUa, SDep::Barrier)); 79*5ffd83dbSDimitry Andric // Also add cluster edge 80*5ffd83dbSDimitry Andric DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)); 81*5ffd83dbSDimitry Andric } 82*5ffd83dbSDimitry Andric } 83*5ffd83dbSDimitry Andric 84*5ffd83dbSDimitry Andric static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) { 85*5ffd83dbSDimitry Andric SmallVector<SDep, 2> ToAdd, ToRemove; 86*5ffd83dbSDimitry Andric 87*5ffd83dbSDimitry Andric for (const SDep &Pred : SU.Preds) { 88*5ffd83dbSDimitry Andric SUnit *PredSU = Pred.getSUnit(); 89*5ffd83dbSDimitry Andric if (Pred.isBarrier() && isExport(*PredSU)) { 90*5ffd83dbSDimitry Andric ToRemove.push_back(Pred); 91*5ffd83dbSDimitry Andric if (isExport(SU)) 92*5ffd83dbSDimitry Andric continue; 93*5ffd83dbSDimitry Andric 94*5ffd83dbSDimitry Andric // If we remove a barrier we need to copy dependencies 95*5ffd83dbSDimitry Andric // from the predecessor to maintain order. 96*5ffd83dbSDimitry Andric for (const SDep &ExportPred : PredSU->Preds) { 97*5ffd83dbSDimitry Andric SUnit *ExportPredSU = ExportPred.getSUnit(); 98*5ffd83dbSDimitry Andric if (ExportPred.isBarrier() && !isExport(*ExportPredSU)) 99*5ffd83dbSDimitry Andric ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier)); 100*5ffd83dbSDimitry Andric } 101*5ffd83dbSDimitry Andric } 102*5ffd83dbSDimitry Andric } 103*5ffd83dbSDimitry Andric 104*5ffd83dbSDimitry Andric for (SDep Pred : ToRemove) 105*5ffd83dbSDimitry Andric SU.removePred(Pred); 106*5ffd83dbSDimitry Andric for (SDep Pred : ToAdd) 107*5ffd83dbSDimitry Andric DAG->addEdge(&SU, Pred); 108*5ffd83dbSDimitry Andric } 109*5ffd83dbSDimitry Andric 110*5ffd83dbSDimitry Andric void ExportClustering::apply(ScheduleDAGInstrs *DAG) { 111*5ffd83dbSDimitry Andric const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII); 112*5ffd83dbSDimitry Andric 113*5ffd83dbSDimitry Andric SmallVector<SUnit *, 8> Chain; 114*5ffd83dbSDimitry Andric 115*5ffd83dbSDimitry Andric // Pass through DAG gathering a list of exports and removing barrier edges 116*5ffd83dbSDimitry Andric // creating dependencies on exports. Freeing exports of successor edges 117*5ffd83dbSDimitry Andric // allows more scheduling freedom, and nothing should be order dependent 118*5ffd83dbSDimitry Andric // on exports. Edges will be added later to order the exports. 119*5ffd83dbSDimitry Andric unsigned PosCount = 0; 120*5ffd83dbSDimitry Andric for (SUnit &SU : DAG->SUnits) { 121*5ffd83dbSDimitry Andric if (!isExport(SU)) 122*5ffd83dbSDimitry Andric continue; 123*5ffd83dbSDimitry Andric 124*5ffd83dbSDimitry Andric Chain.push_back(&SU); 125*5ffd83dbSDimitry Andric if (isPositionExport(TII, &SU)) 126*5ffd83dbSDimitry Andric PosCount++; 127*5ffd83dbSDimitry Andric 128*5ffd83dbSDimitry Andric removeExportDependencies(DAG, SU); 129*5ffd83dbSDimitry Andric 130*5ffd83dbSDimitry Andric SmallVector<SDep, 4> Succs(SU.Succs); 131*5ffd83dbSDimitry Andric for (SDep Succ : Succs) 132*5ffd83dbSDimitry Andric removeExportDependencies(DAG, *Succ.getSUnit()); 133*5ffd83dbSDimitry Andric } 134*5ffd83dbSDimitry Andric 135*5ffd83dbSDimitry Andric // Apply clustering if there are multiple exports 136*5ffd83dbSDimitry Andric if (Chain.size() > 1) { 137*5ffd83dbSDimitry Andric sortChain(TII, Chain, PosCount); 138*5ffd83dbSDimitry Andric buildCluster(Chain, DAG); 139*5ffd83dbSDimitry Andric } 140*5ffd83dbSDimitry Andric } 141*5ffd83dbSDimitry Andric 142*5ffd83dbSDimitry Andric } // end namespace 143*5ffd83dbSDimitry Andric 144*5ffd83dbSDimitry Andric namespace llvm { 145*5ffd83dbSDimitry Andric 146*5ffd83dbSDimitry Andric std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() { 147*5ffd83dbSDimitry Andric return std::make_unique<ExportClustering>(); 148*5ffd83dbSDimitry Andric } 149*5ffd83dbSDimitry Andric 150*5ffd83dbSDimitry Andric } // end namespace llvm 151