15ffd83dbSDimitry Andric //===--- AMDGPUExportClusting.cpp - AMDGPU Export Clustering -------------===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric // 95ffd83dbSDimitry Andric /// \file This file contains a DAG scheduling mutation to cluster shader 105ffd83dbSDimitry Andric /// exports. 115ffd83dbSDimitry Andric // 125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 135ffd83dbSDimitry Andric 145ffd83dbSDimitry Andric #include "AMDGPUExportClustering.h" 155ffd83dbSDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 165ffd83dbSDimitry Andric #include "SIInstrInfo.h" 17*e8d8bef9SDimitry Andric #include "llvm/CodeGen/ScheduleDAGInstrs.h" 185ffd83dbSDimitry Andric 195ffd83dbSDimitry Andric using namespace llvm; 205ffd83dbSDimitry Andric 215ffd83dbSDimitry Andric namespace { 225ffd83dbSDimitry Andric 235ffd83dbSDimitry Andric class ExportClustering : public ScheduleDAGMutation { 245ffd83dbSDimitry Andric public: 255ffd83dbSDimitry Andric ExportClustering() {} 265ffd83dbSDimitry Andric void apply(ScheduleDAGInstrs *DAG) override; 275ffd83dbSDimitry Andric }; 285ffd83dbSDimitry Andric 295ffd83dbSDimitry Andric static bool isExport(const SUnit &SU) { 30*e8d8bef9SDimitry Andric return SIInstrInfo::isEXP(*SU.getInstr()); 315ffd83dbSDimitry Andric } 325ffd83dbSDimitry Andric 335ffd83dbSDimitry Andric static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) { 345ffd83dbSDimitry Andric const MachineInstr *MI = SU->getInstr(); 35*e8d8bef9SDimitry Andric unsigned Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm(); 36*e8d8bef9SDimitry Andric return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST; 375ffd83dbSDimitry Andric } 385ffd83dbSDimitry Andric 395ffd83dbSDimitry Andric static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain, 405ffd83dbSDimitry Andric unsigned PosCount) { 415ffd83dbSDimitry Andric if (!PosCount || PosCount == Chain.size()) 425ffd83dbSDimitry Andric return; 435ffd83dbSDimitry Andric 445ffd83dbSDimitry Andric // Position exports should occur as soon as possible in the shader 455ffd83dbSDimitry Andric // for optimal performance. This moves position exports before 465ffd83dbSDimitry Andric // other exports while preserving the order within different export 475ffd83dbSDimitry Andric // types (pos or other). 485ffd83dbSDimitry Andric SmallVector<SUnit *, 8> Copy(Chain); 495ffd83dbSDimitry Andric unsigned PosIdx = 0; 505ffd83dbSDimitry Andric unsigned OtherIdx = PosCount; 515ffd83dbSDimitry Andric for (SUnit *SU : Copy) { 525ffd83dbSDimitry Andric if (isPositionExport(TII, SU)) 535ffd83dbSDimitry Andric Chain[PosIdx++] = SU; 545ffd83dbSDimitry Andric else 555ffd83dbSDimitry Andric Chain[OtherIdx++] = SU; 565ffd83dbSDimitry Andric } 575ffd83dbSDimitry Andric } 585ffd83dbSDimitry Andric 595ffd83dbSDimitry Andric static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) { 605ffd83dbSDimitry Andric SUnit *ChainHead = Exports.front(); 615ffd83dbSDimitry Andric 625ffd83dbSDimitry Andric // Now construct cluster from chain by adding new edges. 635ffd83dbSDimitry Andric for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) { 645ffd83dbSDimitry Andric SUnit *SUa = Exports[Idx]; 655ffd83dbSDimitry Andric SUnit *SUb = Exports[Idx + 1]; 665ffd83dbSDimitry Andric 675ffd83dbSDimitry Andric // Copy all dependencies to the head of the chain to avoid any 685ffd83dbSDimitry Andric // computation being inserted into the chain. 695ffd83dbSDimitry Andric for (const SDep &Pred : SUb->Preds) { 705ffd83dbSDimitry Andric SUnit *PredSU = Pred.getSUnit(); 715ffd83dbSDimitry Andric if (!isExport(*PredSU) && !Pred.isWeak()) 725ffd83dbSDimitry Andric DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial)); 735ffd83dbSDimitry Andric } 745ffd83dbSDimitry Andric 755ffd83dbSDimitry Andric // New barrier edge ordering exports 765ffd83dbSDimitry Andric DAG->addEdge(SUb, SDep(SUa, SDep::Barrier)); 775ffd83dbSDimitry Andric // Also add cluster edge 785ffd83dbSDimitry Andric DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)); 795ffd83dbSDimitry Andric } 805ffd83dbSDimitry Andric } 815ffd83dbSDimitry Andric 825ffd83dbSDimitry Andric static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) { 835ffd83dbSDimitry Andric SmallVector<SDep, 2> ToAdd, ToRemove; 845ffd83dbSDimitry Andric 855ffd83dbSDimitry Andric for (const SDep &Pred : SU.Preds) { 865ffd83dbSDimitry Andric SUnit *PredSU = Pred.getSUnit(); 875ffd83dbSDimitry Andric if (Pred.isBarrier() && isExport(*PredSU)) { 885ffd83dbSDimitry Andric ToRemove.push_back(Pred); 895ffd83dbSDimitry Andric if (isExport(SU)) 905ffd83dbSDimitry Andric continue; 915ffd83dbSDimitry Andric 925ffd83dbSDimitry Andric // If we remove a barrier we need to copy dependencies 935ffd83dbSDimitry Andric // from the predecessor to maintain order. 945ffd83dbSDimitry Andric for (const SDep &ExportPred : PredSU->Preds) { 955ffd83dbSDimitry Andric SUnit *ExportPredSU = ExportPred.getSUnit(); 965ffd83dbSDimitry Andric if (ExportPred.isBarrier() && !isExport(*ExportPredSU)) 975ffd83dbSDimitry Andric ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier)); 985ffd83dbSDimitry Andric } 995ffd83dbSDimitry Andric } 1005ffd83dbSDimitry Andric } 1015ffd83dbSDimitry Andric 1025ffd83dbSDimitry Andric for (SDep Pred : ToRemove) 1035ffd83dbSDimitry Andric SU.removePred(Pred); 1045ffd83dbSDimitry Andric for (SDep Pred : ToAdd) 1055ffd83dbSDimitry Andric DAG->addEdge(&SU, Pred); 1065ffd83dbSDimitry Andric } 1075ffd83dbSDimitry Andric 1085ffd83dbSDimitry Andric void ExportClustering::apply(ScheduleDAGInstrs *DAG) { 1095ffd83dbSDimitry Andric const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII); 1105ffd83dbSDimitry Andric 1115ffd83dbSDimitry Andric SmallVector<SUnit *, 8> Chain; 1125ffd83dbSDimitry Andric 1135ffd83dbSDimitry Andric // Pass through DAG gathering a list of exports and removing barrier edges 1145ffd83dbSDimitry Andric // creating dependencies on exports. Freeing exports of successor edges 1155ffd83dbSDimitry Andric // allows more scheduling freedom, and nothing should be order dependent 1165ffd83dbSDimitry Andric // on exports. Edges will be added later to order the exports. 1175ffd83dbSDimitry Andric unsigned PosCount = 0; 1185ffd83dbSDimitry Andric for (SUnit &SU : DAG->SUnits) { 1195ffd83dbSDimitry Andric if (!isExport(SU)) 1205ffd83dbSDimitry Andric continue; 1215ffd83dbSDimitry Andric 1225ffd83dbSDimitry Andric Chain.push_back(&SU); 1235ffd83dbSDimitry Andric if (isPositionExport(TII, &SU)) 1245ffd83dbSDimitry Andric PosCount++; 1255ffd83dbSDimitry Andric 1265ffd83dbSDimitry Andric removeExportDependencies(DAG, SU); 1275ffd83dbSDimitry Andric 1285ffd83dbSDimitry Andric SmallVector<SDep, 4> Succs(SU.Succs); 1295ffd83dbSDimitry Andric for (SDep Succ : Succs) 1305ffd83dbSDimitry Andric removeExportDependencies(DAG, *Succ.getSUnit()); 1315ffd83dbSDimitry Andric } 1325ffd83dbSDimitry Andric 1335ffd83dbSDimitry Andric // Apply clustering if there are multiple exports 1345ffd83dbSDimitry Andric if (Chain.size() > 1) { 1355ffd83dbSDimitry Andric sortChain(TII, Chain, PosCount); 1365ffd83dbSDimitry Andric buildCluster(Chain, DAG); 1375ffd83dbSDimitry Andric } 1385ffd83dbSDimitry Andric } 1395ffd83dbSDimitry Andric 1405ffd83dbSDimitry Andric } // end namespace 1415ffd83dbSDimitry Andric 1425ffd83dbSDimitry Andric namespace llvm { 1435ffd83dbSDimitry Andric 1445ffd83dbSDimitry Andric std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() { 1455ffd83dbSDimitry Andric return std::make_unique<ExportClustering>(); 1465ffd83dbSDimitry Andric } 1475ffd83dbSDimitry Andric 1485ffd83dbSDimitry Andric } // end namespace llvm 149