xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
15ffd83dbSDimitry Andric //===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering ------------===//
25ffd83dbSDimitry Andric //
35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric //
75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric //
95ffd83dbSDimitry Andric /// \file This file contains a DAG scheduling mutation to cluster shader
105ffd83dbSDimitry Andric ///       exports.
115ffd83dbSDimitry Andric //
125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
135ffd83dbSDimitry Andric 
145ffd83dbSDimitry Andric #include "AMDGPUExportClustering.h"
155ffd83dbSDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
165ffd83dbSDimitry Andric #include "SIInstrInfo.h"
17e8d8bef9SDimitry Andric #include "llvm/CodeGen/ScheduleDAGInstrs.h"
185ffd83dbSDimitry Andric 
195ffd83dbSDimitry Andric using namespace llvm;
205ffd83dbSDimitry Andric 
215ffd83dbSDimitry Andric namespace {
225ffd83dbSDimitry Andric 
235ffd83dbSDimitry Andric class ExportClustering : public ScheduleDAGMutation {
245ffd83dbSDimitry Andric public:
25*81ad6265SDimitry Andric   ExportClustering() = default;
265ffd83dbSDimitry Andric   void apply(ScheduleDAGInstrs *DAG) override;
275ffd83dbSDimitry Andric };
285ffd83dbSDimitry Andric 
isExport(const SUnit & SU)295ffd83dbSDimitry Andric static bool isExport(const SUnit &SU) {
30e8d8bef9SDimitry Andric   return SIInstrInfo::isEXP(*SU.getInstr());
315ffd83dbSDimitry Andric }
325ffd83dbSDimitry Andric 
isPositionExport(const SIInstrInfo * TII,SUnit * SU)335ffd83dbSDimitry Andric static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
345ffd83dbSDimitry Andric   const MachineInstr *MI = SU->getInstr();
35e8d8bef9SDimitry Andric   unsigned Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
36e8d8bef9SDimitry Andric   return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST;
375ffd83dbSDimitry Andric }
385ffd83dbSDimitry Andric 
sortChain(const SIInstrInfo * TII,SmallVector<SUnit *,8> & Chain,unsigned PosCount)395ffd83dbSDimitry Andric static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,
405ffd83dbSDimitry Andric                       unsigned PosCount) {
415ffd83dbSDimitry Andric   if (!PosCount || PosCount == Chain.size())
425ffd83dbSDimitry Andric     return;
435ffd83dbSDimitry Andric 
445ffd83dbSDimitry Andric   // Position exports should occur as soon as possible in the shader
455ffd83dbSDimitry Andric   // for optimal performance.  This moves position exports before
465ffd83dbSDimitry Andric   // other exports while preserving the order within different export
475ffd83dbSDimitry Andric   // types (pos or other).
485ffd83dbSDimitry Andric   SmallVector<SUnit *, 8> Copy(Chain);
495ffd83dbSDimitry Andric   unsigned PosIdx = 0;
505ffd83dbSDimitry Andric   unsigned OtherIdx = PosCount;
515ffd83dbSDimitry Andric   for (SUnit *SU : Copy) {
525ffd83dbSDimitry Andric     if (isPositionExport(TII, SU))
535ffd83dbSDimitry Andric       Chain[PosIdx++] = SU;
545ffd83dbSDimitry Andric     else
555ffd83dbSDimitry Andric       Chain[OtherIdx++] = SU;
565ffd83dbSDimitry Andric   }
575ffd83dbSDimitry Andric }
585ffd83dbSDimitry Andric 
buildCluster(ArrayRef<SUnit * > Exports,ScheduleDAGInstrs * DAG)595ffd83dbSDimitry Andric static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
605ffd83dbSDimitry Andric   SUnit *ChainHead = Exports.front();
615ffd83dbSDimitry Andric 
625ffd83dbSDimitry Andric   // Now construct cluster from chain by adding new edges.
635ffd83dbSDimitry Andric   for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) {
645ffd83dbSDimitry Andric     SUnit *SUa = Exports[Idx];
655ffd83dbSDimitry Andric     SUnit *SUb = Exports[Idx + 1];
665ffd83dbSDimitry Andric 
675ffd83dbSDimitry Andric     // Copy all dependencies to the head of the chain to avoid any
685ffd83dbSDimitry Andric     // computation being inserted into the chain.
695ffd83dbSDimitry Andric     for (const SDep &Pred : SUb->Preds) {
705ffd83dbSDimitry Andric       SUnit *PredSU = Pred.getSUnit();
715ffd83dbSDimitry Andric       if (!isExport(*PredSU) && !Pred.isWeak())
725ffd83dbSDimitry Andric         DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial));
735ffd83dbSDimitry Andric     }
745ffd83dbSDimitry Andric 
755ffd83dbSDimitry Andric     // New barrier edge ordering exports
765ffd83dbSDimitry Andric     DAG->addEdge(SUb, SDep(SUa, SDep::Barrier));
775ffd83dbSDimitry Andric     // Also add cluster edge
785ffd83dbSDimitry Andric     DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
795ffd83dbSDimitry Andric   }
805ffd83dbSDimitry Andric }
815ffd83dbSDimitry Andric 
removeExportDependencies(ScheduleDAGInstrs * DAG,SUnit & SU)825ffd83dbSDimitry Andric static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) {
835ffd83dbSDimitry Andric   SmallVector<SDep, 2> ToAdd, ToRemove;
845ffd83dbSDimitry Andric 
855ffd83dbSDimitry Andric   for (const SDep &Pred : SU.Preds) {
865ffd83dbSDimitry Andric     SUnit *PredSU = Pred.getSUnit();
875ffd83dbSDimitry Andric     if (Pred.isBarrier() && isExport(*PredSU)) {
885ffd83dbSDimitry Andric       ToRemove.push_back(Pred);
895ffd83dbSDimitry Andric       if (isExport(SU))
905ffd83dbSDimitry Andric         continue;
915ffd83dbSDimitry Andric 
925ffd83dbSDimitry Andric       // If we remove a barrier we need to copy dependencies
935ffd83dbSDimitry Andric       // from the predecessor to maintain order.
945ffd83dbSDimitry Andric       for (const SDep &ExportPred : PredSU->Preds) {
955ffd83dbSDimitry Andric         SUnit *ExportPredSU = ExportPred.getSUnit();
965ffd83dbSDimitry Andric         if (ExportPred.isBarrier() && !isExport(*ExportPredSU))
975ffd83dbSDimitry Andric           ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier));
985ffd83dbSDimitry Andric       }
995ffd83dbSDimitry Andric     }
1005ffd83dbSDimitry Andric   }
1015ffd83dbSDimitry Andric 
1025ffd83dbSDimitry Andric   for (SDep Pred : ToRemove)
1035ffd83dbSDimitry Andric     SU.removePred(Pred);
1045ffd83dbSDimitry Andric   for (SDep Pred : ToAdd)
1055ffd83dbSDimitry Andric     DAG->addEdge(&SU, Pred);
1065ffd83dbSDimitry Andric }
1075ffd83dbSDimitry Andric 
apply(ScheduleDAGInstrs * DAG)1085ffd83dbSDimitry Andric void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
1095ffd83dbSDimitry Andric   const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
1105ffd83dbSDimitry Andric 
1115ffd83dbSDimitry Andric   SmallVector<SUnit *, 8> Chain;
1125ffd83dbSDimitry Andric 
1135ffd83dbSDimitry Andric   // Pass through DAG gathering a list of exports and removing barrier edges
1145ffd83dbSDimitry Andric   // creating dependencies on exports. Freeing exports of successor edges
1155ffd83dbSDimitry Andric   // allows more scheduling freedom, and nothing should be order dependent
1165ffd83dbSDimitry Andric   // on exports.  Edges will be added later to order the exports.
1175ffd83dbSDimitry Andric   unsigned PosCount = 0;
1185ffd83dbSDimitry Andric   for (SUnit &SU : DAG->SUnits) {
1195ffd83dbSDimitry Andric     if (!isExport(SU))
1205ffd83dbSDimitry Andric       continue;
1215ffd83dbSDimitry Andric 
1225ffd83dbSDimitry Andric     Chain.push_back(&SU);
1235ffd83dbSDimitry Andric     if (isPositionExport(TII, &SU))
1245ffd83dbSDimitry Andric       PosCount++;
1255ffd83dbSDimitry Andric 
1265ffd83dbSDimitry Andric     removeExportDependencies(DAG, SU);
1275ffd83dbSDimitry Andric 
1285ffd83dbSDimitry Andric     SmallVector<SDep, 4> Succs(SU.Succs);
1295ffd83dbSDimitry Andric     for (SDep Succ : Succs)
1305ffd83dbSDimitry Andric       removeExportDependencies(DAG, *Succ.getSUnit());
1315ffd83dbSDimitry Andric   }
1325ffd83dbSDimitry Andric 
1335ffd83dbSDimitry Andric   // Apply clustering if there are multiple exports
1345ffd83dbSDimitry Andric   if (Chain.size() > 1) {
1355ffd83dbSDimitry Andric     sortChain(TII, Chain, PosCount);
1365ffd83dbSDimitry Andric     buildCluster(Chain, DAG);
1375ffd83dbSDimitry Andric   }
1385ffd83dbSDimitry Andric }
1395ffd83dbSDimitry Andric 
1405ffd83dbSDimitry Andric } // end namespace
1415ffd83dbSDimitry Andric 
1425ffd83dbSDimitry Andric namespace llvm {
1435ffd83dbSDimitry Andric 
createAMDGPUExportClusteringDAGMutation()1445ffd83dbSDimitry Andric std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() {
1455ffd83dbSDimitry Andric   return std::make_unique<ExportClustering>();
1465ffd83dbSDimitry Andric }
1475ffd83dbSDimitry Andric 
1485ffd83dbSDimitry Andric } // end namespace llvm
149