xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp (revision 5ffd83dbcc34f10e07f6d3e968ae6365869615f4)
//===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering  -----------===//
2*5ffd83dbSDimitry Andric //
3*5ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*5ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*5ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*5ffd83dbSDimitry Andric //
7*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
8*5ffd83dbSDimitry Andric //
9*5ffd83dbSDimitry Andric /// \file This file contains a DAG scheduling mutation to cluster shader
10*5ffd83dbSDimitry Andric ///       exports.
11*5ffd83dbSDimitry Andric //
12*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
13*5ffd83dbSDimitry Andric 
14*5ffd83dbSDimitry Andric #include "AMDGPUExportClustering.h"
15*5ffd83dbSDimitry Andric #include "AMDGPUSubtarget.h"
16*5ffd83dbSDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17*5ffd83dbSDimitry Andric #include "SIInstrInfo.h"
18*5ffd83dbSDimitry Andric 
19*5ffd83dbSDimitry Andric using namespace llvm;
20*5ffd83dbSDimitry Andric 
21*5ffd83dbSDimitry Andric namespace {
22*5ffd83dbSDimitry Andric 
23*5ffd83dbSDimitry Andric class ExportClustering : public ScheduleDAGMutation {
24*5ffd83dbSDimitry Andric public:
25*5ffd83dbSDimitry Andric   ExportClustering() {}
26*5ffd83dbSDimitry Andric   void apply(ScheduleDAGInstrs *DAG) override;
27*5ffd83dbSDimitry Andric };
28*5ffd83dbSDimitry Andric 
29*5ffd83dbSDimitry Andric static bool isExport(const SUnit &SU) {
30*5ffd83dbSDimitry Andric   const MachineInstr *MI = SU.getInstr();
31*5ffd83dbSDimitry Andric   return MI->getOpcode() == AMDGPU::EXP ||
32*5ffd83dbSDimitry Andric          MI->getOpcode() == AMDGPU::EXP_DONE;
33*5ffd83dbSDimitry Andric }
34*5ffd83dbSDimitry Andric 
35*5ffd83dbSDimitry Andric static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
36*5ffd83dbSDimitry Andric   const MachineInstr *MI = SU->getInstr();
37*5ffd83dbSDimitry Andric   int Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
38*5ffd83dbSDimitry Andric   return Imm >= 12 && Imm <= 15;
39*5ffd83dbSDimitry Andric }
40*5ffd83dbSDimitry Andric 
41*5ffd83dbSDimitry Andric static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,
42*5ffd83dbSDimitry Andric                       unsigned PosCount) {
43*5ffd83dbSDimitry Andric   if (!PosCount || PosCount == Chain.size())
44*5ffd83dbSDimitry Andric     return;
45*5ffd83dbSDimitry Andric 
46*5ffd83dbSDimitry Andric   // Position exports should occur as soon as possible in the shader
47*5ffd83dbSDimitry Andric   // for optimal performance.  This moves position exports before
48*5ffd83dbSDimitry Andric   // other exports while preserving the order within different export
49*5ffd83dbSDimitry Andric   // types (pos or other).
50*5ffd83dbSDimitry Andric   SmallVector<SUnit *, 8> Copy(Chain);
51*5ffd83dbSDimitry Andric   unsigned PosIdx = 0;
52*5ffd83dbSDimitry Andric   unsigned OtherIdx = PosCount;
53*5ffd83dbSDimitry Andric   for (SUnit *SU : Copy) {
54*5ffd83dbSDimitry Andric     if (isPositionExport(TII, SU))
55*5ffd83dbSDimitry Andric       Chain[PosIdx++] = SU;
56*5ffd83dbSDimitry Andric     else
57*5ffd83dbSDimitry Andric       Chain[OtherIdx++] = SU;
58*5ffd83dbSDimitry Andric   }
59*5ffd83dbSDimitry Andric }
60*5ffd83dbSDimitry Andric 
61*5ffd83dbSDimitry Andric static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
62*5ffd83dbSDimitry Andric   SUnit *ChainHead = Exports.front();
63*5ffd83dbSDimitry Andric 
64*5ffd83dbSDimitry Andric   // Now construct cluster from chain by adding new edges.
65*5ffd83dbSDimitry Andric   for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) {
66*5ffd83dbSDimitry Andric     SUnit *SUa = Exports[Idx];
67*5ffd83dbSDimitry Andric     SUnit *SUb = Exports[Idx + 1];
68*5ffd83dbSDimitry Andric 
69*5ffd83dbSDimitry Andric     // Copy all dependencies to the head of the chain to avoid any
70*5ffd83dbSDimitry Andric     // computation being inserted into the chain.
71*5ffd83dbSDimitry Andric     for (const SDep &Pred : SUb->Preds) {
72*5ffd83dbSDimitry Andric       SUnit *PredSU = Pred.getSUnit();
73*5ffd83dbSDimitry Andric       if (!isExport(*PredSU) && !Pred.isWeak())
74*5ffd83dbSDimitry Andric         DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial));
75*5ffd83dbSDimitry Andric     }
76*5ffd83dbSDimitry Andric 
77*5ffd83dbSDimitry Andric     // New barrier edge ordering exports
78*5ffd83dbSDimitry Andric     DAG->addEdge(SUb, SDep(SUa, SDep::Barrier));
79*5ffd83dbSDimitry Andric     // Also add cluster edge
80*5ffd83dbSDimitry Andric     DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
81*5ffd83dbSDimitry Andric   }
82*5ffd83dbSDimitry Andric }
83*5ffd83dbSDimitry Andric 
84*5ffd83dbSDimitry Andric static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) {
85*5ffd83dbSDimitry Andric   SmallVector<SDep, 2> ToAdd, ToRemove;
86*5ffd83dbSDimitry Andric 
87*5ffd83dbSDimitry Andric   for (const SDep &Pred : SU.Preds) {
88*5ffd83dbSDimitry Andric     SUnit *PredSU = Pred.getSUnit();
89*5ffd83dbSDimitry Andric     if (Pred.isBarrier() && isExport(*PredSU)) {
90*5ffd83dbSDimitry Andric       ToRemove.push_back(Pred);
91*5ffd83dbSDimitry Andric       if (isExport(SU))
92*5ffd83dbSDimitry Andric         continue;
93*5ffd83dbSDimitry Andric 
94*5ffd83dbSDimitry Andric       // If we remove a barrier we need to copy dependencies
95*5ffd83dbSDimitry Andric       // from the predecessor to maintain order.
96*5ffd83dbSDimitry Andric       for (const SDep &ExportPred : PredSU->Preds) {
97*5ffd83dbSDimitry Andric         SUnit *ExportPredSU = ExportPred.getSUnit();
98*5ffd83dbSDimitry Andric         if (ExportPred.isBarrier() && !isExport(*ExportPredSU))
99*5ffd83dbSDimitry Andric           ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier));
100*5ffd83dbSDimitry Andric       }
101*5ffd83dbSDimitry Andric     }
102*5ffd83dbSDimitry Andric   }
103*5ffd83dbSDimitry Andric 
104*5ffd83dbSDimitry Andric   for (SDep Pred : ToRemove)
105*5ffd83dbSDimitry Andric     SU.removePred(Pred);
106*5ffd83dbSDimitry Andric   for (SDep Pred : ToAdd)
107*5ffd83dbSDimitry Andric     DAG->addEdge(&SU, Pred);
108*5ffd83dbSDimitry Andric }
109*5ffd83dbSDimitry Andric 
110*5ffd83dbSDimitry Andric void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
111*5ffd83dbSDimitry Andric   const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
112*5ffd83dbSDimitry Andric 
113*5ffd83dbSDimitry Andric   SmallVector<SUnit *, 8> Chain;
114*5ffd83dbSDimitry Andric 
115*5ffd83dbSDimitry Andric   // Pass through DAG gathering a list of exports and removing barrier edges
116*5ffd83dbSDimitry Andric   // creating dependencies on exports. Freeing exports of successor edges
117*5ffd83dbSDimitry Andric   // allows more scheduling freedom, and nothing should be order dependent
118*5ffd83dbSDimitry Andric   // on exports.  Edges will be added later to order the exports.
119*5ffd83dbSDimitry Andric   unsigned PosCount = 0;
120*5ffd83dbSDimitry Andric   for (SUnit &SU : DAG->SUnits) {
121*5ffd83dbSDimitry Andric     if (!isExport(SU))
122*5ffd83dbSDimitry Andric       continue;
123*5ffd83dbSDimitry Andric 
124*5ffd83dbSDimitry Andric     Chain.push_back(&SU);
125*5ffd83dbSDimitry Andric     if (isPositionExport(TII, &SU))
126*5ffd83dbSDimitry Andric       PosCount++;
127*5ffd83dbSDimitry Andric 
128*5ffd83dbSDimitry Andric     removeExportDependencies(DAG, SU);
129*5ffd83dbSDimitry Andric 
130*5ffd83dbSDimitry Andric     SmallVector<SDep, 4> Succs(SU.Succs);
131*5ffd83dbSDimitry Andric     for (SDep Succ : Succs)
132*5ffd83dbSDimitry Andric       removeExportDependencies(DAG, *Succ.getSUnit());
133*5ffd83dbSDimitry Andric   }
134*5ffd83dbSDimitry Andric 
135*5ffd83dbSDimitry Andric   // Apply clustering if there are multiple exports
136*5ffd83dbSDimitry Andric   if (Chain.size() > 1) {
137*5ffd83dbSDimitry Andric     sortChain(TII, Chain, PosCount);
138*5ffd83dbSDimitry Andric     buildCluster(Chain, DAG);
139*5ffd83dbSDimitry Andric   }
140*5ffd83dbSDimitry Andric }
141*5ffd83dbSDimitry Andric 
142*5ffd83dbSDimitry Andric } // end namespace
143*5ffd83dbSDimitry Andric 
144*5ffd83dbSDimitry Andric namespace llvm {
145*5ffd83dbSDimitry Andric 
146*5ffd83dbSDimitry Andric std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() {
147*5ffd83dbSDimitry Andric   return std::make_unique<ExportClustering>();
148*5ffd83dbSDimitry Andric }
149*5ffd83dbSDimitry Andric 
150*5ffd83dbSDimitry Andric } // end namespace llvm
151