xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/X86PreTileConfig.cpp (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
1*e8d8bef9SDimitry Andric //===-- X86PreTileConfig.cpp - Tile Register Configure---------------------===//
2*e8d8bef9SDimitry Andric //
3*e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*e8d8bef9SDimitry Andric //
7*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8*e8d8bef9SDimitry Andric //
/// \file Pass to pre-configure the shape of AMX registers.
/// AMX registers need to be configured before use. The shape of an AMX
/// register is encoded in the 1st and 2nd machine operands of AMX pseudo
/// instructions.
/// The pldtilecfg instruction configures the tile registers. It should
/// dominate all AMX instructions. The pldtilecfg produces a virtual cfg
/// register, and that cfg register is used by all AMX instructions.
/// This pass finds the common dominator of all AMX instructions and inserts
/// the pldtilecfg instruction. In addition, the cfg register that pldtilecfg
/// produces is inserted as the last operand of each AMX instruction. We use
/// this scheme to model the def-use relationship between the AMX config
/// instruction and the other AMX instructions. Below is an example.
20*e8d8bef9SDimitry Andric ///
21*e8d8bef9SDimitry Andric ///                        ----B1----
22*e8d8bef9SDimitry Andric ///                       /           \
23*e8d8bef9SDimitry Andric ///                      /             \
24*e8d8bef9SDimitry Andric ///                    B2               B3
25*e8d8bef9SDimitry Andric ///    %1:tile = PTILELOADDV        %2:tile = PTILELOADDV
26*e8d8bef9SDimitry Andric ///
27*e8d8bef9SDimitry Andric ///  is transformed to
28*e8d8bef9SDimitry Andric ///
29*e8d8bef9SDimitry Andric ///                            B1
30*e8d8bef9SDimitry Andric ///                 %25:tilecfg = PLDTILECFG
31*e8d8bef9SDimitry Andric ///                       /           \
32*e8d8bef9SDimitry Andric ///                      /             \
33*e8d8bef9SDimitry Andric ///  %1:tile = PTILELOADDV %25    %2:tile = PTILELOADDV %25
34*e8d8bef9SDimitry Andric //
35*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
36*e8d8bef9SDimitry Andric 
37*e8d8bef9SDimitry Andric #include "X86.h"
38*e8d8bef9SDimitry Andric #include "X86InstrBuilder.h"
39*e8d8bef9SDimitry Andric #include "X86RegisterInfo.h"
40*e8d8bef9SDimitry Andric #include "X86Subtarget.h"
41*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
42*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
43*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
44*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
45*e8d8bef9SDimitry Andric #include "llvm/CodeGen/Passes.h"
46*e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
47*e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
48*e8d8bef9SDimitry Andric #include "llvm/CodeGen/TileShapeInfo.h"
49*e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h"
50*e8d8bef9SDimitry Andric 
51*e8d8bef9SDimitry Andric using namespace llvm;
52*e8d8bef9SDimitry Andric 
53*e8d8bef9SDimitry Andric #define DEBUG_TYPE "tile-pre-config"
54*e8d8bef9SDimitry Andric 
55*e8d8bef9SDimitry Andric namespace {
56*e8d8bef9SDimitry Andric 
57*e8d8bef9SDimitry Andric class X86PreTileConfig : public MachineFunctionPass {
58*e8d8bef9SDimitry Andric   // context
59*e8d8bef9SDimitry Andric   MachineFunction *MF = nullptr;
60*e8d8bef9SDimitry Andric   const X86Subtarget *ST = nullptr;
61*e8d8bef9SDimitry Andric   const TargetRegisterInfo *TRI;
62*e8d8bef9SDimitry Andric   const TargetInstrInfo *TII;
63*e8d8bef9SDimitry Andric   MachineDominatorTree *DomTree = nullptr;
64*e8d8bef9SDimitry Andric   MachineRegisterInfo *MRI = nullptr;
65*e8d8bef9SDimitry Andric 
66*e8d8bef9SDimitry Andric   MachineInstr *getTileConfigPoint();
67*e8d8bef9SDimitry Andric 
68*e8d8bef9SDimitry Andric public:
69*e8d8bef9SDimitry Andric   X86PreTileConfig() : MachineFunctionPass(ID) {}
70*e8d8bef9SDimitry Andric 
71*e8d8bef9SDimitry Andric   /// Return the pass name.
72*e8d8bef9SDimitry Andric   StringRef getPassName() const override {
73*e8d8bef9SDimitry Andric     return "Tile Register Pre-configure";
74*e8d8bef9SDimitry Andric   }
75*e8d8bef9SDimitry Andric 
76*e8d8bef9SDimitry Andric   /// X86PreTileConfig analysis usage.
77*e8d8bef9SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
78*e8d8bef9SDimitry Andric 
79*e8d8bef9SDimitry Andric   /// Perform register allocation.
80*e8d8bef9SDimitry Andric   bool runOnMachineFunction(MachineFunction &mf) override;
81*e8d8bef9SDimitry Andric 
82*e8d8bef9SDimitry Andric   static char ID;
83*e8d8bef9SDimitry Andric };
84*e8d8bef9SDimitry Andric 
85*e8d8bef9SDimitry Andric } // end anonymous namespace
86*e8d8bef9SDimitry Andric 
87*e8d8bef9SDimitry Andric char X86PreTileConfig::ID = 0;
88*e8d8bef9SDimitry Andric 
89*e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(X86PreTileConfig, "tilepreconfig",
90*e8d8bef9SDimitry Andric                       "Tile Register Configure", false, false)
91*e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
92*e8d8bef9SDimitry Andric INITIALIZE_PASS_END(X86PreTileConfig, "tilepreconfig",
93*e8d8bef9SDimitry Andric                     "Tile Register Configure", false, false)
94*e8d8bef9SDimitry Andric 
95*e8d8bef9SDimitry Andric void X86PreTileConfig::getAnalysisUsage(AnalysisUsage &AU) const {
96*e8d8bef9SDimitry Andric   AU.setPreservesAll();
97*e8d8bef9SDimitry Andric   AU.addRequired<MachineDominatorTree>();
98*e8d8bef9SDimitry Andric   MachineFunctionPass::getAnalysisUsage(AU);
99*e8d8bef9SDimitry Andric }
100*e8d8bef9SDimitry Andric 
101*e8d8bef9SDimitry Andric static Register buildConfigMI(MachineBasicBlock::iterator MI, int FrameIdx,
102*e8d8bef9SDimitry Andric                               const TargetInstrInfo *TII,
103*e8d8bef9SDimitry Andric                               MachineRegisterInfo *MRI,
104*e8d8bef9SDimitry Andric                               const X86Subtarget *ST) {
105*e8d8bef9SDimitry Andric   auto *MBB = MI->getParent();
106*e8d8bef9SDimitry Andric 
107*e8d8bef9SDimitry Andric   // FIXME: AMX should assume AVX512 enabled.
108*e8d8bef9SDimitry Andric   if (ST->hasAVX512()) {
109*e8d8bef9SDimitry Andric     // Zero stack slot.
110*e8d8bef9SDimitry Andric     Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
111*e8d8bef9SDimitry Andric     BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VPXORDZrr), Zmm)
112*e8d8bef9SDimitry Andric         .addReg(Zmm, RegState::Undef)
113*e8d8bef9SDimitry Andric         .addReg(Zmm, RegState::Undef);
114*e8d8bef9SDimitry Andric     addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VMOVUPSZmr)),
115*e8d8bef9SDimitry Andric                       FrameIdx)
116*e8d8bef9SDimitry Andric         .addReg(Zmm);
117*e8d8bef9SDimitry Andric   }
118*e8d8bef9SDimitry Andric 
119*e8d8bef9SDimitry Andric   // build psuedo ldtilecfg
120*e8d8bef9SDimitry Andric   Register VReg = MRI->createVirtualRegister(&X86::TILECFGRegClass);
121*e8d8bef9SDimitry Andric 
122*e8d8bef9SDimitry Andric   addFrameReference(
123*e8d8bef9SDimitry Andric       BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::PLDTILECFG), VReg), FrameIdx);
124*e8d8bef9SDimitry Andric 
125*e8d8bef9SDimitry Andric   return VReg;
126*e8d8bef9SDimitry Andric }
127*e8d8bef9SDimitry Andric 
128*e8d8bef9SDimitry Andric static ShapeT getShape(const MachineInstr &MI, MachineRegisterInfo *MRI) {
129*e8d8bef9SDimitry Andric   unsigned Opcode = MI.getOpcode();
130*e8d8bef9SDimitry Andric   switch (Opcode) {
131*e8d8bef9SDimitry Andric   default:
132*e8d8bef9SDimitry Andric     llvm_unreachable("Unexpected machine instruction on tile");
133*e8d8bef9SDimitry Andric   case X86::PTILELOADDV:
134*e8d8bef9SDimitry Andric   case X86::PTDPBSSDV:
135*e8d8bef9SDimitry Andric   case X86::PTILEZEROV:
136*e8d8bef9SDimitry Andric     MachineOperand &MO1 = const_cast<MachineOperand &>(MI.getOperand(1));
137*e8d8bef9SDimitry Andric     MachineOperand &MO2 = const_cast<MachineOperand &>(MI.getOperand(2));
138*e8d8bef9SDimitry Andric     ShapeT Shape(&MO1, &MO2, MRI);
139*e8d8bef9SDimitry Andric     return Shape;
140*e8d8bef9SDimitry Andric   }
141*e8d8bef9SDimitry Andric }
142*e8d8bef9SDimitry Andric 
143*e8d8bef9SDimitry Andric MachineInstr *X86PreTileConfig::getTileConfigPoint() {
144*e8d8bef9SDimitry Andric   DenseMap<Register, ShapeT> PhysShapeInfo;
145*e8d8bef9SDimitry Andric   MachineBasicBlock *MBB = nullptr;
146*e8d8bef9SDimitry Andric   DenseSet<const MachineInstr *> MIs;
147*e8d8bef9SDimitry Andric   for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
148*e8d8bef9SDimitry Andric     Register VirtReg = Register::index2VirtReg(i);
149*e8d8bef9SDimitry Andric     if (MRI->reg_nodbg_empty(VirtReg))
150*e8d8bef9SDimitry Andric       continue;
151*e8d8bef9SDimitry Andric     const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
152*e8d8bef9SDimitry Andric     if (RC.getID() != X86::TILERegClassID)
153*e8d8bef9SDimitry Andric       continue;
154*e8d8bef9SDimitry Andric 
155*e8d8bef9SDimitry Andric     // Find the common dominator for all MI that define tile register.
156*e8d8bef9SDimitry Andric     for (const MachineOperand &MO : MRI->def_operands(VirtReg)) {
157*e8d8bef9SDimitry Andric       if (MO.isUndef())
158*e8d8bef9SDimitry Andric         continue;
159*e8d8bef9SDimitry Andric       const auto *MI = MO.getParent();
160*e8d8bef9SDimitry Andric       // PHI or IMPLICIT_DEF instructiion.
161*e8d8bef9SDimitry Andric       // There must be a input tile before PHI instruction.
162*e8d8bef9SDimitry Andric       if (MI->isTransient())
163*e8d8bef9SDimitry Andric         continue;
164*e8d8bef9SDimitry Andric       if (!MBB)
165*e8d8bef9SDimitry Andric         MBB = const_cast<MachineBasicBlock *>(MI->getParent());
166*e8d8bef9SDimitry Andric       MBB = DomTree->findNearestCommonDominator(
167*e8d8bef9SDimitry Andric           MBB, const_cast<MachineBasicBlock *>(MI->getParent()));
168*e8d8bef9SDimitry Andric 
169*e8d8bef9SDimitry Andric       // Collect the instructions that define shape.
170*e8d8bef9SDimitry Andric       ShapeT Shape = getShape(*MI, MRI);
171*e8d8bef9SDimitry Andric       std::array<MachineOperand *, 2> ShapeMOs = {Shape.getRow(),
172*e8d8bef9SDimitry Andric                                                   Shape.getCol()};
173*e8d8bef9SDimitry Andric       for (auto *ShapeMO : ShapeMOs) {
174*e8d8bef9SDimitry Andric         Register ShapeReg = ShapeMO->getReg();
175*e8d8bef9SDimitry Andric         for (const MachineOperand &MO : MRI->def_operands(ShapeReg)) {
176*e8d8bef9SDimitry Andric           const auto *ShapeMI = MO.getParent();
177*e8d8bef9SDimitry Andric           MIs.insert(ShapeMI);
178*e8d8bef9SDimitry Andric         }
179*e8d8bef9SDimitry Andric       }
180*e8d8bef9SDimitry Andric     }
181*e8d8bef9SDimitry Andric   }
182*e8d8bef9SDimitry Andric   if (!MBB)
183*e8d8bef9SDimitry Andric     return nullptr;
184*e8d8bef9SDimitry Andric   // This pass is before the pass of eliminating PHI node, so it
185*e8d8bef9SDimitry Andric   // is in SSA form.
186*e8d8bef9SDimitry Andric   assert(MRI->isSSA() && "Not SSA form in pre-tile config");
187*e8d8bef9SDimitry Andric   // Shape def should dominate tile config MBB.
188*e8d8bef9SDimitry Andric   //    def s           s1    s2
189*e8d8bef9SDimitry Andric   //     / \             \   /
190*e8d8bef9SDimitry Andric   //    /   \             \ /
191*e8d8bef9SDimitry Andric   //  conf               s3=phi(s1,s2)
192*e8d8bef9SDimitry Andric   //                       |
193*e8d8bef9SDimitry Andric   //                       c
194*e8d8bef9SDimitry Andric   //
195*e8d8bef9SDimitry Andric   for (const auto *MI : MIs) {
196*e8d8bef9SDimitry Andric     const MachineBasicBlock *ShapeMBB = MI->getParent();
197*e8d8bef9SDimitry Andric     if (DomTree->dominates(ShapeMBB, MBB))
198*e8d8bef9SDimitry Andric       continue;
199*e8d8bef9SDimitry Andric     if (MI->isMoveImmediate())
200*e8d8bef9SDimitry Andric       continue;
201*e8d8bef9SDimitry Andric     report_fatal_error(MF->getName() + ": Failed to config tile register, "
202*e8d8bef9SDimitry Andric                                        "please define the shape earlier");
203*e8d8bef9SDimitry Andric   }
204*e8d8bef9SDimitry Andric 
205*e8d8bef9SDimitry Andric   // ldtilecfg should be inserted after the MI that define the shape.
206*e8d8bef9SDimitry Andric   MachineBasicBlock::reverse_instr_iterator I, E;
207*e8d8bef9SDimitry Andric   for (I = MBB->instr_rbegin(), E = MBB->instr_rend(); I != E; ++I) {
208*e8d8bef9SDimitry Andric     auto *MI = &*I;
209*e8d8bef9SDimitry Andric     if (MIs.count(MI) && (!MI->isMoveImmediate()))
210*e8d8bef9SDimitry Andric       break;
211*e8d8bef9SDimitry Andric   }
212*e8d8bef9SDimitry Andric   MachineBasicBlock::iterator MII;
213*e8d8bef9SDimitry Andric   if (I == E)
214*e8d8bef9SDimitry Andric     MII = MBB->getFirstNonPHI();
215*e8d8bef9SDimitry Andric   else {
216*e8d8bef9SDimitry Andric     MII = MachineBasicBlock::iterator(&*I);
217*e8d8bef9SDimitry Andric     MII++;
218*e8d8bef9SDimitry Andric   }
219*e8d8bef9SDimitry Andric   return &*MII;
220*e8d8bef9SDimitry Andric }
221*e8d8bef9SDimitry Andric 
222*e8d8bef9SDimitry Andric static void addTileCFGUse(MachineFunction &MF, Register CFG) {
223*e8d8bef9SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
224*e8d8bef9SDimitry Andric 
225*e8d8bef9SDimitry Andric     // Traverse the basic block.
226*e8d8bef9SDimitry Andric     for (MachineInstr &MI : MBB) {
227*e8d8bef9SDimitry Andric       unsigned Opcode = MI.getOpcode();
228*e8d8bef9SDimitry Andric       switch (Opcode) {
229*e8d8bef9SDimitry Andric       default:
230*e8d8bef9SDimitry Andric         break;
231*e8d8bef9SDimitry Andric       case X86::PTILELOADDV:
232*e8d8bef9SDimitry Andric       case X86::PTILESTOREDV:
233*e8d8bef9SDimitry Andric       case X86::PTDPBSSDV:
234*e8d8bef9SDimitry Andric       case X86::PTILEZEROV:
235*e8d8bef9SDimitry Andric         unsigned NumOperands = MI.getNumOperands();
236*e8d8bef9SDimitry Andric         MI.RemoveOperand(NumOperands - 1);
237*e8d8bef9SDimitry Andric         MI.addOperand(MF, MachineOperand::CreateReg(CFG, false));
238*e8d8bef9SDimitry Andric         break;
239*e8d8bef9SDimitry Andric       }
240*e8d8bef9SDimitry Andric     }
241*e8d8bef9SDimitry Andric   }
242*e8d8bef9SDimitry Andric }
243*e8d8bef9SDimitry Andric 
244*e8d8bef9SDimitry Andric bool X86PreTileConfig::runOnMachineFunction(MachineFunction &mf) {
245*e8d8bef9SDimitry Andric   MF = &mf;
246*e8d8bef9SDimitry Andric   MRI = &mf.getRegInfo();
247*e8d8bef9SDimitry Andric   ST = &mf.getSubtarget<X86Subtarget>();
248*e8d8bef9SDimitry Andric   TRI = ST->getRegisterInfo();
249*e8d8bef9SDimitry Andric   TII = mf.getSubtarget().getInstrInfo();
250*e8d8bef9SDimitry Andric   DomTree = &getAnalysis<MachineDominatorTree>();
251*e8d8bef9SDimitry Andric 
252*e8d8bef9SDimitry Andric   MachineInstr *MI = getTileConfigPoint();
253*e8d8bef9SDimitry Andric   if (!MI)
254*e8d8bef9SDimitry Andric     return false;
255*e8d8bef9SDimitry Andric   unsigned Size = ST->getTileConfigSize();
256*e8d8bef9SDimitry Andric   Align Alignment = ST->getTileConfigAlignment();
257*e8d8bef9SDimitry Andric   int SS = mf.getFrameInfo().CreateStackObject(Size, Alignment, false);
258*e8d8bef9SDimitry Andric   Register CFG = buildConfigMI(MI, SS, TII, MRI, ST);
259*e8d8bef9SDimitry Andric   addTileCFGUse(mf, CFG);
260*e8d8bef9SDimitry Andric   return true;
261*e8d8bef9SDimitry Andric }
262*e8d8bef9SDimitry Andric 
263*e8d8bef9SDimitry Andric FunctionPass *llvm::createX86PreTileConfigPass() {
264*e8d8bef9SDimitry Andric   return new X86PreTileConfig();
265*e8d8bef9SDimitry Andric }
266