xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/X86FastPreTileConfig.cpp (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
1*81ad6265SDimitry Andric //===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===//
2*81ad6265SDimitry Andric //
3*81ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*81ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*81ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*81ad6265SDimitry Andric //
7*81ad6265SDimitry Andric //===----------------------------------------------------------------------===//
8*81ad6265SDimitry Andric //
9*81ad6265SDimitry Andric /// \file Pass to preconfig the shape of physical tile registers
/// It inserts ldtilecfg ahead of each group of tile registers. The algorithm
/// walks each instruction of a basic block in reverse order. All the tile
/// registers that live out of the basic block are spilled and reloaded
/// before their users. It also checks the dependency of the shape to ensure
/// the shape is defined before ldtilecfg.
15*81ad6265SDimitry Andric //
16*81ad6265SDimitry Andric //===----------------------------------------------------------------------===//
17*81ad6265SDimitry Andric 
18*81ad6265SDimitry Andric #include "X86.h"
19*81ad6265SDimitry Andric #include "X86InstrBuilder.h"
20*81ad6265SDimitry Andric #include "X86MachineFunctionInfo.h"
21*81ad6265SDimitry Andric #include "X86RegisterInfo.h"
22*81ad6265SDimitry Andric #include "X86Subtarget.h"
23*81ad6265SDimitry Andric #include "llvm/ADT/DepthFirstIterator.h"
24*81ad6265SDimitry Andric #include "llvm/ADT/PostOrderIterator.h"
25*81ad6265SDimitry Andric #include "llvm/ADT/Statistic.h"
26*81ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
27*81ad6265SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
28*81ad6265SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
29*81ad6265SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
30*81ad6265SDimitry Andric #include "llvm/CodeGen/Passes.h"
31*81ad6265SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
32*81ad6265SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
33*81ad6265SDimitry Andric #include "llvm/InitializePasses.h"
34*81ad6265SDimitry Andric #include "llvm/Support/Debug.h"
35*81ad6265SDimitry Andric 
36*81ad6265SDimitry Andric using namespace llvm;
37*81ad6265SDimitry Andric 
38*81ad6265SDimitry Andric #define DEBUG_TYPE "fastpretileconfig"
39*81ad6265SDimitry Andric 
40*81ad6265SDimitry Andric STATISTIC(NumStores, "Number of stores added");
41*81ad6265SDimitry Andric STATISTIC(NumLoads, "Number of loads added");
42*81ad6265SDimitry Andric 
43*81ad6265SDimitry Andric namespace {
44*81ad6265SDimitry Andric 
class X86FastPreTileConfig : public MachineFunctionPass {
  // Cached objects for the function currently being processed.
  MachineFunction *MF = nullptr;
  const X86Subtarget *ST = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  X86MachineFunctionInfo *X86FI = nullptr;
  MachineFrameInfo *MFI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  // The basic block currently being configured (set by configBasicBlock).
  MachineBasicBlock *MBB = nullptr;
  // Frame index of the stack slot holding the tile configure data;
  // -1 until the slot is created lazily.
  int CfgSS = -1;
  // Row/column shape registers and spill-slot address register that
  // replace a converted tile PHI (see convertPHI).
  struct PHIInfo {
    Register Row;
    Register Col;
    Register StackAddr;
  };
  // Tracks tile PHIs already converted, to break circular PHI references.
  DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;

  /// Maps virtual regs to the frame index where these values are spilled.
  IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;

  /// Has a bit set for tile virtual register for which it was determined
  /// that it is alive across blocks.
  BitVector MayLiveAcrossBlocks;

  // Return (creating on first use) the spill slot for \p VirtReg.
  int getStackSpaceFor(Register VirtReg);
  // Zero-initialize the tile config stack object and set the palette byte.
  void InitializeTileConfigStackSpace();
  // Whether \p VirtReg may be used outside the current config region.
  bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
  void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
  void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
              MachineOperand *RowMO, MachineOperand *ColMO);
  // Rewrite tile PHIs so none depends on another PHI in the same block.
  void canonicalizePHIs(MachineBasicBlock &MBB);
  // Replace a tile PHI with shape/address PHIs plus a tile load.
  void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
  void convertPHIs(MachineBasicBlock &MBB);
  bool configBasicBlock(MachineBasicBlock &MBB);

public:
  X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}

  /// Return the pass name.
  StringRef getPassName() const override {
    return "Fast Tile Register Preconfigure";
  }

  /// Perform tile register configure.
  bool runOnMachineFunction(MachineFunction &MFunc) override;

  static char ID;
};
93*81ad6265SDimitry Andric 
94*81ad6265SDimitry Andric } // end anonymous namespace
95*81ad6265SDimitry Andric 
96*81ad6265SDimitry Andric char X86FastPreTileConfig::ID = 0;
97*81ad6265SDimitry Andric 
98*81ad6265SDimitry Andric INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
99*81ad6265SDimitry Andric                       "Fast Tile Register Preconfigure", false, false)
100*81ad6265SDimitry Andric INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
101*81ad6265SDimitry Andric                     "Fast Tile Register Preconfigure", false, false)
102*81ad6265SDimitry Andric 
103*81ad6265SDimitry Andric static bool dominates(MachineBasicBlock &MBB,
104*81ad6265SDimitry Andric                       MachineBasicBlock::const_iterator A,
105*81ad6265SDimitry Andric                       MachineBasicBlock::const_iterator B) {
106*81ad6265SDimitry Andric   auto MBBEnd = MBB.end();
107*81ad6265SDimitry Andric   if (B == MBBEnd)
108*81ad6265SDimitry Andric     return true;
109*81ad6265SDimitry Andric 
110*81ad6265SDimitry Andric   MachineBasicBlock::const_iterator I = MBB.begin();
111*81ad6265SDimitry Andric   for (; &*I != A && &*I != B; ++I)
112*81ad6265SDimitry Andric     ;
113*81ad6265SDimitry Andric 
114*81ad6265SDimitry Andric   return &*I == A;
115*81ad6265SDimitry Andric }
116*81ad6265SDimitry Andric 
117*81ad6265SDimitry Andric /// This allocates space for the specified virtual register to be held on the
118*81ad6265SDimitry Andric /// stack.
119*81ad6265SDimitry Andric int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
120*81ad6265SDimitry Andric   // Find the location Reg would belong...
121*81ad6265SDimitry Andric   int SS = StackSlotForVirtReg[VirtReg];
122*81ad6265SDimitry Andric   // Already has space allocated?
123*81ad6265SDimitry Andric   if (SS != -1)
124*81ad6265SDimitry Andric     return SS;
125*81ad6265SDimitry Andric 
126*81ad6265SDimitry Andric   // Allocate a new stack object for this spill location...
127*81ad6265SDimitry Andric   const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
128*81ad6265SDimitry Andric   unsigned Size = TRI->getSpillSize(RC);
129*81ad6265SDimitry Andric   Align Alignment = TRI->getSpillAlign(RC);
130*81ad6265SDimitry Andric   int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);
131*81ad6265SDimitry Andric 
132*81ad6265SDimitry Andric   // Assign the slot.
133*81ad6265SDimitry Andric   StackSlotForVirtReg[VirtReg] = FrameIdx;
134*81ad6265SDimitry Andric   return FrameIdx;
135*81ad6265SDimitry Andric }
136*81ad6265SDimitry Andric 
/// Returns false if \p VirtReg is known to not live out of the current config.
/// If \p VirtReg lives out of the current MBB, it must live out of the current
/// config.
140*81ad6265SDimitry Andric bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
141*81ad6265SDimitry Andric   if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
142*81ad6265SDimitry Andric     return true;
143*81ad6265SDimitry Andric 
144*81ad6265SDimitry Andric   for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
145*81ad6265SDimitry Andric     if (UseInst.getParent() != MBB) {
146*81ad6265SDimitry Andric       MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
147*81ad6265SDimitry Andric       return true;
148*81ad6265SDimitry Andric     }
149*81ad6265SDimitry Andric 
150*81ad6265SDimitry Andric     // The use and def are in the same MBB. If the tile register is
151*81ad6265SDimitry Andric     // reconfigured, it is crobbered and we need to spill and reload
152*81ad6265SDimitry Andric     // tile register.
153*81ad6265SDimitry Andric     if (CfgMI) {
154*81ad6265SDimitry Andric       if (dominates(*MBB, *CfgMI, UseInst)) {
155*81ad6265SDimitry Andric         MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
156*81ad6265SDimitry Andric         return true;
157*81ad6265SDimitry Andric       }
158*81ad6265SDimitry Andric     }
159*81ad6265SDimitry Andric   }
160*81ad6265SDimitry Andric 
161*81ad6265SDimitry Andric   return false;
162*81ad6265SDimitry Andric }
163*81ad6265SDimitry Andric 
164*81ad6265SDimitry Andric void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
165*81ad6265SDimitry Andric   MachineBasicBlock &MBB = MF->front();
166*81ad6265SDimitry Andric   MachineInstr *MI = &*MBB.getFirstNonPHI();
167*81ad6265SDimitry Andric   DebugLoc DL;
168*81ad6265SDimitry Andric   if (ST->hasAVX512()) {
169*81ad6265SDimitry Andric     Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
170*81ad6265SDimitry Andric     BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
171*81ad6265SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
172*81ad6265SDimitry Andric         .addReg(Zmm);
173*81ad6265SDimitry Andric   } else if (ST->hasAVX2()) {
174*81ad6265SDimitry Andric     Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
175*81ad6265SDimitry Andric     BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
176*81ad6265SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
177*81ad6265SDimitry Andric         .addReg(Ymm);
178*81ad6265SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
179*81ad6265SDimitry Andric                       32)
180*81ad6265SDimitry Andric         .addReg(Ymm);
181*81ad6265SDimitry Andric   } else {
182*81ad6265SDimitry Andric     assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
183*81ad6265SDimitry Andric     unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
184*81ad6265SDimitry Andric     Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
185*81ad6265SDimitry Andric     BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
186*81ad6265SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
187*81ad6265SDimitry Andric         .addReg(Xmm);
188*81ad6265SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
189*81ad6265SDimitry Andric         .addReg(Xmm);
190*81ad6265SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
191*81ad6265SDimitry Andric         .addReg(Xmm);
192*81ad6265SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
193*81ad6265SDimitry Andric         .addReg(Xmm);
194*81ad6265SDimitry Andric   }
195*81ad6265SDimitry Andric   // Fill in the palette first.
196*81ad6265SDimitry Andric   addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
197*81ad6265SDimitry Andric       .addImm(1);
198*81ad6265SDimitry Andric }
199*81ad6265SDimitry Andric 
/// Insert spill instruction for \p VirtReg before \p Before.
201*81ad6265SDimitry Andric /// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
202*81ad6265SDimitry Andric void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
203*81ad6265SDimitry Andric                                  Register VirtReg, bool Kill) {
204*81ad6265SDimitry Andric   LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
205*81ad6265SDimitry Andric   int FI = getStackSpaceFor(VirtReg);
206*81ad6265SDimitry Andric   LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');
207*81ad6265SDimitry Andric 
208*81ad6265SDimitry Andric   const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
209*81ad6265SDimitry Andric   // Don't need shape information for tile store, becasue it is adjacent to
210*81ad6265SDimitry Andric   // the tile def instruction.
211*81ad6265SDimitry Andric   TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI);
212*81ad6265SDimitry Andric   ++NumStores;
213*81ad6265SDimitry Andric 
214*81ad6265SDimitry Andric   // TODO: update DBG_VALUEs
215*81ad6265SDimitry Andric }
216*81ad6265SDimitry Andric 
/// Insert reload instruction for \p OrigReg before \p UseMI.
218*81ad6265SDimitry Andric void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
219*81ad6265SDimitry Andric                                   Register OrigReg, MachineOperand *RowMO,
220*81ad6265SDimitry Andric                                   MachineOperand *ColMO) {
221*81ad6265SDimitry Andric   int FI = getStackSpaceFor(OrigReg);
222*81ad6265SDimitry Andric   const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
223*81ad6265SDimitry Andric   Register TileReg;
224*81ad6265SDimitry Andric   // Fold copy to tileload
225*81ad6265SDimitry Andric   // BB1:
226*81ad6265SDimitry Andric   // spill src to s
227*81ad6265SDimitry Andric   //
228*81ad6265SDimitry Andric   // BB2:
229*81ad6265SDimitry Andric   // t = copy src
230*81ad6265SDimitry Andric   // -->
231*81ad6265SDimitry Andric   // t = tileload (s)
232*81ad6265SDimitry Andric   if (UseMI->isCopy())
233*81ad6265SDimitry Andric     TileReg = UseMI->getOperand(0).getReg();
234*81ad6265SDimitry Andric   else
235*81ad6265SDimitry Andric     TileReg = MRI->createVirtualRegister(&RC);
236*81ad6265SDimitry Andric   // Can't use TII->loadRegFromStackSlot(), because we need the shape
237*81ad6265SDimitry Andric   // information for reload.
238*81ad6265SDimitry Andric   // tileloadd (%sp, %idx), %tmm
239*81ad6265SDimitry Andric   unsigned Opc = X86::PTILELOADDV;
240*81ad6265SDimitry Andric   Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
241*81ad6265SDimitry Andric   // FIXME: MBB is not the parent of UseMI.
242*81ad6265SDimitry Andric   MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
243*81ad6265SDimitry Andric                                 TII->get(X86::MOV64ri), StrideReg)
244*81ad6265SDimitry Andric                             .addImm(64);
245*81ad6265SDimitry Andric   NewMI = addFrameReference(
246*81ad6265SDimitry Andric       BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
247*81ad6265SDimitry Andric           .addReg(RowMO->getReg())
248*81ad6265SDimitry Andric           .addReg(ColMO->getReg()),
249*81ad6265SDimitry Andric       FI);
250*81ad6265SDimitry Andric   MachineOperand &MO = NewMI->getOperand(5);
251*81ad6265SDimitry Andric   MO.setReg(StrideReg);
252*81ad6265SDimitry Andric   MO.setIsKill(true);
253*81ad6265SDimitry Andric   RowMO->setIsKill(false);
254*81ad6265SDimitry Andric   ColMO->setIsKill(false);
255*81ad6265SDimitry Andric   // Erase copy instruction after it is folded.
256*81ad6265SDimitry Andric   if (UseMI->isCopy()) {
257*81ad6265SDimitry Andric     UseMI->eraseFromParent();
258*81ad6265SDimitry Andric   } else {
259*81ad6265SDimitry Andric     // Replace the register in the user MI.
260*81ad6265SDimitry Andric     for (auto &MO : UseMI->operands()) {
261*81ad6265SDimitry Andric       if (MO.isReg() && MO.getReg() == OrigReg)
262*81ad6265SDimitry Andric         MO.setReg(TileReg);
263*81ad6265SDimitry Andric     }
264*81ad6265SDimitry Andric   }
265*81ad6265SDimitry Andric 
266*81ad6265SDimitry Andric   ++NumLoads;
267*81ad6265SDimitry Andric   LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
268*81ad6265SDimitry Andric                     << printReg(TileReg, TRI) << '\n');
269*81ad6265SDimitry Andric }
270*81ad6265SDimitry Andric 
271*81ad6265SDimitry Andric static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
272*81ad6265SDimitry Andric   // The instruction must have 3 operands: tile def, row, col.
273*81ad6265SDimitry Andric   if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
274*81ad6265SDimitry Andric     return false;
275*81ad6265SDimitry Andric   MachineOperand &MO = MI.getOperand(0);
276*81ad6265SDimitry Andric 
277*81ad6265SDimitry Andric   if (MO.isReg()) {
278*81ad6265SDimitry Andric     Register Reg = MO.getReg();
279*81ad6265SDimitry Andric     // FIXME it may be used after Greedy RA and the physical
280*81ad6265SDimitry Andric     // register is not rewritten yet.
281*81ad6265SDimitry Andric     if (Reg.isVirtual() &&
282*81ad6265SDimitry Andric         MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
283*81ad6265SDimitry Andric       return true;
284*81ad6265SDimitry Andric     if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
285*81ad6265SDimitry Andric       return true;
286*81ad6265SDimitry Andric   }
287*81ad6265SDimitry Andric 
288*81ad6265SDimitry Andric   return false;
289*81ad6265SDimitry Andric }
290*81ad6265SDimitry Andric 
291*81ad6265SDimitry Andric static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
292*81ad6265SDimitry Andric   MachineInstr *MI = MRI->getVRegDef(TileReg);
293*81ad6265SDimitry Andric   if (isTileDef(MRI, *MI)) {
294*81ad6265SDimitry Andric     MachineOperand *RowMO = &MI->getOperand(1);
295*81ad6265SDimitry Andric     MachineOperand *ColMO = &MI->getOperand(2);
296*81ad6265SDimitry Andric     return ShapeT(RowMO, ColMO, MRI);
297*81ad6265SDimitry Andric   } else if (MI->isCopy()) {
298*81ad6265SDimitry Andric     TileReg = MI->getOperand(1).getReg();
299*81ad6265SDimitry Andric     return getShape(MRI, TileReg);
300*81ad6265SDimitry Andric   }
301*81ad6265SDimitry Andric 
302*81ad6265SDimitry Andric   // The def should not be PHI node, because we walk the MBB in reverse post
303*81ad6265SDimitry Andric   // order.
304*81ad6265SDimitry Andric   assert(MI->isPHI() && "Unexpected PHI when get shape.");
305*81ad6265SDimitry Andric   llvm_unreachable("Unexpected MI when get shape.");
306*81ad6265SDimitry Andric }
307*81ad6265SDimitry Andric 
308*81ad6265SDimitry Andric // BB0:
309*81ad6265SDimitry Andric // spill t0 to s0
310*81ad6265SDimitry Andric // BB1:
311*81ad6265SDimitry Andric // spill t1 to s1
312*81ad6265SDimitry Andric //
313*81ad6265SDimitry Andric // BB2:
314*81ad6265SDimitry Andric // t = phi [t0, bb0] [t1, bb1]
315*81ad6265SDimitry Andric // -->
316*81ad6265SDimitry Andric // row = phi [r0, bb0] [r1, bb1]
317*81ad6265SDimitry Andric // col = phi [c0, bb0] [c1, bb1]
318*81ad6265SDimitry Andric //   s = phi [s0, bb0] [s1, bb1]
319*81ad6265SDimitry Andric //   t = tileload row, col, s
320*81ad6265SDimitry Andric // The new instruction is inserted at the end of the phi node. The order
321*81ad6265SDimitry Andric // of the original phi node is not ensured.
void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
                                      MachineInstr &PHI) {
  // 1. Create instruction to get stack slot address of each incoming block.
  // 2. Create PHI node for the stack address.
  // 3. Create PHI node for shape. If one of the incoming shape is immediate
  //    use the immediate and delete the PHI node.
  // 4. Create tileload instruction from the stack address.
  // The three replacement PHIs (stack address, row, col) are inserted right
  // after the tile PHI being converted.
  Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                        TII->get(X86::PHI), StackAddrReg);
  Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), RowReg);
  Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), ColReg);
  // Record the mapping of phi node and its row/column information before
  // visiting the incoming values, so circular PHI references terminate.
  VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};

  for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
    // Get the 2 incoming value of tile register and MBB.
    Register InTileReg = PHI.getOperand(I).getReg();
    // Mark it as liveout, so that it will be spilled when visiting
    // the incoming MBB. Otherwise, since the phi will be deleted, it
    // would miss the spill when visiting the incoming MBB.
    MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));
    MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();

    MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
    MachineBasicBlock::iterator InsertPos;
    if (TileDefMI->isPHI()) {
      InsertPos = TileDefMI->getParent()->getFirstNonPHI();
      if (VisitedPHIs.count(TileDefMI)) { // circular phi reference
        //        def t1
        //       /       \
        //  def t2       t3 = phi(t1, t4) <--
        //       \       /                  |
        //      t4 = phi(t2, t3)-------------
        //
        // For each (row, column and stack address) append phi incoming value.
        // Create r3 = phi(r1, r4)
        // Create r4 = phi(r2, r3)
        Register InRowReg = VisitedPHIs[TileDefMI].Row;
        Register InColReg = VisitedPHIs[TileDefMI].Col;
        Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
        continue;
      } else {
        // Recursively convert PHI to tileload
        convertPHI(TileDefMI->getParent(), *TileDefMI);
        // The PHI node is converted to tileload instruction. Get the stack
        // address from tileload operands.
        MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
        assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
        Register InRowReg = TileLoad->getOperand(1).getReg();
        Register InColReg = TileLoad->getOperand(2).getReg();
        Register InStackAddrReg = TileLoad->getOperand(3).getReg();
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
      }
    } else {
      // Non-PHI def: insert the address materialization right at the def.
      InsertPos = TileDefMI->getIterator();

      // Fill the incoming operand of row/column phi instruction.
      ShapeT Shape = getShape(MRI, InTileReg);
      Shape.getRow()->setIsKill(false);
      Shape.getCol()->setIsKill(false);
      RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
      ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);

      // The incoming tile register lives out of its def BB, so it will be
      // spilled. Create an LEA to get the spill stack slot address for the
      // tile register.
      int FI = getStackSpaceFor(InTileReg);
      Register InStackAddrReg =
          MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
      addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
                        TII->get(X86::LEA64r), InStackAddrReg)
                    .addFrameIndex(FI),
                0);
      AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
    }
  }

  // Materialize the merged value: t = tileload(row, col, stackaddr) with a
  // constant stride of 64 bytes, inserted after all PHIs of this block.
  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
      .addImm(64);
  Register TileReg = PHI.getOperand(0).getReg();
  MachineInstr *NewMI = addDirectMem(
      BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
          .addReg(RowReg)
          .addReg(ColReg),
      StackAddrReg);
  // Operand 5 of the memory reference is the index register; patch in the
  // stride there.
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  PHI.eraseFromParent();
  VisitedPHIs.erase(&PHI);
}
424*81ad6265SDimitry Andric 
425*81ad6265SDimitry Andric static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
426*81ad6265SDimitry Andric   MachineOperand &MO = MI.getOperand(0);
427*81ad6265SDimitry Andric   if (MO.isReg() && MO.getReg().isVirtual() &&
428*81ad6265SDimitry Andric       MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID)
429*81ad6265SDimitry Andric     return true;
430*81ad6265SDimitry Andric   return false;
431*81ad6265SDimitry Andric }
432*81ad6265SDimitry Andric 
void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;

  // Collect the tile PHIs; PHIs are grouped at the top of the block, so
  // stop at the first non-PHI.
  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  // Canonicalize the phi node first. One tile phi may depend on a previous
  // phi node. For the below case, we need to convert %t4.
  //
  // BB0:
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t3 BB0)
  // -->
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t2 BB0)
  //
  while (!PHIs.empty()) {
    MachineInstr *PHI = PHIs.pop_back_val();

    // Find the operand that is incoming from the same MBB and the def
    // is also phi node.
    MachineOperand *InMO = nullptr;
    MachineInstr *DefMI = nullptr;
    for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
      Register InTileReg = PHI->getOperand(I).getReg();
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      DefMI = MRI->getVRegDef(InTileReg);
      if (InMBB != &MBB || !DefMI->isPHI())
        continue;

      InMO = &PHI->getOperand(I);
      break;
    }
    // If can't find such operand, do nothing.
    if (!InMO)
      continue;

    // Current phi node depends on previous phi node. Break the
    // dependency: replace the PHI-defined incoming register with the
    // register DefMI receives from this same block.
    // NOTE(review): this loop walks DefMI's operands but reads the MBB
    // from PHI->getOperand(I + 1) — presumably both PHIs list their
    // predecessors in the same order; confirm, otherwise
    // DefMI->getOperand(I + 1) looks intended.
    Register DefTileReg;
    for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      if (InMBB != &MBB)
        continue;
      DefTileReg = DefMI->getOperand(I).getReg();
      InMO->setReg(DefTileReg);
      break;
    }
  }
}
487*81ad6265SDimitry Andric 
488*81ad6265SDimitry Andric void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
489*81ad6265SDimitry Andric   SmallVector<MachineInstr *, 8> PHIs;
490*81ad6265SDimitry Andric   for (MachineInstr &MI : MBB) {
491*81ad6265SDimitry Andric     if (!MI.isPHI())
492*81ad6265SDimitry Andric       break;
493*81ad6265SDimitry Andric     if (!isTileRegDef(MRI, MI))
494*81ad6265SDimitry Andric       continue;
495*81ad6265SDimitry Andric     PHIs.push_back(&MI);
496*81ad6265SDimitry Andric   }
497*81ad6265SDimitry Andric   while (!PHIs.empty()) {
498*81ad6265SDimitry Andric     MachineInstr *MI = PHIs.pop_back_val();
499*81ad6265SDimitry Andric     VisitedPHIs.clear();
500*81ad6265SDimitry Andric     convertPHI(&MBB, *MI);
501*81ad6265SDimitry Andric   }
502*81ad6265SDimitry Andric }
503*81ad6265SDimitry Andric 
504*81ad6265SDimitry Andric // PreTileConfig should configure the tile registers based on basic
505*81ad6265SDimitry Andric // block.
506*81ad6265SDimitry Andric bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
507*81ad6265SDimitry Andric   this->MBB = &MBB;
508*81ad6265SDimitry Andric   bool Change = false;
509*81ad6265SDimitry Andric   MachineInstr *LastShapeMI = nullptr;
510*81ad6265SDimitry Andric   MachineInstr *LastTileCfg = nullptr;
511*81ad6265SDimitry Andric   bool HasUnconfigTile = false;
512*81ad6265SDimitry Andric 
513*81ad6265SDimitry Andric   auto Config = [&](MachineInstr &Before) {
514*81ad6265SDimitry Andric     if (CfgSS == -1)
515*81ad6265SDimitry Andric       CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
516*81ad6265SDimitry Andric                                      ST->getTileConfigAlignment(), false);
517*81ad6265SDimitry Andric     LastTileCfg = addFrameReference(
518*81ad6265SDimitry Andric         BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
519*81ad6265SDimitry Andric     LastShapeMI = nullptr;
520*81ad6265SDimitry Andric     Change = true;
521*81ad6265SDimitry Andric   };
522*81ad6265SDimitry Andric   auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
523*81ad6265SDimitry Andric     for (const MachineOperand &MO : MI.operands()) {
524*81ad6265SDimitry Andric       if (!MO.isReg())
525*81ad6265SDimitry Andric         continue;
526*81ad6265SDimitry Andric       Register Reg = MO.getReg();
527*81ad6265SDimitry Andric       if (Reg.isVirtual() &&
528*81ad6265SDimitry Andric           MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
529*81ad6265SDimitry Andric         return true;
530*81ad6265SDimitry Andric     }
531*81ad6265SDimitry Andric     return false;
532*81ad6265SDimitry Andric   };
533*81ad6265SDimitry Andric   for (MachineInstr &MI : reverse(MBB)) {
534*81ad6265SDimitry Andric     // We have transformed phi node before configuring BB.
535*81ad6265SDimitry Andric     if (MI.isPHI())
536*81ad6265SDimitry Andric       break;
537*81ad6265SDimitry Andric     // Don't collect the shape of used tile, the tile should be defined
538*81ad6265SDimitry Andric     // before the tile use. Spill and reload would happen if there is only
539*81ad6265SDimitry Andric     // tile use after ldtilecfg, so the shape can be collected from reload.
540*81ad6265SDimitry Andric     // Take below code for example. %t would be reloaded before tilestore
541*81ad6265SDimitry Andric     // call
542*81ad6265SDimitry Andric     // ....
543*81ad6265SDimitry Andric     // tilestore %r, %c, %t
544*81ad6265SDimitry Andric     // -->
545*81ad6265SDimitry Andric     // call
546*81ad6265SDimitry Andric     // ldtilecfg
547*81ad6265SDimitry Andric     // %t = tileload %r, %c
548*81ad6265SDimitry Andric     // tilestore %r, %c, %t
549*81ad6265SDimitry Andric     if (HasTileOperand(MRI, MI))
550*81ad6265SDimitry Andric       HasUnconfigTile = true;
551*81ad6265SDimitry Andric     // According to AMX ABI, all the tile registers including config register
552*81ad6265SDimitry Andric     // are volatile. Caller need to save/restore config register.
553*81ad6265SDimitry Andric     if (MI.isCall() && HasUnconfigTile) {
554*81ad6265SDimitry Andric       MachineBasicBlock::iterator I;
555*81ad6265SDimitry Andric       if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
556*81ad6265SDimitry Andric         I = ++LastShapeMI->getIterator();
557*81ad6265SDimitry Andric       else
558*81ad6265SDimitry Andric         I = ++MI.getIterator();
559*81ad6265SDimitry Andric       Config(*I);
560*81ad6265SDimitry Andric       HasUnconfigTile = false;
561*81ad6265SDimitry Andric       continue;
562*81ad6265SDimitry Andric     }
563*81ad6265SDimitry Andric     if (!isTileDef(MRI, MI))
564*81ad6265SDimitry Andric       continue;
565*81ad6265SDimitry Andric     //
566*81ad6265SDimitry Andric     //---------------------------------------------------------------------
567*81ad6265SDimitry Andric     // Don't handle COPY instruction. If the src and dst of the COPY can be
568*81ad6265SDimitry Andric     // in the same config in below case, we just check the shape of t0.
569*81ad6265SDimitry Andric     // def row0
570*81ad6265SDimitry Andric     // def col0
571*81ad6265SDimitry Andric     // ldtilecfg
572*81ad6265SDimitry Andric     // t0 = tielzero(row0, col0)
573*81ad6265SDimitry Andric     // t1 = copy t0
574*81ad6265SDimitry Andric     // ...
575*81ad6265SDimitry Andric     // If the src and dst of the COPY can NOT be in the same config in below
576*81ad6265SDimitry Andric     // case. Reload would be generated befor the copy instruction.
577*81ad6265SDimitry Andric     // def row0
578*81ad6265SDimitry Andric     // def col0
579*81ad6265SDimitry Andric     // t0 = tielzero(row0, col0)
580*81ad6265SDimitry Andric     // spill t0
581*81ad6265SDimitry Andric     // ...
582*81ad6265SDimitry Andric     // def row1
583*81ad6265SDimitry Andric     // def col1
584*81ad6265SDimitry Andric     // ldtilecfg
585*81ad6265SDimitry Andric     // t1 = tilezero(row1, col1)
586*81ad6265SDimitry Andric     // reload t0
587*81ad6265SDimitry Andric     // t1 = copy t0
588*81ad6265SDimitry Andric     //---------------------------------------------------------------------
589*81ad6265SDimitry Andric     //
590*81ad6265SDimitry Andric     // If MI dominate the last shape def instruction, we need insert
591*81ad6265SDimitry Andric     // ldtilecfg after LastShapeMI now. The config doesn't include
592*81ad6265SDimitry Andric     // current MI.
593*81ad6265SDimitry Andric     //   def row0
594*81ad6265SDimitry Andric     //   def col0
595*81ad6265SDimitry Andric     //   tilezero(row0, col0)  <- MI
596*81ad6265SDimitry Andric     //   def row1
597*81ad6265SDimitry Andric     //   def col1
598*81ad6265SDimitry Andric     //   ldtilecfg             <- insert
599*81ad6265SDimitry Andric     //   tilezero(row1, col1)
600*81ad6265SDimitry Andric     if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
601*81ad6265SDimitry Andric       Config(*(++LastShapeMI->getIterator()));
602*81ad6265SDimitry Andric     MachineOperand *RowMO = &MI.getOperand(1);
603*81ad6265SDimitry Andric     MachineOperand *ColMO = &MI.getOperand(2);
604*81ad6265SDimitry Andric     MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
605*81ad6265SDimitry Andric     MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
606*81ad6265SDimitry Andric     // If the shape is defined in current MBB, check the domination.
607*81ad6265SDimitry Andric     // FIXME how about loop?
608*81ad6265SDimitry Andric     if (RowMI->getParent() == &MBB) {
609*81ad6265SDimitry Andric       if (!LastShapeMI)
610*81ad6265SDimitry Andric         LastShapeMI = RowMI;
611*81ad6265SDimitry Andric       else if (dominates(MBB, LastShapeMI, RowMI))
612*81ad6265SDimitry Andric         LastShapeMI = RowMI;
613*81ad6265SDimitry Andric     }
614*81ad6265SDimitry Andric     if (ColMI->getParent() == &MBB) {
615*81ad6265SDimitry Andric       if (!LastShapeMI)
616*81ad6265SDimitry Andric         LastShapeMI = ColMI;
617*81ad6265SDimitry Andric       else if (dominates(MBB, LastShapeMI, ColMI))
618*81ad6265SDimitry Andric         LastShapeMI = ColMI;
619*81ad6265SDimitry Andric     }
620*81ad6265SDimitry Andric     // If there is user live out of the tilecfg, spill it and reload in
621*81ad6265SDimitry Andric     // before the user.
622*81ad6265SDimitry Andric     Register TileReg = MI.getOperand(0).getReg();
623*81ad6265SDimitry Andric     if (mayLiveOut(TileReg, LastTileCfg))
624*81ad6265SDimitry Andric       spill(++MI.getIterator(), TileReg, false);
625*81ad6265SDimitry Andric     for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
626*81ad6265SDimitry Andric       if (UseMI.getParent() == &MBB) {
627*81ad6265SDimitry Andric         // check user should not across ldtilecfg
628*81ad6265SDimitry Andric         if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
629*81ad6265SDimitry Andric           continue;
630*81ad6265SDimitry Andric         // reload befor UseMI
631*81ad6265SDimitry Andric         reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
632*81ad6265SDimitry Andric       } else {
633*81ad6265SDimitry Andric         // Don't reload for phi instruction, we handle phi reload separately.
634*81ad6265SDimitry Andric         // TODO: merge the reload for the same user MBB.
635*81ad6265SDimitry Andric         if (!UseMI.isPHI())
636*81ad6265SDimitry Andric           reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
637*81ad6265SDimitry Andric       }
638*81ad6265SDimitry Andric     }
639*81ad6265SDimitry Andric   }
640*81ad6265SDimitry Andric 
641*81ad6265SDimitry Andric   // Configure tile registers at the head of the MBB
642*81ad6265SDimitry Andric   if (HasUnconfigTile) {
643*81ad6265SDimitry Andric     MachineInstr *Before;
644*81ad6265SDimitry Andric     if (LastShapeMI == nullptr || LastShapeMI->isPHI())
645*81ad6265SDimitry Andric       Before = &*MBB.getFirstNonPHI();
646*81ad6265SDimitry Andric     else
647*81ad6265SDimitry Andric       Before = &*(++LastShapeMI->getIterator());
648*81ad6265SDimitry Andric 
649*81ad6265SDimitry Andric     Config(*Before);
650*81ad6265SDimitry Andric   }
651*81ad6265SDimitry Andric 
652*81ad6265SDimitry Andric   return Change;
653*81ad6265SDimitry Andric }
654*81ad6265SDimitry Andric 
655*81ad6265SDimitry Andric bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
656*81ad6265SDimitry Andric   MF = &MFunc;
657*81ad6265SDimitry Andric   MRI = &MFunc.getRegInfo();
658*81ad6265SDimitry Andric   ST = &MFunc.getSubtarget<X86Subtarget>();
659*81ad6265SDimitry Andric   TII = ST->getInstrInfo();
660*81ad6265SDimitry Andric   X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
661*81ad6265SDimitry Andric   MFI = &MFunc.getFrameInfo();
662*81ad6265SDimitry Andric   TRI = ST->getRegisterInfo();
663*81ad6265SDimitry Andric   CfgSS = -1;
664*81ad6265SDimitry Andric 
665*81ad6265SDimitry Andric   unsigned NumVirtRegs = MRI->getNumVirtRegs();
666*81ad6265SDimitry Andric   // Abandon early if there is no tile register to config.
667*81ad6265SDimitry Andric   bool HasVirtTileReg = false;
668*81ad6265SDimitry Andric   for (unsigned I = 0, E = NumVirtRegs; I != E; ++I) {
669*81ad6265SDimitry Andric     Register VirtReg = Register::index2VirtReg(I);
670*81ad6265SDimitry Andric     if (MRI->getRegClass(VirtReg)->getID() == X86::TILERegClassID) {
671*81ad6265SDimitry Andric       HasVirtTileReg = true;
672*81ad6265SDimitry Andric       break;
673*81ad6265SDimitry Andric     }
674*81ad6265SDimitry Andric   }
675*81ad6265SDimitry Andric   if (!HasVirtTileReg)
676*81ad6265SDimitry Andric     return false;
677*81ad6265SDimitry Andric 
678*81ad6265SDimitry Andric   StackSlotForVirtReg.resize(NumVirtRegs);
679*81ad6265SDimitry Andric   MayLiveAcrossBlocks.clear();
680*81ad6265SDimitry Andric   // We will create register during config. *3 is to make sure
681*81ad6265SDimitry Andric   // the virtual register number doesn't exceed the size of
682*81ad6265SDimitry Andric   // the bit vector.
683*81ad6265SDimitry Andric   MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
684*81ad6265SDimitry Andric   bool Change = false;
685*81ad6265SDimitry Andric   assert(MRI->isSSA());
686*81ad6265SDimitry Andric 
687*81ad6265SDimitry Andric   // Canonicalize the phi node first.
688*81ad6265SDimitry Andric   for (MachineBasicBlock &MBB : MFunc)
689*81ad6265SDimitry Andric     canonicalizePHIs(MBB);
690*81ad6265SDimitry Andric 
691*81ad6265SDimitry Andric   // Loop over all of the basic blocks in reverse post order and insert
692*81ad6265SDimitry Andric   // ldtilecfg for tile registers. The reserse post order is to facilitate
693*81ad6265SDimitry Andric   // PHI node convert.
694*81ad6265SDimitry Andric   ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
695*81ad6265SDimitry Andric   for (MachineBasicBlock *MBB : RPOT) {
696*81ad6265SDimitry Andric     convertPHIs(*MBB);
697*81ad6265SDimitry Andric     Change |= configBasicBlock(*MBB);
698*81ad6265SDimitry Andric   }
699*81ad6265SDimitry Andric 
700*81ad6265SDimitry Andric   if (Change)
701*81ad6265SDimitry Andric     InitializeTileConfigStackSpace();
702*81ad6265SDimitry Andric 
703*81ad6265SDimitry Andric   StackSlotForVirtReg.clear();
704*81ad6265SDimitry Andric   return Change;
705*81ad6265SDimitry Andric }
706*81ad6265SDimitry Andric 
707*81ad6265SDimitry Andric FunctionPass *llvm::createX86FastPreTileConfigPass() {
708*81ad6265SDimitry Andric   return new X86FastPreTileConfig();
709*81ad6265SDimitry Andric }
710