//===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Pass to preconfigure the shapes of physical tile registers.
/// It inserts an ldtilecfg ahead of each group of tile registers. The
/// algorithm walks the instructions of each basic block in reverse order.
/// Any tile register that lives out of a basic block is spilled and reloaded
/// before its users. The pass also checks the dependencies of the shapes to
/// ensure each shape is defined before the ldtilecfg that consumes it.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "fastpretileconfig"

STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads, "Number of loads added");

namespace {

class X86FastPreTileConfig : public MachineFunctionPass {
  MachineFunction *MF = nullptr;
  const X86Subtarget *ST = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  X86MachineFunctionInfo *X86FI = nullptr;
  MachineFrameInfo *MFI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineBasicBlock *MBB = nullptr;
  int CfgSS = -1;
  struct PHIInfo {
    Register Row;
    Register Col;
    Register StackAddr;
  };
  DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;

  /// Maps virtual regs to the frame index where these values are spilled.
  IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;

  /// Has a bit set for each tile virtual register that was determined to be
  /// live across blocks.
  BitVector MayLiveAcrossBlocks;

  int getStackSpaceFor(Register VirtReg);
  void InitializeTileConfigStackSpace();
  bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
  void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
  void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
              MachineOperand *RowMO, MachineOperand *ColMO);
  void canonicalizePHIs(MachineBasicBlock &MBB);
  void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
  void convertPHIs(MachineBasicBlock &MBB);
  bool configBasicBlock(MachineBasicBlock &MBB);

public:
  X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}

  /// Return the pass name.
  StringRef getPassName() const override {
    return "Fast Tile Register Preconfigure";
  }

  /// Perform tile register configuration.
  bool runOnMachineFunction(MachineFunction &MFunc) override;

  static char ID;
};

} // end anonymous namespace

char X86FastPreTileConfig::ID = 0;

INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
                      "Fast Tile Register Preconfigure", false, false)
INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
                    "Fast Tile Register Preconfigure", false, false)

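/// Returns true if \p A appears at or before \p B in \p MBB, i.e. \p A
/// dominates \p B within the block; \p B == MBB.end() is dominated by
/// everything. This is a linear scan from the top of the block.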
static bool dominates(MachineBasicBlock &MBB,
                      MachineBasicBlock::const_iterator A,
                      MachineBasicBlock::const_iterator B) {
  auto MBBEnd = MBB.end();
  if (B == MBBEnd)
    return true;

  MachineBasicBlock::const_iterator I = MBB.begin();
  for (; &*I != A && &*I != B; ++I)
    ;

  return &*I == A;
}

/// This allocates space for the specified virtual register to be held on the
/// stack.
int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
  // Find the location Reg would belong...
  int SS = StackSlotForVirtReg[VirtReg];
  // Already has space allocated?
  if (SS != -1)
    return SS;

  // Allocate a new stack object for this spill location...
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);
  int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);

  // Assign the slot.
  StackSlotForVirtReg[VirtReg] = FrameIdx;
  return FrameIdx;
}

/// Returns false if \p VirtReg is known not to live out of the current config.
/// If \p VirtReg lives out of the current MBB, it must also live out of the
/// current config.
bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
  if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
    return true;

  for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
    if (UseInst.getParent() != MBB) {
      MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
      return true;
    }

    // The use and def are in the same MBB. If the tile register is
    // reconfigured, it is clobbered and we need to spill and reload the
    // tile register.
    if (CfgMI) {
      if (dominates(*MBB, *CfgMI, UseInst)) {
        MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
        return true;
      }
    }
  }

  return false;
}

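// ldtilecfg reads its 64-byte configuration from memory: byte 0 holds the
// palette id, byte 1 the start row, bytes 16..47 the per-tile column widths
// in bytes, and bytes 48..63 the per-tile row counts (see the Intel SDM).
// Zero the whole slot and set the palette to 1 here; the actual shapes are
// stored into the slot later, once the tile shapes are known.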
void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
  MachineBasicBlock &MBB = MF->front();
  MachineInstr *MI = &*MBB.getFirstNonPHI();
  DebugLoc DL;
  if (ST->hasAVX512()) {
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
        .addReg(Zmm);
  } else if (ST->hasAVX2()) {
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
        .addReg(Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
                      32)
        .addReg(Ymm);
  } else {
    assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
    unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
        .addReg(Xmm);
  }
  // Fill in the palette first.
  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
      .addImm(1);
}

/// Insert spill instruction for \p VirtReg before \p Before.
/// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
                                 Register VirtReg, bool Kill) {
  LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
  int FI = getStackSpaceFor(VirtReg);
  LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');

  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  // The tile store doesn't need shape information because it is adjacent to
  // the tile def instruction.
  TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI,
                           Register());
  ++NumStores;

  // TODO: update DBG_VALUEs
}

/// Insert reload instruction for \p OrigReg before \p UseMI.
void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
                                  Register OrigReg, MachineOperand *RowMO,
                                  MachineOperand *ColMO) {
  int FI = getStackSpaceFor(OrigReg);
  const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
  Register TileReg;
  // Fold copy to tileload
  // BB1:
  // spill src to s
  //
  // BB2:
  // t = copy src
  // -->
  // t = tileload (s)
  if (UseMI->isCopy())
    TileReg = UseMI->getOperand(0).getReg();
  else
    TileReg = MRI->createVirtualRegister(&RC);
  // Can't use TII->loadRegFromStackSlot(), because we need the shape
  // information for reload.
  // tileloadd (%sp, %idx), %tmm
  unsigned Opc = X86::PTILELOADDV;
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  // FIXME: MBB is not the parent of UseMI.
  MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
                                TII->get(X86::MOV64ri), StrideReg)
                            .addImm(64);
  NewMI = addFrameReference(
      BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
          .addReg(RowMO->getReg())
          .addReg(ColMO->getReg()),
      FI);
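  // addFrameReference appended the 5-operand memory reference (base, scale,
  // index, disp, segment) after the row/col operands, so operand 5 is the
  // index register of that memory operand; route the stride through it.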
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  RowMO->setIsKill(false);
  ColMO->setIsKill(false);
  // Erase copy instruction after it is folded.
  if (UseMI->isCopy()) {
    UseMI->eraseFromParent();
  } else {
    // Replace the register in the user MI.
    for (auto &MO : UseMI->operands()) {
      if (MO.isReg() && MO.getReg() == OrigReg)
        MO.setReg(TileReg);
    }
  }

  ++NumLoads;
  LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
                    << printReg(TileReg, TRI) << '\n');
}

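/// Returns the number of tile registers a value of \p Reg's class occupies:
/// 1 for a plain TILE register, 2 for a TILEPAIR, and 0 if \p Reg is not a
/// tile register at all.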
static unsigned getTileDefNum(MachineRegisterInfo *MRI, Register Reg) {
  if (Reg.isVirtual()) {
    unsigned RegClassID = MRI->getRegClass(Reg)->getID();
    if (RegClassID == X86::TILERegClassID)
      return 1;
    if (RegClassID == X86::TILEPAIRRegClassID)
      return 2;
  } else {
    if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
      return 1;
    if (Reg >= X86::TMM0_TMM1 && Reg <= X86::TMM6_TMM7)
      return 2;
  }
  return 0;
}

static bool isTileRegister(MachineRegisterInfo *MRI, Register VirtReg) {
  return getTileDefNum(MRI, VirtReg) > 0;
}

static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  // The instruction must have 3 operands: tile def, row, col.
  if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
    return false;
  MachineOperand &MO = MI.getOperand(0);

  if (!MO.isReg())
    return false;

  return getTileDefNum(MRI, MO.getReg()) > 0;
}

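/// Returns the shape (the row and column operands) of \p TileReg by walking
/// from its def through any intervening copies to the defining tile
/// instruction.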
static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
  MachineInstr *MI = MRI->getVRegDef(TileReg);
  if (isTileDef(MRI, *MI)) {
    MachineOperand *RowMO = &MI->getOperand(1);
    MachineOperand *ColMO = &MI->getOperand(2);
    return ShapeT(RowMO, ColMO, MRI);
  } else if (MI->isCopy()) {
    TileReg = MI->getOperand(1).getReg();
    return getShape(MRI, TileReg);
  }

  // The def should not be a PHI node, because we walk the MBB in reverse post
  // order.
  assert(MI->isPHI() && "Unexpected PHI when get shape.");
  llvm_unreachable("Unexpected MI when get shape.");
}

// BB0:
// spill t0 to s0
// BB1:
// spill t1 to s1
//
// BB2:
// t = phi [t0, bb0] [t1, bb1]
// -->
// row = phi [r0, bb0] [r1, bb1]
// col = phi [c0, bb0] [c1, bb1]
//   s = phi [s0, bb0] [s1, bb1]
//   t = tileload row, col, s
// The new instructions are inserted after the phi nodes. The order of the
// original phi nodes is not preserved.
void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
                                      MachineInstr &PHI) {
  // 1. Create instructions to get the stack slot address of each incoming
  //    block.
  // 2. Create a PHI node for the stack address.
  // 3. Create a PHI node for the shape. If one of the incoming shapes is an
  //    immediate, use the immediate and delete the PHI node.
  // 4. Create a tileload instruction from the stack address.
  Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                        TII->get(X86::PHI), StackAddrReg);
  Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), RowReg);
  Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), ColReg);
  // Record the mapping of phi node and its row/column information.
  VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};

  for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
    // Get the incoming tile register and MBB of each pair of operands.
    Register InTileReg = PHI.getOperand(I).getReg();
    // Mark it as live out, so that it will be spilled when we visit the
    // incoming MBB. Otherwise, since the phi will be deleted, the spill
    // would be missed when visiting the incoming MBB.
    MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));
    MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();

    MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
    MachineBasicBlock::iterator InsertPos;
    if (TileDefMI->isPHI()) {
      InsertPos = TileDefMI->getParent()->getFirstNonPHI();
      if (VisitedPHIs.count(TileDefMI)) { // circular phi reference
        //        def t1
        //       /       \
        //  def t2       t3 = phi(t1, t4) <--
        //       \       /                  |
        //      t4 = phi(t2, t3)-------------
        //
        // For each (row, column and stack address) append phi incoming value.
        // Create r3 = phi(r1, r4)
        // Create r4 = phi(r2, r3)
        Register InRowReg = VisitedPHIs[TileDefMI].Row;
        Register InColReg = VisitedPHIs[TileDefMI].Col;
        Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
        continue;
      } else {
        // Recursively convert the PHI to a tileload.
        convertPHI(TileDefMI->getParent(), *TileDefMI);
        // The PHI node is converted to a tileload instruction. Get the stack
        // address from the tileload operands.
        MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
        assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
        Register InRowReg = TileLoad->getOperand(1).getReg();
        Register InColReg = TileLoad->getOperand(2).getReg();
        Register InStackAddrReg = TileLoad->getOperand(3).getReg();
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
      }
    } else {
      InsertPos = TileDefMI->getIterator();

      // Fill the incoming operands of the row/column phi instructions.
      ShapeT Shape = getShape(MRI, InTileReg);
      Shape.getRow()->setIsKill(false);
      Shape.getCol()->setIsKill(false);
      RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
      ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);

      // The incoming tile register lives out of its def BB, so it will be
      // spilled. Create an MI to get the spill stack slot address for it.
      int FI = getStackSpaceFor(InTileReg);
      Register InStackAddrReg =
          MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
      addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
                        TII->get(X86::LEA64r), InStackAddrReg)
                    .addFrameIndex(FI),
                0);
      AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
    }
  }

  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
      .addImm(64);
  Register TileReg = PHI.getOperand(0).getReg();
  MachineInstr *NewMI = addDirectMem(
      BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
          .addReg(RowReg)
          .addReg(ColReg),
      StackAddrReg);
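  // As in reload(), operand 5 is the index register of the memory reference
  // appended by addDirectMem; put the 64-byte stride there.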
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  PHI.eraseFromParent();
  VisitedPHIs.erase(&PHI);
}

static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  MachineOperand &MO = MI.getOperand(0);
  if (MO.isReg() && MO.getReg().isVirtual() && isTileRegister(MRI, MO.getReg()))
    return true;
  return false;
}

void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;

  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  // Canonicalize the phi nodes first. One tile phi may depend on a previous
  // phi node. For the case below, we need to convert %t4.
  //
  // BB0:
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t3 BB0)
  // -->
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t2 BB0)
  //
  while (!PHIs.empty()) {
    MachineInstr *PHI = PHIs.pop_back_val();

    // Find the operand that is incoming from the same MBB and whose def is
    // also a phi node.
    MachineOperand *InMO = nullptr;
    MachineInstr *DefMI = nullptr;
    for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
      Register InTileReg = PHI->getOperand(I).getReg();
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      DefMI = MRI->getVRegDef(InTileReg);
      if (InMBB != &MBB || !DefMI->isPHI())
        continue;

      InMO = &PHI->getOperand(I);
      break;
    }
    // If no such operand can be found, do nothing.
    if (!InMO)
      continue;

    // The current phi node depends on a previous phi node. Break the
    // dependency.
    Register DefTileReg;
    for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
      MachineBasicBlock *InMBB = DefMI->getOperand(I + 1).getMBB();
      if (InMBB != &MBB)
        continue;
      DefTileReg = DefMI->getOperand(I).getReg();
      InMO->setReg(DefTileReg);
      break;
    }
  }
}

void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;
  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  while (!PHIs.empty()) {
    MachineInstr *MI = PHIs.pop_back_val();
    VisitedPHIs.clear();
    convertPHI(&MBB, *MI);
  }
}

// PreTileConfig should configure the tile registers based on basic blocks.
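// Walk the block bottom-up. Track the most recent in-block shape def
// (LastShapeMI); when a tile def or a call is found that would otherwise be
// covered by a stale configuration, insert a ldtilecfg after LastShapeMI so
// that all the shapes it needs are already defined, and spill/reload any
// tile register whose live range would cross that ldtilecfg.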
bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
  this->MBB = &MBB;
  bool Change = false;
  MachineInstr *LastShapeMI = nullptr;
  MachineInstr *LastTileCfg = nullptr;
  bool HasUnconfigTile = false;

  auto Config = [&](MachineInstr &Before) {
    if (CfgSS == -1)
      CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
                                     ST->getTileConfigAlignment(), false);
    LastTileCfg = addFrameReference(
        BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
    LastShapeMI = nullptr;
    Change = true;
  };
  auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg.isVirtual() && isTileRegister(MRI, Reg))
        return true;
    }
    return false;
  };
  for (MachineInstr &MI : reverse(MBB)) {
    // We have transformed the phi nodes before configuring the BB.
    if (MI.isPHI())
      break;
    // Don't collect the shape of a used tile; the tile must be defined
    // before the tile use. If there is only a tile use after ldtilecfg,
    // a spill and reload are generated, so the shape can be collected
    // from the reload. Take the code below for example: %t would be
    // reloaded before the tilestore.
    // call
    // ....
    // tilestore %r, %c, %t
    // -->
    // call
    // ldtilecfg
    // %t = tileload %r, %c
    // tilestore %r, %c, %t
    if (HasTileOperand(MRI, MI))
      HasUnconfigTile = true;
    // According to the AMX ABI, all tile registers, including the config
    // register, are volatile. Callers need to save/restore the config
    // register.
    if (MI.isCall() && HasUnconfigTile) {
      MachineBasicBlock::iterator I;
      if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
        I = ++LastShapeMI->getIterator();
      else
        I = ++MI.getIterator();
      Config(*I);
      HasUnconfigTile = false;
      continue;
    }
    if (!isTileDef(MRI, MI))
      continue;
    //
    //---------------------------------------------------------------------
    // Don't handle COPY instructions. If the src and dst of a COPY can be
    // in the same config, as in the case below, we just check the shape of
    // t0.
    // def row0
    // def col0
    // ldtilecfg
    // t0 = tilezero(row0, col0)
    // t1 = copy t0
    // ...
    // If the src and dst of the COPY can NOT be in the same config, as in
    // the case below, a reload is generated before the copy instruction.
    // def row0
    // def col0
    // t0 = tilezero(row0, col0)
    // spill t0
    // ...
    // def row1
    // def col1
    // ldtilecfg
    // t1 = tilezero(row1, col1)
    // reload t0
    // t1 = copy t0
    //---------------------------------------------------------------------
    //
    // If MI dominates the last shape def instruction, we need to insert
    // ldtilecfg after LastShapeMI now. The config doesn't include the
    // current MI.
    //   def row0
    //   def col0
    //   tilezero(row0, col0)  <- MI
    //   def row1
    //   def col1
    //   ldtilecfg             <- insert
    //   tilezero(row1, col1)
    if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
      Config(*(++LastShapeMI->getIterator()));
    MachineOperand *RowMO = &MI.getOperand(1);
    MachineOperand *ColMO = &MI.getOperand(2);
    MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
    MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
    // If the shape is defined in the current MBB, check the domination.
    // FIXME: how about loops?
    if (RowMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = RowMI;
      else if (dominates(MBB, LastShapeMI, RowMI))
        LastShapeMI = RowMI;
    }
    if (ColMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = ColMI;
      else if (dominates(MBB, LastShapeMI, ColMI))
        LastShapeMI = ColMI;
    }
    unsigned TileDefNum = getTileDefNum(MRI, MI.getOperand(0).getReg());
    if (TileDefNum > 1) {
      for (unsigned I = 1; I < TileDefNum; I++) {
        MachineOperand *ColxMO = &MI.getOperand(2 + I);
        MachineInstr *ColxMI = MRI->getVRegDef(ColxMO->getReg());
        if (ColxMI->getParent() == &MBB) {
          if (!LastShapeMI)
            LastShapeMI = ColxMI;
          else if (dominates(MBB, LastShapeMI, ColxMI))
            LastShapeMI = ColxMI;
        }
      }
    }
    // If there is a user live out of the tilecfg, spill the tile register
    // and reload it before the user.
    Register TileReg = MI.getOperand(0).getReg();
    if (mayLiveOut(TileReg, LastTileCfg))
      spill(++MI.getIterator(), TileReg, false);
    for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
      if (UseMI.getParent() == &MBB) {
        // A use must not cross a ldtilecfg; if this one does, reload
        // before UseMI.
        if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
          continue;
        reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      } else {
        // Don't reload for phi instructions; we handle phi reloads
        // separately.
        // TODO: merge the reloads for the same user MBB.
        if (!UseMI.isPHI())
          reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      }
    }
  }

  // Configure tile registers at the head of the MBB.
  if (HasUnconfigTile) {
    MachineInstr *Before;
    if (LastShapeMI == nullptr || LastShapeMI->isPHI())
      Before = &*MBB.getFirstNonPHI();
    else
      Before = &*(++LastShapeMI->getIterator());

    Config(*Before);
  }

  return Change;
}

bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
  X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
  // Early exit in the common case of non-AMX code.
  if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
    return false;

  MF = &MFunc;
  MRI = &MFunc.getRegInfo();
  ST = &MFunc.getSubtarget<X86Subtarget>();
  TII = ST->getInstrInfo();
  MFI = &MFunc.getFrameInfo();
  TRI = ST->getRegisterInfo();
  CfgSS = -1;

  unsigned NumVirtRegs = MRI->getNumVirtRegs();

  StackSlotForVirtReg.resize(NumVirtRegs);
  MayLiveAcrossBlocks.clear();
  // We will create new virtual registers during configuration. The factor of
  // 3 is to make sure the virtual register numbers don't exceed the size of
  // the bit vector.
  MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
  bool Change = false;
  assert(MRI->isSSA());

  // Canonicalize the phi nodes first.
  for (MachineBasicBlock &MBB : MFunc)
    canonicalizePHIs(MBB);

  // Loop over all of the basic blocks in reverse post order and insert
  // ldtilecfg for tile registers. The reverse post order is to facilitate
  // PHI node conversion.
  ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
  for (MachineBasicBlock *MBB : RPOT) {
    convertPHIs(*MBB);
    Change |= configBasicBlock(*MBB);
  }

  if (Change)
    InitializeTileConfigStackSpace();

  StackSlotForVirtReg.clear();
  return Change;
}

FunctionPass *llvm::createX86FastPreTileConfigPass() {
  return new X86FastPreTileConfig();
}