xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/X86PreTileConfig.cpp (revision fe6060f10f634930ff71b7c50291ddc610da2475)
//===-- X86PreTileConfig.cpp - Tile Register Pre-configure-----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Pass to pre-configure the shapes of AMX registers.
/// AMX registers need to be configured before use. The shapes of an AMX
/// register are encoded in the 1st and 2nd machine operands of AMX pseudo
/// instructions.
///
/// The instruction ldtilecfg is used to configure the shapes. It must be
/// reachable from the definitions of all variable shapes. ldtilecfg is
/// inserted more than once if we cannot find a single dominating point for all
/// AMX instructions.
///
/// The tile config register is caller-saved according to the ABI. We need to
/// insert ldtilecfg again after a call instruction if the callee clobbers any
/// AMX registers.
///
/// This pass calculates all points where ldtilecfg needs to be inserted and
/// inserts it there. It reports an error if the reachability conditions aren't
/// met.
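///
/// For illustration only (simplified MIR; the pseudo's memory operands are
/// omitted), an AMX load pseudo looks roughly like
///   %td:tile = PTILELOADDV %row:gr16, %col:gr16, ...
/// where %row and %col are the shape operands that must be configured by an
/// ldtilecfg executed before %td is used.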
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "tile-pre-config"
#define REPORT_CONFIG_FAIL                                                     \
  report_fatal_error(                                                          \
      MF.getName() +                                                           \
      ": Failed to config tile register, please define the shape earlier");

namespace {

struct MIRef {
  MachineInstr *MI = nullptr;
  MachineBasicBlock *MBB = nullptr;
  // A virtual position for an instruction that will be inserted after MI.
  size_t Pos = 0;
  MIRef() = default;
  MIRef(MachineBasicBlock *MBB) : MBB(MBB) {
    for (auto I = MBB->begin(), E = MBB->end(); I != E && I->isPHI();
         ++I, ++Pos)
      MI = &*I;
  }
  MIRef(MachineInstr *MI)
      : MI(MI), MBB(MI->getParent()),
        Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}
  MIRef(MachineInstr *MI, MachineBasicBlock *MBB)
      : MI(MI), MBB(MBB),
        Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}
  MIRef(MachineInstr *MI, MachineBasicBlock *MBB, size_t Pos)
      : MI(MI), MBB(MBB), Pos(Pos) {}
  operator bool() const { return MBB != nullptr; }
  bool operator==(const MIRef &RHS) const {
    return MI == RHS.MI && MBB == RHS.MBB;
  }
  bool operator!=(const MIRef &RHS) const { return !(*this == RHS); }
  bool operator<(const MIRef &RHS) const {
    // Comparison between different BBs happens when inserting a MIRef into a
    // set, so compare MBB first to keep the ordering consistent.
    return MBB < RHS.MBB || (MBB == RHS.MBB && Pos < RHS.Pos);
  }
  bool operator>(const MIRef &RHS) const {
    // Comparison between different BBs happens when inserting a MIRef into a
    // set, so compare MBB first to keep the ordering consistent.
    return MBB > RHS.MBB || (MBB == RHS.MBB && Pos > RHS.Pos);
  }
};

struct BBInfo {
  MIRef FirstAMX;
  MIRef LastCall;
  bool HasAMXRegLiveIn = false;
  bool TileCfgForbidden = false;
  bool NeedTileCfgLiveIn = false;
};

class X86PreTileConfig : public MachineFunctionPass {
  MachineRegisterInfo *MRI;
  const MachineLoopInfo *MLI;
  SmallSet<MachineInstr *, 8> DefVisited;
  DenseMap<MachineBasicBlock *, BBInfo> BBVisitedInfo;
  DenseMap<MachineBasicBlock *, SmallVector<MIRef, 8>> ShapeBBs;

  /// Check if the callee will clobber AMX registers.
  bool isDestructiveCall(MachineInstr &MI, BitVector UsableRegs) {
    auto Iter = llvm::find_if(
        MI.operands(), [](MachineOperand &MO) { return MO.isRegMask(); });
    if (Iter == MI.operands_end())
      return false;
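    // A set bit in the call's register mask means the register is preserved
    // across the call; clearing those bits leaves only the AMX registers that
    // the callee may clobber.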
    UsableRegs.clearBitsInMask(Iter->getRegMask());
    return !UsableRegs.none();
  }

  /// Check if MI is an AMX pseudo instruction.
  bool isAMXInstruction(MachineInstr &MI) {
    if (MI.isPHI() || MI.isDebugInstr() || MI.getNumOperands() < 3)
      return false;
    MachineOperand &MO = MI.getOperand(0);
    // We can simply check whether it is an AMX instruction by its def, but we
    // should exclude the old API, which uses physical registers.
    if (MO.isReg() && MO.getReg().isVirtual() &&
        MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID) {
      collectShapeInfo(MI);
      return true;
    }
    // PTILESTOREDV is the only exception that doesn't def an AMX register.
    return MI.getOpcode() == X86::PTILESTOREDV;
  }

  /// Check if it is an edge from loop bottom to loop head.
  bool isLoopBackEdge(MachineBasicBlock *Header, MachineBasicBlock *Bottom) {
    if (!MLI->isLoopHeader(Header))
      return false;
    auto *ML = MLI->getLoopFor(Header);
    if (ML->contains(Bottom) && ML->isLoopLatch(Bottom))
      return true;

    return false;
  }

  /// Collect the shape def information for later use.
  void collectShapeInfo(MachineInstr &MI);

  /// Try to hoist shapes defined below AMX instructions.
  bool hoistShapesInBB(MachineBasicBlock *MBB, SmallVectorImpl<MIRef> &Shapes) {
    MIRef &FirstAMX = BBVisitedInfo[MBB].FirstAMX;
    auto FirstShapeBelowAMX = llvm::lower_bound(Shapes, FirstAMX);
    auto InsertPoint = FirstAMX.MI->getIterator();
    for (auto I = FirstShapeBelowAMX, E = Shapes.end(); I != E; ++I) {
      // Do not hoist instructions that access memory.
      if (I->MI->mayLoadOrStore())
        return false;
      for (auto &MO : I->MI->operands()) {
        if (MO.isDef())
          continue;
        // Do not hoist if any source def is below the first AMX instruction.
        // TODO: We can handle isMoveImmediate MI here.
        if (MO.isReg() && MIRef(MRI->getVRegDef(MO.getReg())) > FirstAMX)
          return false;
        // TODO: Maybe need more checks here.
      }
      MBB->insert(InsertPoint, I->MI->removeFromParent());
    }
    // We only need to mark the last shape in the BB now.
    Shapes.clear();
    Shapes.push_back(MIRef(&*--InsertPoint, MBB));
    return true;
  }

public:
  X86PreTileConfig() : MachineFunctionPass(ID) {}

  /// Return the pass name.
  StringRef getPassName() const override {
    return "Tile Register Pre-configure";
  }

  /// X86PreTileConfig analysis usage.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    AU.addRequired<MachineLoopInfo>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  /// Clear MF-related structures.
  void releaseMemory() override {
    ShapeBBs.clear();
    DefVisited.clear();
    BBVisitedInfo.clear();
  }

  /// Insert the ldtilecfg instructions.
  bool runOnMachineFunction(MachineFunction &MF) override;

  static char ID;
};

} // end anonymous namespace

char X86PreTileConfig::ID = 0;

INITIALIZE_PASS_BEGIN(X86PreTileConfig, "tilepreconfig",
                      "Tile Register Pre-configure", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(X86PreTileConfig, "tilepreconfig",
                    "Tile Register Pre-configure", false, false)

void X86PreTileConfig::collectShapeInfo(MachineInstr &MI) {
  auto RecordShape = [&](MachineInstr *MI, MachineBasicBlock *MBB) {
    MIRef MIR(MI, MBB);
    auto I = llvm::lower_bound(ShapeBBs[MBB], MIR);
    if (I == ShapeBBs[MBB].end() || *I != MIR)
      ShapeBBs[MBB].insert(I, MIR);
  };

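  // Trace the two shape operands back to their defining instructions, walking
  // through PHIs except along loop back edges, where the PHI itself is treated
  // as the shape def.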
  SmallVector<Register, 8> WorkList(
      {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
  while (!WorkList.empty()) {
    Register R = WorkList.pop_back_val();
    MachineInstr *DefMI = MRI->getVRegDef(R);
    assert(DefMI && "R must have one defining instruction");
    MachineBasicBlock *DefMBB = DefMI->getParent();
    if (DefMI->isMoveImmediate() || !DefVisited.insert(DefMI).second)
      continue;
    if (DefMI->isPHI()) {
      for (unsigned I = 1; I < DefMI->getNumOperands(); I += 2)
        if (isLoopBackEdge(DefMBB, DefMI->getOperand(I + 1).getMBB()))
          RecordShape(DefMI, DefMBB); // In this case, PHI is also a shape def.
        else
          WorkList.push_back(DefMI->getOperand(I).getReg());
    } else {
      RecordShape(DefMI, DefMBB);
    }
  }
}

bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetInstrInfo *TII = ST.getInstrInfo();
  const TargetRegisterInfo *TRI = ST.getRegisterInfo();
  const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);

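  // Collect the physical AMX tile registers (TMM0..TMM7) so that calls which
  // clobber them can be detected below.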
  BitVector AMXRegs(TRI->getNumRegs());
  for (unsigned I = 0; I < RC->getNumRegs(); I++)
    AMXRegs.set(X86::TMM0 + I);

  // Iterate MF to collect information.
  MRI = &MF.getRegInfo();
  MLI = &getAnalysis<MachineLoopInfo>();
  SmallSet<MIRef, 8> CfgNeedInsert;
  SmallVector<MachineBasicBlock *, 8> CfgLiveInBBs;
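  // First walk: for each block, record its first AMX instruction, its last
  // AMX-clobbering call, and whether the tile config needs to be live in to
  // the block.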
  for (auto &MBB : MF) {
    size_t Pos = 0;
    for (auto &MI : MBB) {
      ++Pos;
      if (isAMXInstruction(MI)) {
        // If there's a call before the AMX instruction, we need to reload the
        // tile config after it.
        if (BBVisitedInfo[&MBB].LastCall)
          CfgNeedInsert.insert(BBVisitedInfo[&MBB].LastCall);
        else // Otherwise, the tile config needs to be live-in to this BB.
          BBVisitedInfo[&MBB].NeedTileCfgLiveIn = true;
        // Always record the first AMX in case there's a shape def after it.
        if (!BBVisitedInfo[&MBB].FirstAMX)
          BBVisitedInfo[&MBB].FirstAMX = MIRef(&MI, &MBB, Pos);
      } else if (MI.isCall() && isDestructiveCall(MI, AMXRegs)) {
        // Record the call only if the callee clobbers any AMX register.
        BBVisitedInfo[&MBB].LastCall = MIRef(&MI, &MBB, Pos);
      }
    }
    if (BBVisitedInfo[&MBB].NeedTileCfgLiveIn) {
      if (&MBB == &MF.front())
        CfgNeedInsert.insert(MIRef(&MBB));
      else
        CfgLiveInBBs.push_back(&MBB);
    }
    if (BBVisitedInfo[&MBB].FirstAMX || BBVisitedInfo[&MBB].HasAMXRegLiveIn)
      for (auto *Succ : MBB.successors())
        if (!isLoopBackEdge(Succ, &MBB))
          BBVisitedInfo[Succ].HasAMXRegLiveIn = true;
  }

  // Update NeedTileCfgLiveIn for predecessors.
  while (!CfgLiveInBBs.empty()) {
    MachineBasicBlock *MBB = CfgLiveInBBs.pop_back_val();
    for (auto *Pred : MBB->predecessors()) {
      if (BBVisitedInfo[Pred].LastCall) {
        CfgNeedInsert.insert(BBVisitedInfo[Pred].LastCall);
      } else if (!BBVisitedInfo[Pred].NeedTileCfgLiveIn) {
        BBVisitedInfo[Pred].NeedTileCfgLiveIn = true;
        if (Pred == &MF.front())
          CfgNeedInsert.insert(MIRef(Pred));
        else
          CfgLiveInBBs.push_back(Pred);
      }
    }
  }

  // There's no AMX instruction if we didn't find a tile config live-in point.
  if (CfgNeedInsert.empty())
    return false;

  // Avoid inserting ldtilecfg before any shape defs.
  SmallVector<MachineBasicBlock *, 8> WorkList;
  for (auto &I : ShapeBBs) {
    // TODO: We can hoist shapes across BBs here.
    if (BBVisitedInfo[I.first].HasAMXRegLiveIn)
      REPORT_CONFIG_FAIL
    if (BBVisitedInfo[I.first].FirstAMX &&
        BBVisitedInfo[I.first].FirstAMX < I.second.back() &&
        !hoistShapesInBB(I.first, I.second))
      REPORT_CONFIG_FAIL
    WorkList.push_back(I.first);
  }
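  // Walk backwards from the blocks that contain shape defs (skipping loop back
  // edges) and mark their predecessors as TileCfgForbidden: ldtilecfg must not
  // be hoisted above a shape def.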
  while (!WorkList.empty()) {
    MachineBasicBlock *MBB = WorkList.pop_back_val();
    for (auto *Pred : MBB->predecessors()) {
      if (!BBVisitedInfo[Pred].TileCfgForbidden && !isLoopBackEdge(MBB, Pred)) {
        BBVisitedInfo[Pred].TileCfgForbidden = true;
        WorkList.push_back(Pred);
      }
    }
  }

  DebugLoc DL;
  SmallSet<MIRef, 8> VisitedOrInserted;
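  // Create a stack slot to hold the tile configuration data that ldtilecfg
  // loads from.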
  int SS = MF.getFrameInfo().CreateStackObject(
      ST.getTileConfigSize(), ST.getTileConfigAlignment(), false);

  // Try to insert ldtilecfg for the tile config live-in points.
  for (auto I : CfgNeedInsert) {
    SmallSet<MIRef, 8> InsertPoints;
    SmallVector<MIRef, 8> WorkList({I});
    while (!WorkList.empty()) {
      MIRef I = WorkList.pop_back_val();
      if (!VisitedOrInserted.count(I)) {
        if (!BBVisitedInfo[I.MBB].TileCfgForbidden) {
          // If all shape defs reach this BB, stop sinking and try to insert
          // here.
          InsertPoints.insert(I);
        } else {
          // Avoid visiting the BB more than once.
          VisitedOrInserted.insert(I);
          // Otherwise, sink the insertion point along the successors with
          // NeedTileCfgLiveIn == true.
          for (auto *Succ : I.MBB->successors())
            if (BBVisitedInfo[Succ].NeedTileCfgLiveIn)
              WorkList.push_back(MIRef(Succ));
        }
      }
    }

    // A given point might be forked if the shape conditions are not met.
    for (MIRef I : InsertPoints) {
      // Make sure we insert ldtilecfg after the last shape def in the MBB.
      if (ShapeBBs.count(I.MBB) && I < ShapeBBs[I.MBB].back())
        I = ShapeBBs[I.MBB].back();
      // The same MBB may be reached by more than one sinking path. Record it
      // to avoid inserting more than once.
      if (VisitedOrInserted.insert(I).second) {
        auto II = I.MI ? I.MI->getIterator() : I.MBB->instr_begin();
        addFrameReference(BuildMI(*I.MBB, ++II, DL, TII->get(X86::LDTILECFG)),
                          SS);
      }
    }
  }

  // Zero stack slot.
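  // The config area is zeroed with the widest vector stores the subtarget
  // provides (one 512-bit, two 256-bit, or four 128-bit stores).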
  MachineBasicBlock &MBB = MF.front();
  MachineInstr *MI = &*MBB.begin();
  if (ST.hasAVX512()) {
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::VPXORDZrr), Zmm)
        .addReg(Zmm, RegState::Undef)
        .addReg(Zmm, RegState::Undef);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), SS)
        .addReg(Zmm);
  } else if (ST.hasAVX2()) {
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::VPXORYrr), Ymm)
        .addReg(Ymm, RegState::Undef)
        .addReg(Ymm, RegState::Undef);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), SS)
        .addReg(Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), SS, 32)
        .addReg(Ymm);
  } else {
    assert(ST.hasSSE2() && "AMX should assume SSE2 enabled");
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::PXORrr), Xmm)
        .addReg(Xmm, RegState::Undef)
        .addReg(Xmm, RegState::Undef);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS, 16)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS, 32)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS, 48)
        .addReg(Xmm);
  }
  // Fill in the palette (the first byte of the tile config).
  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), SS).addImm(1);

  return true;
}

FunctionPass *llvm::createX86PreTileConfigPass() {
  return new X86PreTileConfig();
}