xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/X86PreTileConfig.cpp (revision 349cc55c9796c4596a5b9904cd3281af295f878f)
1fe6060f1SDimitry Andric //===-- X86PreTileConfig.cpp - Tile Register Pre-configure-----------------===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric //
9fe6060f1SDimitry Andric /// \file Pass to pre-config the shapes of AMX registers
10fe6060f1SDimitry Andric /// AMX register needs to be configured before use. The shapes of AMX register
11fe6060f1SDimitry Andric /// are encoded in the 1st and 2nd machine operand of AMX pseudo instructions.
12e8d8bef9SDimitry Andric ///
13fe6060f1SDimitry Andric /// The instruction ldtilecfg is used to config the shapes. It must be reachable
14fe6060f1SDimitry Andric /// for all variable shapes. ldtilecfg will be inserted more than once if we
15fe6060f1SDimitry Andric /// cannot find a dominating point for all AMX instructions.
16e8d8bef9SDimitry Andric ///
17fe6060f1SDimitry Andric /// The configure register is caller saved according to ABI. We need to insert
18fe6060f1SDimitry Andric /// ldtilecfg again after the call instruction if callee clobbers any AMX
19fe6060f1SDimitry Andric /// registers.
20e8d8bef9SDimitry Andric ///
/// This pass calculates all points where ldtilecfg needs to be inserted and
/// inserts it there. It reports an error if the reachability conditions
/// aren't met.
23e8d8bef9SDimitry Andric //
24e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
25e8d8bef9SDimitry Andric 
26e8d8bef9SDimitry Andric #include "X86.h"
27e8d8bef9SDimitry Andric #include "X86InstrBuilder.h"
28*349cc55cSDimitry Andric #include "X86MachineFunctionInfo.h"
29e8d8bef9SDimitry Andric #include "X86RegisterInfo.h"
30e8d8bef9SDimitry Andric #include "X86Subtarget.h"
31e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
32e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
33fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
34e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
35e8d8bef9SDimitry Andric #include "llvm/CodeGen/Passes.h"
36e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
37e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
38e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h"
39e8d8bef9SDimitry Andric 
40e8d8bef9SDimitry Andric using namespace llvm;
41e8d8bef9SDimitry Andric 
42e8d8bef9SDimitry Andric #define DEBUG_TYPE "tile-pre-config"
// Abort with a fatal error that names the function being compiled. The
// expansion already ends in ';', so call sites use it as a bare statement
// without adding their own semicolon. It expects a MachineFunction named `MF`
// to be in scope where it is expanded.
#define REPORT_CONFIG_FAIL                                                     \
  report_fatal_error(                                                          \
      MF.getName() +                                                           \
      ": Failed to config tile register, please define the shape earlier");
47e8d8bef9SDimitry Andric 
48e8d8bef9SDimitry Andric namespace {
49e8d8bef9SDimitry Andric 
50fe6060f1SDimitry Andric struct MIRef {
51fe6060f1SDimitry Andric   MachineInstr *MI = nullptr;
52fe6060f1SDimitry Andric   MachineBasicBlock *MBB = nullptr;
53fe6060f1SDimitry Andric   // A virtual position for instruction that will be inserted after MI.
54fe6060f1SDimitry Andric   size_t Pos = 0;
55fe6060f1SDimitry Andric   MIRef() = default;
56fe6060f1SDimitry Andric   MIRef(MachineBasicBlock *MBB) : MBB(MBB) {
57fe6060f1SDimitry Andric     for (auto I = MBB->begin(), E = MBB->end(); I != E && I->isPHI();
58fe6060f1SDimitry Andric          ++I, ++Pos)
59fe6060f1SDimitry Andric       MI = &*I;
60fe6060f1SDimitry Andric   }
61fe6060f1SDimitry Andric   MIRef(MachineInstr *MI)
62fe6060f1SDimitry Andric       : MI(MI), MBB(MI->getParent()),
63fe6060f1SDimitry Andric         Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}
64fe6060f1SDimitry Andric   MIRef(MachineInstr *MI, MachineBasicBlock *MBB)
65fe6060f1SDimitry Andric       : MI(MI), MBB(MBB),
66fe6060f1SDimitry Andric         Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}
67fe6060f1SDimitry Andric   MIRef(MachineInstr *MI, MachineBasicBlock *MBB, size_t Pos)
68fe6060f1SDimitry Andric       : MI(MI), MBB(MBB), Pos(Pos) {}
69fe6060f1SDimitry Andric   operator bool() const { return MBB != nullptr; }
70fe6060f1SDimitry Andric   bool operator==(const MIRef &RHS) const {
71fe6060f1SDimitry Andric     return MI == RHS.MI && MBB == RHS.MBB;
72fe6060f1SDimitry Andric   }
73fe6060f1SDimitry Andric   bool operator!=(const MIRef &RHS) const { return !(*this == RHS); }
74fe6060f1SDimitry Andric   bool operator<(const MIRef &RHS) const {
75fe6060f1SDimitry Andric     // Comparison between different BBs happens when inserting a MIRef into set.
76fe6060f1SDimitry Andric     // So we compare MBB first to make the insertion happy.
77fe6060f1SDimitry Andric     return MBB < RHS.MBB || (MBB == RHS.MBB && Pos < RHS.Pos);
78fe6060f1SDimitry Andric   }
79fe6060f1SDimitry Andric   bool operator>(const MIRef &RHS) const {
80fe6060f1SDimitry Andric     // Comparison between different BBs happens when inserting a MIRef into set.
81fe6060f1SDimitry Andric     // So we compare MBB first to make the insertion happy.
82fe6060f1SDimitry Andric     return MBB > RHS.MBB || (MBB == RHS.MBB && Pos > RHS.Pos);
83fe6060f1SDimitry Andric   }
84fe6060f1SDimitry Andric };
85e8d8bef9SDimitry Andric 
/// Facts collected for one basic block while scanning the function.
struct BBInfo {
  // First AMX instruction seen in the block; empty if the block has none.
  MIRef FirstAMX;
  // Last call in the block that was found to clobber AMX registers; empty if
  // there is no such call.
  MIRef LastCall;
  // Set on the successors (back edges excluded) of every block that contains
  // an AMX instruction or has this flag set itself.
  bool HasAMXRegLiveIn = false;
  // Set on blocks from which a shape-defining block can be reached (back edges
  // excluded); ldtilecfg is not placed in such a block but sunk to successors.
  bool TileCfgForbidden = false;
  // Set when the block reaches an AMX instruction before any AMX-clobbering
  // call, either in the block itself or in a block following it.
  bool NeedTileCfgLiveIn = false;
};
93fe6060f1SDimitry Andric 
94fe6060f1SDimitry Andric class X86PreTileConfig : public MachineFunctionPass {
95fe6060f1SDimitry Andric   MachineRegisterInfo *MRI;
96fe6060f1SDimitry Andric   const MachineLoopInfo *MLI;
97fe6060f1SDimitry Andric   SmallSet<MachineInstr *, 8> DefVisited;
98fe6060f1SDimitry Andric   DenseMap<MachineBasicBlock *, BBInfo> BBVisitedInfo;
99fe6060f1SDimitry Andric   DenseMap<MachineBasicBlock *, SmallVector<MIRef, 8>> ShapeBBs;
100fe6060f1SDimitry Andric 
101fe6060f1SDimitry Andric   /// Check if the callee will clobber AMX registers.
102fe6060f1SDimitry Andric   bool isDestructiveCall(MachineInstr &MI, BitVector UsableRegs) {
103fe6060f1SDimitry Andric     auto Iter = llvm::find_if(
104fe6060f1SDimitry Andric         MI.operands(), [](MachineOperand &MO) { return MO.isRegMask(); });
105fe6060f1SDimitry Andric     if (Iter == MI.operands_end())
106fe6060f1SDimitry Andric       return false;
107fe6060f1SDimitry Andric     UsableRegs.clearBitsInMask(Iter->getRegMask());
108fe6060f1SDimitry Andric     return !UsableRegs.none();
109fe6060f1SDimitry Andric   }
110fe6060f1SDimitry Andric 
111fe6060f1SDimitry Andric   /// Check if MI is AMX pseudo instruction.
112fe6060f1SDimitry Andric   bool isAMXInstruction(MachineInstr &MI) {
113fe6060f1SDimitry Andric     if (MI.isPHI() || MI.isDebugInstr() || MI.getNumOperands() < 3)
114fe6060f1SDimitry Andric       return false;
115fe6060f1SDimitry Andric     MachineOperand &MO = MI.getOperand(0);
116fe6060f1SDimitry Andric     // We can simply check if it is AMX instruction by its def.
117fe6060f1SDimitry Andric     // But we should exclude old API which uses physical registers.
118fe6060f1SDimitry Andric     if (MO.isReg() && MO.getReg().isVirtual() &&
119fe6060f1SDimitry Andric         MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID) {
120fe6060f1SDimitry Andric       collectShapeInfo(MI);
121fe6060f1SDimitry Andric       return true;
122fe6060f1SDimitry Andric     }
123fe6060f1SDimitry Andric     // PTILESTOREDV is the only exception that doesn't def a AMX register.
124fe6060f1SDimitry Andric     return MI.getOpcode() == X86::PTILESTOREDV;
125fe6060f1SDimitry Andric   }
126fe6060f1SDimitry Andric 
127fe6060f1SDimitry Andric   /// Check if it is an edge from loop bottom to loop head.
128fe6060f1SDimitry Andric   bool isLoopBackEdge(MachineBasicBlock *Header, MachineBasicBlock *Bottom) {
129fe6060f1SDimitry Andric     if (!MLI->isLoopHeader(Header))
130fe6060f1SDimitry Andric       return false;
131fe6060f1SDimitry Andric     auto *ML = MLI->getLoopFor(Header);
132fe6060f1SDimitry Andric     if (ML->contains(Bottom) && ML->isLoopLatch(Bottom))
133fe6060f1SDimitry Andric       return true;
134fe6060f1SDimitry Andric 
135fe6060f1SDimitry Andric     return false;
136fe6060f1SDimitry Andric   }
137fe6060f1SDimitry Andric 
138fe6060f1SDimitry Andric   /// Collect the shape def information for later use.
139fe6060f1SDimitry Andric   void collectShapeInfo(MachineInstr &MI);
140fe6060f1SDimitry Andric 
141fe6060f1SDimitry Andric   /// Try to hoist shapes definded below AMX instructions.
142fe6060f1SDimitry Andric   bool hoistShapesInBB(MachineBasicBlock *MBB, SmallVectorImpl<MIRef> &Shapes) {
143fe6060f1SDimitry Andric     MIRef &FirstAMX = BBVisitedInfo[MBB].FirstAMX;
144fe6060f1SDimitry Andric     auto FirstShapeBelowAMX = llvm::lower_bound(Shapes, FirstAMX);
145fe6060f1SDimitry Andric     auto InsertPoint = FirstAMX.MI->getIterator();
146fe6060f1SDimitry Andric     for (auto I = FirstShapeBelowAMX, E = Shapes.end(); I != E; ++I) {
147fe6060f1SDimitry Andric       // Do not hoist instructions that access memory.
148fe6060f1SDimitry Andric       if (I->MI->mayLoadOrStore())
149fe6060f1SDimitry Andric         return false;
150fe6060f1SDimitry Andric       for (auto &MO : I->MI->operands()) {
151fe6060f1SDimitry Andric         if (MO.isDef())
152fe6060f1SDimitry Andric           continue;
153fe6060f1SDimitry Andric         // Do not hoist instructions if the sources' def under AMX instruction.
154fe6060f1SDimitry Andric         // TODO: We can handle isMoveImmediate MI here.
155fe6060f1SDimitry Andric         if (MO.isReg() && MIRef(MRI->getVRegDef(MO.getReg())) > FirstAMX)
156fe6060f1SDimitry Andric           return false;
157fe6060f1SDimitry Andric         // TODO: Maybe need more checks here.
158fe6060f1SDimitry Andric       }
159fe6060f1SDimitry Andric       MBB->insert(InsertPoint, I->MI->removeFromParent());
160fe6060f1SDimitry Andric     }
161fe6060f1SDimitry Andric     // We only need to mark the last shape in the BB now.
162fe6060f1SDimitry Andric     Shapes.clear();
163fe6060f1SDimitry Andric     Shapes.push_back(MIRef(&*--InsertPoint, MBB));
164fe6060f1SDimitry Andric     return true;
165fe6060f1SDimitry Andric   }
166e8d8bef9SDimitry Andric 
167e8d8bef9SDimitry Andric public:
168e8d8bef9SDimitry Andric   X86PreTileConfig() : MachineFunctionPass(ID) {}
169e8d8bef9SDimitry Andric 
170e8d8bef9SDimitry Andric   /// Return the pass name.
171e8d8bef9SDimitry Andric   StringRef getPassName() const override {
172e8d8bef9SDimitry Andric     return "Tile Register Pre-configure";
173e8d8bef9SDimitry Andric   }
174e8d8bef9SDimitry Andric 
175e8d8bef9SDimitry Andric   /// X86PreTileConfig analysis usage.
176fe6060f1SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
177fe6060f1SDimitry Andric     AU.setPreservesAll();
178fe6060f1SDimitry Andric     AU.addRequired<MachineLoopInfo>();
179fe6060f1SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
180fe6060f1SDimitry Andric   }
181e8d8bef9SDimitry Andric 
182fe6060f1SDimitry Andric   /// Clear MF related structures.
183fe6060f1SDimitry Andric   void releaseMemory() override {
184fe6060f1SDimitry Andric     ShapeBBs.clear();
185fe6060f1SDimitry Andric     DefVisited.clear();
186fe6060f1SDimitry Andric     BBVisitedInfo.clear();
187fe6060f1SDimitry Andric   }
188fe6060f1SDimitry Andric 
189fe6060f1SDimitry Andric   /// Perform ldtilecfg instructions inserting.
190fe6060f1SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
191e8d8bef9SDimitry Andric 
192e8d8bef9SDimitry Andric   static char ID;
193e8d8bef9SDimitry Andric };
194e8d8bef9SDimitry Andric 
195e8d8bef9SDimitry Andric } // end anonymous namespace
196e8d8bef9SDimitry Andric 
// Definition of the static pass ID member declared in the class.
char X86PreTileConfig::ID = 0;

// Pass registration under the argument string "tilepreconfig", with
// MachineLoopInfo named as a dependency to match getAnalysisUsage().
INITIALIZE_PASS_BEGIN(X86PreTileConfig, "tilepreconfig",
                      "Tile Register Pre-configure", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(X86PreTileConfig, "tilepreconfig",
                    "Tile Register Pre-configure", false, false)
204e8d8bef9SDimitry Andric 
205fe6060f1SDimitry Andric void X86PreTileConfig::collectShapeInfo(MachineInstr &MI) {
206fe6060f1SDimitry Andric   auto RecordShape = [&](MachineInstr *MI, MachineBasicBlock *MBB) {
207fe6060f1SDimitry Andric     MIRef MIR(MI, MBB);
208fe6060f1SDimitry Andric     auto I = llvm::lower_bound(ShapeBBs[MBB], MIR);
209fe6060f1SDimitry Andric     if (I == ShapeBBs[MBB].end() || *I != MIR)
210fe6060f1SDimitry Andric       ShapeBBs[MBB].insert(I, MIR);
211fe6060f1SDimitry Andric   };
212fe6060f1SDimitry Andric 
213fe6060f1SDimitry Andric   SmallVector<Register, 8> WorkList(
214fe6060f1SDimitry Andric       {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
215fe6060f1SDimitry Andric   while (!WorkList.empty()) {
216fe6060f1SDimitry Andric     Register R = WorkList.pop_back_val();
217fe6060f1SDimitry Andric     MachineInstr *DefMI = MRI->getVRegDef(R);
218fe6060f1SDimitry Andric     assert(DefMI && "R must has one define instruction");
219fe6060f1SDimitry Andric     MachineBasicBlock *DefMBB = DefMI->getParent();
220fe6060f1SDimitry Andric     if (DefMI->isMoveImmediate() || !DefVisited.insert(DefMI).second)
221fe6060f1SDimitry Andric       continue;
222fe6060f1SDimitry Andric     if (DefMI->isPHI()) {
223fe6060f1SDimitry Andric       for (unsigned I = 1; I < DefMI->getNumOperands(); I += 2)
224fe6060f1SDimitry Andric         if (isLoopBackEdge(DefMBB, DefMI->getOperand(I + 1).getMBB()))
225fe6060f1SDimitry Andric           RecordShape(DefMI, DefMBB); // In this case, PHI is also a shape def.
226fe6060f1SDimitry Andric         else
227fe6060f1SDimitry Andric           WorkList.push_back(DefMI->getOperand(I).getReg());
228fe6060f1SDimitry Andric     } else {
229fe6060f1SDimitry Andric       RecordShape(DefMI, DefMBB);
230fe6060f1SDimitry Andric     }
231fe6060f1SDimitry Andric   }
232e8d8bef9SDimitry Andric }
233e8d8bef9SDimitry Andric 
234fe6060f1SDimitry Andric bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
235fe6060f1SDimitry Andric   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
236fe6060f1SDimitry Andric   const TargetInstrInfo *TII = ST.getInstrInfo();
237fe6060f1SDimitry Andric   const TargetRegisterInfo *TRI = ST.getRegisterInfo();
238fe6060f1SDimitry Andric   const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);
239*349cc55cSDimitry Andric   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
240e8d8bef9SDimitry Andric 
241fe6060f1SDimitry Andric   BitVector AMXRegs(TRI->getNumRegs());
242fe6060f1SDimitry Andric   for (unsigned I = 0; I < RC->getNumRegs(); I++)
243fe6060f1SDimitry Andric     AMXRegs.set(X86::TMM0 + I);
244fe6060f1SDimitry Andric 
245fe6060f1SDimitry Andric   // Iterate MF to collect information.
246fe6060f1SDimitry Andric   MRI = &MF.getRegInfo();
247fe6060f1SDimitry Andric   MLI = &getAnalysis<MachineLoopInfo>();
248fe6060f1SDimitry Andric   SmallSet<MIRef, 8> CfgNeedInsert;
249fe6060f1SDimitry Andric   SmallVector<MachineBasicBlock *, 8> CfgLiveInBBs;
250fe6060f1SDimitry Andric   for (auto &MBB : MF) {
251fe6060f1SDimitry Andric     size_t Pos = 0;
252fe6060f1SDimitry Andric     for (auto &MI : MBB) {
253fe6060f1SDimitry Andric       ++Pos;
254fe6060f1SDimitry Andric       if (isAMXInstruction(MI)) {
255fe6060f1SDimitry Andric         // If there's call before the AMX, we need to reload tile config.
256fe6060f1SDimitry Andric         if (BBVisitedInfo[&MBB].LastCall)
257fe6060f1SDimitry Andric           CfgNeedInsert.insert(BBVisitedInfo[&MBB].LastCall);
258fe6060f1SDimitry Andric         else // Otherwise, we need tile config to live in this BB.
259fe6060f1SDimitry Andric           BBVisitedInfo[&MBB].NeedTileCfgLiveIn = true;
260fe6060f1SDimitry Andric         // Always record the first AMX in case there's shape def after it.
261fe6060f1SDimitry Andric         if (!BBVisitedInfo[&MBB].FirstAMX)
262fe6060f1SDimitry Andric           BBVisitedInfo[&MBB].FirstAMX = MIRef(&MI, &MBB, Pos);
263fe6060f1SDimitry Andric       } else if (MI.isCall() && isDestructiveCall(MI, AMXRegs)) {
264fe6060f1SDimitry Andric         // Record the call only if the callee clobbers all AMX registers.
265fe6060f1SDimitry Andric         BBVisitedInfo[&MBB].LastCall = MIRef(&MI, &MBB, Pos);
266fe6060f1SDimitry Andric       }
267fe6060f1SDimitry Andric     }
268fe6060f1SDimitry Andric     if (BBVisitedInfo[&MBB].NeedTileCfgLiveIn) {
269fe6060f1SDimitry Andric       if (&MBB == &MF.front())
270fe6060f1SDimitry Andric         CfgNeedInsert.insert(MIRef(&MBB));
271fe6060f1SDimitry Andric       else
272fe6060f1SDimitry Andric         CfgLiveInBBs.push_back(&MBB);
273fe6060f1SDimitry Andric     }
274fe6060f1SDimitry Andric     if (BBVisitedInfo[&MBB].FirstAMX || BBVisitedInfo[&MBB].HasAMXRegLiveIn)
275fe6060f1SDimitry Andric       for (auto *Succ : MBB.successors())
276fe6060f1SDimitry Andric         if (!isLoopBackEdge(Succ, &MBB))
277fe6060f1SDimitry Andric           BBVisitedInfo[Succ].HasAMXRegLiveIn = true;
278fe6060f1SDimitry Andric   }
279fe6060f1SDimitry Andric 
280fe6060f1SDimitry Andric   // Update NeedTileCfgLiveIn for predecessors.
281fe6060f1SDimitry Andric   while (!CfgLiveInBBs.empty()) {
282fe6060f1SDimitry Andric     MachineBasicBlock *MBB = CfgLiveInBBs.pop_back_val();
283fe6060f1SDimitry Andric     for (auto *Pred : MBB->predecessors()) {
284fe6060f1SDimitry Andric       if (BBVisitedInfo[Pred].LastCall) {
285fe6060f1SDimitry Andric         CfgNeedInsert.insert(BBVisitedInfo[Pred].LastCall);
286fe6060f1SDimitry Andric       } else if (!BBVisitedInfo[Pred].NeedTileCfgLiveIn) {
287fe6060f1SDimitry Andric         BBVisitedInfo[Pred].NeedTileCfgLiveIn = true;
288fe6060f1SDimitry Andric         if (Pred == &MF.front())
289fe6060f1SDimitry Andric           CfgNeedInsert.insert(MIRef(Pred));
290fe6060f1SDimitry Andric         else
291fe6060f1SDimitry Andric           CfgLiveInBBs.push_back(Pred);
292fe6060f1SDimitry Andric       }
293fe6060f1SDimitry Andric     }
294fe6060f1SDimitry Andric   }
295fe6060f1SDimitry Andric 
296fe6060f1SDimitry Andric   // There's no AMX instruction if we didn't find a tile config live in point.
297fe6060f1SDimitry Andric   if (CfgNeedInsert.empty())
298fe6060f1SDimitry Andric     return false;
299*349cc55cSDimitry Andric   X86FI->setHasVirtualTileReg(true);
300fe6060f1SDimitry Andric 
301fe6060f1SDimitry Andric   // Avoid to insert ldtilecfg before any shape defs.
302fe6060f1SDimitry Andric   SmallVector<MachineBasicBlock *, 8> WorkList;
303fe6060f1SDimitry Andric   for (auto &I : ShapeBBs) {
304fe6060f1SDimitry Andric     // TODO: We can hoist shapes across BBs here.
305fe6060f1SDimitry Andric     if (BBVisitedInfo[I.first].HasAMXRegLiveIn)
306fe6060f1SDimitry Andric       REPORT_CONFIG_FAIL
307fe6060f1SDimitry Andric     if (BBVisitedInfo[I.first].FirstAMX &&
308fe6060f1SDimitry Andric         BBVisitedInfo[I.first].FirstAMX < I.second.back() &&
309fe6060f1SDimitry Andric         !hoistShapesInBB(I.first, I.second))
310fe6060f1SDimitry Andric       REPORT_CONFIG_FAIL
311fe6060f1SDimitry Andric     WorkList.push_back(I.first);
312fe6060f1SDimitry Andric   }
313fe6060f1SDimitry Andric   while (!WorkList.empty()) {
314fe6060f1SDimitry Andric     MachineBasicBlock *MBB = WorkList.pop_back_val();
315fe6060f1SDimitry Andric     for (auto *Pred : MBB->predecessors()) {
316fe6060f1SDimitry Andric       if (!BBVisitedInfo[Pred].TileCfgForbidden && !isLoopBackEdge(MBB, Pred)) {
317fe6060f1SDimitry Andric         BBVisitedInfo[Pred].TileCfgForbidden = true;
318fe6060f1SDimitry Andric         WorkList.push_back(Pred);
319fe6060f1SDimitry Andric       }
320fe6060f1SDimitry Andric     }
321fe6060f1SDimitry Andric   }
322fe6060f1SDimitry Andric 
323fe6060f1SDimitry Andric   DebugLoc DL;
324fe6060f1SDimitry Andric   SmallSet<MIRef, 8> VisitedOrInserted;
325fe6060f1SDimitry Andric   int SS = MF.getFrameInfo().CreateStackObject(
326fe6060f1SDimitry Andric       ST.getTileConfigSize(), ST.getTileConfigAlignment(), false);
327fe6060f1SDimitry Andric 
328fe6060f1SDimitry Andric   // Try to insert for the tile config live in points.
329*349cc55cSDimitry Andric   for (const auto &I : CfgNeedInsert) {
330fe6060f1SDimitry Andric     SmallSet<MIRef, 8> InsertPoints;
331fe6060f1SDimitry Andric     SmallVector<MIRef, 8> WorkList({I});
332fe6060f1SDimitry Andric     while (!WorkList.empty()) {
333fe6060f1SDimitry Andric       MIRef I = WorkList.pop_back_val();
334fe6060f1SDimitry Andric       if (!VisitedOrInserted.count(I)) {
335fe6060f1SDimitry Andric         if (!BBVisitedInfo[I.MBB].TileCfgForbidden) {
336fe6060f1SDimitry Andric           // If the BB is all shapes reachable, stop sink and try to insert.
337fe6060f1SDimitry Andric           InsertPoints.insert(I);
338fe6060f1SDimitry Andric         } else {
339fe6060f1SDimitry Andric           // Avoid the BB to be multi visited.
340fe6060f1SDimitry Andric           VisitedOrInserted.insert(I);
341fe6060f1SDimitry Andric           // Sink the inserting point along the chain with NeedTileCfgLiveIn =
342fe6060f1SDimitry Andric           // true when MBB isn't all shapes reachable.
343fe6060f1SDimitry Andric           for (auto *Succ : I.MBB->successors())
344fe6060f1SDimitry Andric             if (BBVisitedInfo[Succ].NeedTileCfgLiveIn)
345fe6060f1SDimitry Andric               WorkList.push_back(MIRef(Succ));
346fe6060f1SDimitry Andric         }
347fe6060f1SDimitry Andric       }
348fe6060f1SDimitry Andric     }
349fe6060f1SDimitry Andric 
350fe6060f1SDimitry Andric     // A given point might be forked due to shape conditions are not met.
351fe6060f1SDimitry Andric     for (MIRef I : InsertPoints) {
352fe6060f1SDimitry Andric       // Make sure we insert ldtilecfg after the last shape def in MBB.
353fe6060f1SDimitry Andric       if (ShapeBBs.count(I.MBB) && I < ShapeBBs[I.MBB].back())
354fe6060f1SDimitry Andric         I = ShapeBBs[I.MBB].back();
355fe6060f1SDimitry Andric       // There're chances the MBB is sunk more than once. Record it to avoid
356fe6060f1SDimitry Andric       // multi insert.
357fe6060f1SDimitry Andric       if (VisitedOrInserted.insert(I).second) {
358fe6060f1SDimitry Andric         auto II = I.MI ? I.MI->getIterator() : I.MBB->instr_begin();
359fe6060f1SDimitry Andric         addFrameReference(BuildMI(*I.MBB, ++II, DL, TII->get(X86::LDTILECFG)),
360fe6060f1SDimitry Andric                           SS);
361fe6060f1SDimitry Andric       }
362fe6060f1SDimitry Andric     }
363fe6060f1SDimitry Andric   }
364fe6060f1SDimitry Andric 
365e8d8bef9SDimitry Andric   // Zero stack slot.
366fe6060f1SDimitry Andric   MachineBasicBlock &MBB = MF.front();
367fe6060f1SDimitry Andric   MachineInstr *MI = &*MBB.begin();
368fe6060f1SDimitry Andric   if (ST.hasAVX512()) {
369e8d8bef9SDimitry Andric     Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
370fe6060f1SDimitry Andric     BuildMI(MBB, MI, DL, TII->get(X86::VPXORDZrr), Zmm)
371e8d8bef9SDimitry Andric         .addReg(Zmm, RegState::Undef)
372e8d8bef9SDimitry Andric         .addReg(Zmm, RegState::Undef);
373fe6060f1SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), SS)
374e8d8bef9SDimitry Andric         .addReg(Zmm);
375fe6060f1SDimitry Andric   } else if (ST.hasAVX2()) {
376fe6060f1SDimitry Andric     Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
377fe6060f1SDimitry Andric     BuildMI(MBB, MI, DL, TII->get(X86::VPXORYrr), Ymm)
378fe6060f1SDimitry Andric         .addReg(Ymm, RegState::Undef)
379fe6060f1SDimitry Andric         .addReg(Ymm, RegState::Undef);
380fe6060f1SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), SS)
381fe6060f1SDimitry Andric         .addReg(Ymm);
382fe6060f1SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), SS, 32)
383fe6060f1SDimitry Andric         .addReg(Ymm);
384fe6060f1SDimitry Andric   } else {
385fe6060f1SDimitry Andric     assert(ST.hasSSE2() && "AMX should assume SSE2 enabled");
386fe6060f1SDimitry Andric     Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
387fe6060f1SDimitry Andric     BuildMI(MBB, MI, DL, TII->get(X86::PXORrr), Xmm)
388fe6060f1SDimitry Andric         .addReg(Xmm, RegState::Undef)
389fe6060f1SDimitry Andric         .addReg(Xmm, RegState::Undef);
390fe6060f1SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS)
391fe6060f1SDimitry Andric         .addReg(Xmm);
392fe6060f1SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS, 16)
393fe6060f1SDimitry Andric         .addReg(Xmm);
394fe6060f1SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS, 32)
395fe6060f1SDimitry Andric         .addReg(Xmm);
396fe6060f1SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS, 48)
397fe6060f1SDimitry Andric         .addReg(Xmm);
398e8d8bef9SDimitry Andric   }
399fe6060f1SDimitry Andric   // Fill in the palette first.
400fe6060f1SDimitry Andric   addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), SS).addImm(1);
401e8d8bef9SDimitry Andric 
402e8d8bef9SDimitry Andric   return true;
403e8d8bef9SDimitry Andric }
404e8d8bef9SDimitry Andric 
/// Factory for the pass: returns a newly allocated X86PreTileConfig as a
/// FunctionPass pointer.
FunctionPass *llvm::createX86PreTileConfigPass() {
  return new X86PreTileConfig();
}
408