xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/X86PreTileConfig.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1fe6060f1SDimitry Andric //===-- X86PreTileConfig.cpp - Tile Register Pre-configure-----------------===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric //
9fe6060f1SDimitry Andric /// \file Pass to pre-config the shapes of AMX registers
10fe6060f1SDimitry Andric /// AMX register needs to be configured before use. The shapes of AMX register
11fe6060f1SDimitry Andric /// are encoded in the 1st and 2nd machine operand of AMX pseudo instructions.
12e8d8bef9SDimitry Andric ///
13fe6060f1SDimitry Andric /// The instruction ldtilecfg is used to config the shapes. It must be reachable
14fe6060f1SDimitry Andric /// for all variable shapes. ldtilecfg will be inserted more than once if we
15fe6060f1SDimitry Andric /// cannot find a dominating point for all AMX instructions.
16e8d8bef9SDimitry Andric ///
17fe6060f1SDimitry Andric /// The configure register is caller saved according to ABI. We need to insert
18fe6060f1SDimitry Andric /// ldtilecfg again after the call instruction if callee clobbers any AMX
19fe6060f1SDimitry Andric /// registers.
20e8d8bef9SDimitry Andric ///
/// This pass calculates all the points where ldtilecfg needs to be inserted
/// and inserts it there. It reports an error if the reachability conditions
/// aren't met.
23e8d8bef9SDimitry Andric //
24e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
25e8d8bef9SDimitry Andric 
26e8d8bef9SDimitry Andric #include "X86.h"
27e8d8bef9SDimitry Andric #include "X86InstrBuilder.h"
28349cc55cSDimitry Andric #include "X86MachineFunctionInfo.h"
29e8d8bef9SDimitry Andric #include "X86RegisterInfo.h"
30e8d8bef9SDimitry Andric #include "X86Subtarget.h"
315f757f3fSDimitry Andric #include "llvm/ADT/SmallSet.h"
32e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
33e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
34fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
3581ad6265SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h"
36e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
37e8d8bef9SDimitry Andric #include "llvm/CodeGen/Passes.h"
38e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
39e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
40*0fca6ea1SDimitry Andric #include "llvm/IR/Module.h"
41e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h"
42e8d8bef9SDimitry Andric 
43e8d8bef9SDimitry Andric using namespace llvm;
44e8d8bef9SDimitry Andric 
45e8d8bef9SDimitry Andric #define DEBUG_TYPE "tile-pre-config"
4681ad6265SDimitry Andric 
4781ad6265SDimitry Andric static void emitErrorMsg(MachineFunction &MF) {
48*0fca6ea1SDimitry Andric   LLVMContext &Context = MF.getFunction().getContext();
4906c3fb27SDimitry Andric   Context.emitError(
5006c3fb27SDimitry Andric       MF.getName() +
5106c3fb27SDimitry Andric       ": Failed to config tile register, please define the shape earlier");
5281ad6265SDimitry Andric }
53e8d8bef9SDimitry Andric 
54e8d8bef9SDimitry Andric namespace {
55e8d8bef9SDimitry Andric 
56fe6060f1SDimitry Andric struct MIRef {
57fe6060f1SDimitry Andric   MachineInstr *MI = nullptr;
58fe6060f1SDimitry Andric   MachineBasicBlock *MBB = nullptr;
59fe6060f1SDimitry Andric   // A virtual position for instruction that will be inserted after MI.
60fe6060f1SDimitry Andric   size_t Pos = 0;
61fe6060f1SDimitry Andric   MIRef() = default;
62fe6060f1SDimitry Andric   MIRef(MachineBasicBlock *MBB) : MBB(MBB) {
63fe6060f1SDimitry Andric     for (auto I = MBB->begin(), E = MBB->end(); I != E && I->isPHI();
64fe6060f1SDimitry Andric          ++I, ++Pos)
65fe6060f1SDimitry Andric       MI = &*I;
66fe6060f1SDimitry Andric   }
67fe6060f1SDimitry Andric   MIRef(MachineInstr *MI)
68fe6060f1SDimitry Andric       : MI(MI), MBB(MI->getParent()),
69fe6060f1SDimitry Andric         Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}
70fe6060f1SDimitry Andric   MIRef(MachineInstr *MI, MachineBasicBlock *MBB)
71fe6060f1SDimitry Andric       : MI(MI), MBB(MBB),
72fe6060f1SDimitry Andric         Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}
73fe6060f1SDimitry Andric   MIRef(MachineInstr *MI, MachineBasicBlock *MBB, size_t Pos)
74fe6060f1SDimitry Andric       : MI(MI), MBB(MBB), Pos(Pos) {}
75fe6060f1SDimitry Andric   operator bool() const { return MBB != nullptr; }
76fe6060f1SDimitry Andric   bool operator==(const MIRef &RHS) const {
77fe6060f1SDimitry Andric     return MI == RHS.MI && MBB == RHS.MBB;
78fe6060f1SDimitry Andric   }
79fe6060f1SDimitry Andric   bool operator!=(const MIRef &RHS) const { return !(*this == RHS); }
80fe6060f1SDimitry Andric   bool operator<(const MIRef &RHS) const {
81fe6060f1SDimitry Andric     // Comparison between different BBs happens when inserting a MIRef into set.
82fe6060f1SDimitry Andric     // So we compare MBB first to make the insertion happy.
83fe6060f1SDimitry Andric     return MBB < RHS.MBB || (MBB == RHS.MBB && Pos < RHS.Pos);
84fe6060f1SDimitry Andric   }
85fe6060f1SDimitry Andric   bool operator>(const MIRef &RHS) const {
86fe6060f1SDimitry Andric     // Comparison between different BBs happens when inserting a MIRef into set.
87fe6060f1SDimitry Andric     // So we compare MBB first to make the insertion happy.
88fe6060f1SDimitry Andric     return MBB > RHS.MBB || (MBB == RHS.MBB && Pos > RHS.Pos);
89fe6060f1SDimitry Andric   }
90fe6060f1SDimitry Andric };
91e8d8bef9SDimitry Andric 
92fe6060f1SDimitry Andric struct BBInfo {
93fe6060f1SDimitry Andric   MIRef FirstAMX;
94fe6060f1SDimitry Andric   MIRef LastCall;
95fe6060f1SDimitry Andric   bool HasAMXRegLiveIn = false;
96fe6060f1SDimitry Andric   bool TileCfgForbidden = false;
97fe6060f1SDimitry Andric   bool NeedTileCfgLiveIn = false;
98fe6060f1SDimitry Andric };
99fe6060f1SDimitry Andric 
100fe6060f1SDimitry Andric class X86PreTileConfig : public MachineFunctionPass {
10106c3fb27SDimitry Andric   MachineRegisterInfo *MRI = nullptr;
10206c3fb27SDimitry Andric   const MachineLoopInfo *MLI = nullptr;
103fe6060f1SDimitry Andric   SmallSet<MachineInstr *, 8> DefVisited;
104fe6060f1SDimitry Andric   DenseMap<MachineBasicBlock *, BBInfo> BBVisitedInfo;
105fe6060f1SDimitry Andric   DenseMap<MachineBasicBlock *, SmallVector<MIRef, 8>> ShapeBBs;
106fe6060f1SDimitry Andric 
107fe6060f1SDimitry Andric   /// Check if the callee will clobber AMX registers.
108fe6060f1SDimitry Andric   bool isDestructiveCall(MachineInstr &MI, BitVector UsableRegs) {
109fe6060f1SDimitry Andric     auto Iter = llvm::find_if(
110fe6060f1SDimitry Andric         MI.operands(), [](MachineOperand &MO) { return MO.isRegMask(); });
111fe6060f1SDimitry Andric     if (Iter == MI.operands_end())
112fe6060f1SDimitry Andric       return false;
113fe6060f1SDimitry Andric     UsableRegs.clearBitsInMask(Iter->getRegMask());
114fe6060f1SDimitry Andric     return !UsableRegs.none();
115fe6060f1SDimitry Andric   }
116fe6060f1SDimitry Andric 
117fe6060f1SDimitry Andric   /// Check if MI is AMX pseudo instruction.
118fe6060f1SDimitry Andric   bool isAMXInstruction(MachineInstr &MI) {
119fe6060f1SDimitry Andric     if (MI.isPHI() || MI.isDebugInstr() || MI.getNumOperands() < 3)
120fe6060f1SDimitry Andric       return false;
121fe6060f1SDimitry Andric     MachineOperand &MO = MI.getOperand(0);
122fe6060f1SDimitry Andric     // We can simply check if it is AMX instruction by its def.
123fe6060f1SDimitry Andric     // But we should exclude old API which uses physical registers.
124fe6060f1SDimitry Andric     if (MO.isReg() && MO.getReg().isVirtual() &&
125fe6060f1SDimitry Andric         MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID) {
126fe6060f1SDimitry Andric       collectShapeInfo(MI);
127fe6060f1SDimitry Andric       return true;
128fe6060f1SDimitry Andric     }
129fe6060f1SDimitry Andric     // PTILESTOREDV is the only exception that doesn't def a AMX register.
130fe6060f1SDimitry Andric     return MI.getOpcode() == X86::PTILESTOREDV;
131fe6060f1SDimitry Andric   }
132fe6060f1SDimitry Andric 
133fe6060f1SDimitry Andric   /// Check if it is an edge from loop bottom to loop head.
134fe6060f1SDimitry Andric   bool isLoopBackEdge(MachineBasicBlock *Header, MachineBasicBlock *Bottom) {
135fe6060f1SDimitry Andric     if (!MLI->isLoopHeader(Header))
136fe6060f1SDimitry Andric       return false;
137fe6060f1SDimitry Andric     auto *ML = MLI->getLoopFor(Header);
138fe6060f1SDimitry Andric     if (ML->contains(Bottom) && ML->isLoopLatch(Bottom))
139fe6060f1SDimitry Andric       return true;
140fe6060f1SDimitry Andric 
141fe6060f1SDimitry Andric     return false;
142fe6060f1SDimitry Andric   }
143fe6060f1SDimitry Andric 
144fe6060f1SDimitry Andric   /// Collect the shape def information for later use.
145fe6060f1SDimitry Andric   void collectShapeInfo(MachineInstr &MI);
146fe6060f1SDimitry Andric 
147fe6060f1SDimitry Andric   /// Try to hoist shapes definded below AMX instructions.
148fe6060f1SDimitry Andric   bool hoistShapesInBB(MachineBasicBlock *MBB, SmallVectorImpl<MIRef> &Shapes) {
149fe6060f1SDimitry Andric     MIRef &FirstAMX = BBVisitedInfo[MBB].FirstAMX;
150fe6060f1SDimitry Andric     auto FirstShapeBelowAMX = llvm::lower_bound(Shapes, FirstAMX);
151fe6060f1SDimitry Andric     auto InsertPoint = FirstAMX.MI->getIterator();
152fe6060f1SDimitry Andric     for (auto I = FirstShapeBelowAMX, E = Shapes.end(); I != E; ++I) {
153fe6060f1SDimitry Andric       // Do not hoist instructions that access memory.
154fe6060f1SDimitry Andric       if (I->MI->mayLoadOrStore())
155fe6060f1SDimitry Andric         return false;
156fe6060f1SDimitry Andric       for (auto &MO : I->MI->operands()) {
157fe6060f1SDimitry Andric         if (MO.isDef())
158fe6060f1SDimitry Andric           continue;
159fe6060f1SDimitry Andric         // Do not hoist instructions if the sources' def under AMX instruction.
160fe6060f1SDimitry Andric         // TODO: We can handle isMoveImmediate MI here.
161fe6060f1SDimitry Andric         if (MO.isReg() && MIRef(MRI->getVRegDef(MO.getReg())) > FirstAMX)
162fe6060f1SDimitry Andric           return false;
163fe6060f1SDimitry Andric         // TODO: Maybe need more checks here.
164fe6060f1SDimitry Andric       }
165fe6060f1SDimitry Andric       MBB->insert(InsertPoint, I->MI->removeFromParent());
166fe6060f1SDimitry Andric     }
167fe6060f1SDimitry Andric     // We only need to mark the last shape in the BB now.
168fe6060f1SDimitry Andric     Shapes.clear();
169fe6060f1SDimitry Andric     Shapes.push_back(MIRef(&*--InsertPoint, MBB));
170fe6060f1SDimitry Andric     return true;
171fe6060f1SDimitry Andric   }
172e8d8bef9SDimitry Andric 
173e8d8bef9SDimitry Andric public:
174e8d8bef9SDimitry Andric   X86PreTileConfig() : MachineFunctionPass(ID) {}
175e8d8bef9SDimitry Andric 
176e8d8bef9SDimitry Andric   /// Return the pass name.
177e8d8bef9SDimitry Andric   StringRef getPassName() const override {
178e8d8bef9SDimitry Andric     return "Tile Register Pre-configure";
179e8d8bef9SDimitry Andric   }
180e8d8bef9SDimitry Andric 
181e8d8bef9SDimitry Andric   /// X86PreTileConfig analysis usage.
182fe6060f1SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
183fe6060f1SDimitry Andric     AU.setPreservesAll();
184*0fca6ea1SDimitry Andric     AU.addRequired<MachineLoopInfoWrapperPass>();
185fe6060f1SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
186fe6060f1SDimitry Andric   }
187e8d8bef9SDimitry Andric 
188fe6060f1SDimitry Andric   /// Clear MF related structures.
189fe6060f1SDimitry Andric   void releaseMemory() override {
190fe6060f1SDimitry Andric     ShapeBBs.clear();
191fe6060f1SDimitry Andric     DefVisited.clear();
192fe6060f1SDimitry Andric     BBVisitedInfo.clear();
193fe6060f1SDimitry Andric   }
194fe6060f1SDimitry Andric 
195fe6060f1SDimitry Andric   /// Perform ldtilecfg instructions inserting.
196fe6060f1SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
197e8d8bef9SDimitry Andric 
198e8d8bef9SDimitry Andric   static char ID;
199e8d8bef9SDimitry Andric };
200e8d8bef9SDimitry Andric 
201e8d8bef9SDimitry Andric } // end anonymous namespace
202e8d8bef9SDimitry Andric 
203e8d8bef9SDimitry Andric char X86PreTileConfig::ID = 0;
204e8d8bef9SDimitry Andric 
205e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(X86PreTileConfig, "tilepreconfig",
206fe6060f1SDimitry Andric                       "Tile Register Pre-configure", false, false)
207*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
208e8d8bef9SDimitry Andric INITIALIZE_PASS_END(X86PreTileConfig, "tilepreconfig",
209fe6060f1SDimitry Andric                     "Tile Register Pre-configure", false, false)
210e8d8bef9SDimitry Andric 
211fe6060f1SDimitry Andric void X86PreTileConfig::collectShapeInfo(MachineInstr &MI) {
212fe6060f1SDimitry Andric   auto RecordShape = [&](MachineInstr *MI, MachineBasicBlock *MBB) {
213fe6060f1SDimitry Andric     MIRef MIR(MI, MBB);
214fe6060f1SDimitry Andric     auto I = llvm::lower_bound(ShapeBBs[MBB], MIR);
215fe6060f1SDimitry Andric     if (I == ShapeBBs[MBB].end() || *I != MIR)
216fe6060f1SDimitry Andric       ShapeBBs[MBB].insert(I, MIR);
217fe6060f1SDimitry Andric   };
218fe6060f1SDimitry Andric 
219fe6060f1SDimitry Andric   SmallVector<Register, 8> WorkList(
220fe6060f1SDimitry Andric       {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
221fe6060f1SDimitry Andric   while (!WorkList.empty()) {
222fe6060f1SDimitry Andric     Register R = WorkList.pop_back_val();
223fe6060f1SDimitry Andric     MachineInstr *DefMI = MRI->getVRegDef(R);
224fe6060f1SDimitry Andric     assert(DefMI && "R must has one define instruction");
225fe6060f1SDimitry Andric     MachineBasicBlock *DefMBB = DefMI->getParent();
226fe6060f1SDimitry Andric     if (DefMI->isMoveImmediate() || !DefVisited.insert(DefMI).second)
227fe6060f1SDimitry Andric       continue;
228fe6060f1SDimitry Andric     if (DefMI->isPHI()) {
229fe6060f1SDimitry Andric       for (unsigned I = 1; I < DefMI->getNumOperands(); I += 2)
230fe6060f1SDimitry Andric         if (isLoopBackEdge(DefMBB, DefMI->getOperand(I + 1).getMBB()))
231fe6060f1SDimitry Andric           RecordShape(DefMI, DefMBB); // In this case, PHI is also a shape def.
232fe6060f1SDimitry Andric         else
233fe6060f1SDimitry Andric           WorkList.push_back(DefMI->getOperand(I).getReg());
234fe6060f1SDimitry Andric     } else {
235fe6060f1SDimitry Andric       RecordShape(DefMI, DefMBB);
236fe6060f1SDimitry Andric     }
237fe6060f1SDimitry Andric   }
238e8d8bef9SDimitry Andric }
239e8d8bef9SDimitry Andric 
240fe6060f1SDimitry Andric bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
241*0fca6ea1SDimitry Andric   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
242*0fca6ea1SDimitry Andric   // Early exit in the common case of non-AMX code.
243*0fca6ea1SDimitry Andric   if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
244*0fca6ea1SDimitry Andric     return false;
245*0fca6ea1SDimitry Andric 
246fe6060f1SDimitry Andric   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
247fe6060f1SDimitry Andric   const TargetInstrInfo *TII = ST.getInstrInfo();
248fe6060f1SDimitry Andric   const TargetRegisterInfo *TRI = ST.getRegisterInfo();
249fe6060f1SDimitry Andric   const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);
250e8d8bef9SDimitry Andric 
251fe6060f1SDimitry Andric   BitVector AMXRegs(TRI->getNumRegs());
252fe6060f1SDimitry Andric   for (unsigned I = 0; I < RC->getNumRegs(); I++)
253fe6060f1SDimitry Andric     AMXRegs.set(X86::TMM0 + I);
254fe6060f1SDimitry Andric 
255fe6060f1SDimitry Andric   // Iterate MF to collect information.
256fe6060f1SDimitry Andric   MRI = &MF.getRegInfo();
257*0fca6ea1SDimitry Andric   MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
258fe6060f1SDimitry Andric   SmallSet<MIRef, 8> CfgNeedInsert;
259fe6060f1SDimitry Andric   SmallVector<MachineBasicBlock *, 8> CfgLiveInBBs;
260fe6060f1SDimitry Andric   for (auto &MBB : MF) {
261fe6060f1SDimitry Andric     size_t Pos = 0;
262fe6060f1SDimitry Andric     for (auto &MI : MBB) {
263fe6060f1SDimitry Andric       ++Pos;
264fe6060f1SDimitry Andric       if (isAMXInstruction(MI)) {
265fe6060f1SDimitry Andric         // If there's call before the AMX, we need to reload tile config.
266fe6060f1SDimitry Andric         if (BBVisitedInfo[&MBB].LastCall)
267fe6060f1SDimitry Andric           CfgNeedInsert.insert(BBVisitedInfo[&MBB].LastCall);
268fe6060f1SDimitry Andric         else // Otherwise, we need tile config to live in this BB.
269fe6060f1SDimitry Andric           BBVisitedInfo[&MBB].NeedTileCfgLiveIn = true;
270fe6060f1SDimitry Andric         // Always record the first AMX in case there's shape def after it.
271fe6060f1SDimitry Andric         if (!BBVisitedInfo[&MBB].FirstAMX)
272fe6060f1SDimitry Andric           BBVisitedInfo[&MBB].FirstAMX = MIRef(&MI, &MBB, Pos);
273fe6060f1SDimitry Andric       } else if (MI.isCall() && isDestructiveCall(MI, AMXRegs)) {
274fe6060f1SDimitry Andric         // Record the call only if the callee clobbers all AMX registers.
275fe6060f1SDimitry Andric         BBVisitedInfo[&MBB].LastCall = MIRef(&MI, &MBB, Pos);
276fe6060f1SDimitry Andric       }
277fe6060f1SDimitry Andric     }
278fe6060f1SDimitry Andric     if (BBVisitedInfo[&MBB].NeedTileCfgLiveIn) {
279fe6060f1SDimitry Andric       if (&MBB == &MF.front())
280fe6060f1SDimitry Andric         CfgNeedInsert.insert(MIRef(&MBB));
281fe6060f1SDimitry Andric       else
282fe6060f1SDimitry Andric         CfgLiveInBBs.push_back(&MBB);
283fe6060f1SDimitry Andric     }
284fe6060f1SDimitry Andric     if (BBVisitedInfo[&MBB].FirstAMX || BBVisitedInfo[&MBB].HasAMXRegLiveIn)
285fe6060f1SDimitry Andric       for (auto *Succ : MBB.successors())
286fe6060f1SDimitry Andric         if (!isLoopBackEdge(Succ, &MBB))
287fe6060f1SDimitry Andric           BBVisitedInfo[Succ].HasAMXRegLiveIn = true;
288fe6060f1SDimitry Andric   }
289fe6060f1SDimitry Andric 
290fe6060f1SDimitry Andric   // Update NeedTileCfgLiveIn for predecessors.
291fe6060f1SDimitry Andric   while (!CfgLiveInBBs.empty()) {
292fe6060f1SDimitry Andric     MachineBasicBlock *MBB = CfgLiveInBBs.pop_back_val();
293fe6060f1SDimitry Andric     for (auto *Pred : MBB->predecessors()) {
294fe6060f1SDimitry Andric       if (BBVisitedInfo[Pred].LastCall) {
295fe6060f1SDimitry Andric         CfgNeedInsert.insert(BBVisitedInfo[Pred].LastCall);
296fe6060f1SDimitry Andric       } else if (!BBVisitedInfo[Pred].NeedTileCfgLiveIn) {
297fe6060f1SDimitry Andric         BBVisitedInfo[Pred].NeedTileCfgLiveIn = true;
298fe6060f1SDimitry Andric         if (Pred == &MF.front())
299fe6060f1SDimitry Andric           CfgNeedInsert.insert(MIRef(Pred));
300fe6060f1SDimitry Andric         else
301fe6060f1SDimitry Andric           CfgLiveInBBs.push_back(Pred);
302fe6060f1SDimitry Andric       }
303fe6060f1SDimitry Andric     }
304fe6060f1SDimitry Andric   }
305fe6060f1SDimitry Andric 
306fe6060f1SDimitry Andric   // There's no AMX instruction if we didn't find a tile config live in point.
307fe6060f1SDimitry Andric   if (CfgNeedInsert.empty())
308fe6060f1SDimitry Andric     return false;
309fe6060f1SDimitry Andric 
310fe6060f1SDimitry Andric   // Avoid to insert ldtilecfg before any shape defs.
311fe6060f1SDimitry Andric   SmallVector<MachineBasicBlock *, 8> WorkList;
312fe6060f1SDimitry Andric   for (auto &I : ShapeBBs) {
313fe6060f1SDimitry Andric     // TODO: We can hoist shapes across BBs here.
31481ad6265SDimitry Andric     if (BBVisitedInfo[I.first].HasAMXRegLiveIn) {
31581ad6265SDimitry Andric       // We are not able to config tile registers since the shape to config
31681ad6265SDimitry Andric       // is not defined yet. Emit error message and continue. The function
31781ad6265SDimitry Andric       // would not config tile registers.
31881ad6265SDimitry Andric       emitErrorMsg(MF);
31981ad6265SDimitry Andric       return false;
32081ad6265SDimitry Andric     }
321fe6060f1SDimitry Andric     if (BBVisitedInfo[I.first].FirstAMX &&
322fe6060f1SDimitry Andric         BBVisitedInfo[I.first].FirstAMX < I.second.back() &&
32381ad6265SDimitry Andric         !hoistShapesInBB(I.first, I.second)) {
32481ad6265SDimitry Andric       emitErrorMsg(MF);
32581ad6265SDimitry Andric       return false;
32681ad6265SDimitry Andric     }
327fe6060f1SDimitry Andric     WorkList.push_back(I.first);
328fe6060f1SDimitry Andric   }
329fe6060f1SDimitry Andric   while (!WorkList.empty()) {
330fe6060f1SDimitry Andric     MachineBasicBlock *MBB = WorkList.pop_back_val();
331fe6060f1SDimitry Andric     for (auto *Pred : MBB->predecessors()) {
332fe6060f1SDimitry Andric       if (!BBVisitedInfo[Pred].TileCfgForbidden && !isLoopBackEdge(MBB, Pred)) {
333fe6060f1SDimitry Andric         BBVisitedInfo[Pred].TileCfgForbidden = true;
334fe6060f1SDimitry Andric         WorkList.push_back(Pred);
335fe6060f1SDimitry Andric       }
336fe6060f1SDimitry Andric     }
337fe6060f1SDimitry Andric   }
338fe6060f1SDimitry Andric 
339fe6060f1SDimitry Andric   DebugLoc DL;
340fe6060f1SDimitry Andric   SmallSet<MIRef, 8> VisitedOrInserted;
341fe6060f1SDimitry Andric   int SS = MF.getFrameInfo().CreateStackObject(
342fe6060f1SDimitry Andric       ST.getTileConfigSize(), ST.getTileConfigAlignment(), false);
343fe6060f1SDimitry Andric 
344fe6060f1SDimitry Andric   // Try to insert for the tile config live in points.
345349cc55cSDimitry Andric   for (const auto &I : CfgNeedInsert) {
346fe6060f1SDimitry Andric     SmallSet<MIRef, 8> InsertPoints;
347fe6060f1SDimitry Andric     SmallVector<MIRef, 8> WorkList({I});
348fe6060f1SDimitry Andric     while (!WorkList.empty()) {
349fe6060f1SDimitry Andric       MIRef I = WorkList.pop_back_val();
350fe6060f1SDimitry Andric       if (!VisitedOrInserted.count(I)) {
351fe6060f1SDimitry Andric         if (!BBVisitedInfo[I.MBB].TileCfgForbidden) {
352fe6060f1SDimitry Andric           // If the BB is all shapes reachable, stop sink and try to insert.
353fe6060f1SDimitry Andric           InsertPoints.insert(I);
354fe6060f1SDimitry Andric         } else {
355fe6060f1SDimitry Andric           // Avoid the BB to be multi visited.
356fe6060f1SDimitry Andric           VisitedOrInserted.insert(I);
357fe6060f1SDimitry Andric           // Sink the inserting point along the chain with NeedTileCfgLiveIn =
358fe6060f1SDimitry Andric           // true when MBB isn't all shapes reachable.
359fe6060f1SDimitry Andric           for (auto *Succ : I.MBB->successors())
360fe6060f1SDimitry Andric             if (BBVisitedInfo[Succ].NeedTileCfgLiveIn)
361fe6060f1SDimitry Andric               WorkList.push_back(MIRef(Succ));
362fe6060f1SDimitry Andric         }
363fe6060f1SDimitry Andric       }
364fe6060f1SDimitry Andric     }
365fe6060f1SDimitry Andric 
366fe6060f1SDimitry Andric     // A given point might be forked due to shape conditions are not met.
367fe6060f1SDimitry Andric     for (MIRef I : InsertPoints) {
368fe6060f1SDimitry Andric       // Make sure we insert ldtilecfg after the last shape def in MBB.
369fe6060f1SDimitry Andric       if (ShapeBBs.count(I.MBB) && I < ShapeBBs[I.MBB].back())
370fe6060f1SDimitry Andric         I = ShapeBBs[I.MBB].back();
371fe6060f1SDimitry Andric       // There're chances the MBB is sunk more than once. Record it to avoid
372fe6060f1SDimitry Andric       // multi insert.
373fe6060f1SDimitry Andric       if (VisitedOrInserted.insert(I).second) {
374fe6060f1SDimitry Andric         auto II = I.MI ? I.MI->getIterator() : I.MBB->instr_begin();
37581ad6265SDimitry Andric         addFrameReference(BuildMI(*I.MBB, ++II, DL, TII->get(X86::PLDTILECFGV)),
376fe6060f1SDimitry Andric                           SS);
377fe6060f1SDimitry Andric       }
378fe6060f1SDimitry Andric     }
379fe6060f1SDimitry Andric   }
380fe6060f1SDimitry Andric 
381e8d8bef9SDimitry Andric   // Zero stack slot.
382fe6060f1SDimitry Andric   MachineBasicBlock &MBB = MF.front();
383fe6060f1SDimitry Andric   MachineInstr *MI = &*MBB.begin();
384fe6060f1SDimitry Andric   if (ST.hasAVX512()) {
385e8d8bef9SDimitry Andric     Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
38681ad6265SDimitry Andric     BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
387fe6060f1SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), SS)
388e8d8bef9SDimitry Andric         .addReg(Zmm);
389fe6060f1SDimitry Andric   } else if (ST.hasAVX2()) {
390fe6060f1SDimitry Andric     Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
39181ad6265SDimitry Andric     BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
392fe6060f1SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), SS)
393fe6060f1SDimitry Andric         .addReg(Ymm);
394fe6060f1SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), SS, 32)
395fe6060f1SDimitry Andric         .addReg(Ymm);
396fe6060f1SDimitry Andric   } else {
397fe6060f1SDimitry Andric     assert(ST.hasSSE2() && "AMX should assume SSE2 enabled");
39881ad6265SDimitry Andric     unsigned StoreOpc = ST.hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
399fe6060f1SDimitry Andric     Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
40081ad6265SDimitry Andric     BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
40181ad6265SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS).addReg(Xmm);
40281ad6265SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS, 16)
403fe6060f1SDimitry Andric         .addReg(Xmm);
40481ad6265SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS, 32)
405fe6060f1SDimitry Andric         .addReg(Xmm);
40681ad6265SDimitry Andric     addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS, 48)
407fe6060f1SDimitry Andric         .addReg(Xmm);
408e8d8bef9SDimitry Andric   }
409fe6060f1SDimitry Andric   // Fill in the palette first.
410fe6060f1SDimitry Andric   addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), SS).addImm(1);
411e8d8bef9SDimitry Andric 
412e8d8bef9SDimitry Andric   return true;
413e8d8bef9SDimitry Andric }
414e8d8bef9SDimitry Andric 
415e8d8bef9SDimitry Andric FunctionPass *llvm::createX86PreTileConfigPass() {
416e8d8bef9SDimitry Andric   return new X86PreTileConfig();
417e8d8bef9SDimitry Andric }
418