xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/X86FastTileConfig.cpp (revision fe6060f10f634930ff71b7c50291ddc610da2475)
1*fe6060f1SDimitry Andric //===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===//
2*fe6060f1SDimitry Andric //
3*fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*fe6060f1SDimitry Andric //
7*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8*fe6060f1SDimitry Andric //
/// \file Pass to configure the shape of AMX physical registers.
/// AMX registers need to be configured before use. The ldtilecfg instruction
/// is inserted before the FastRegAllocation pass; however, at that time we
/// don't know the shape of each physical tile register, because register
/// allocation is not done yet. This pass runs after the register allocation
/// pass. It collects the shape information of each physical tile register
/// and stores the shape in the stack slot that is allocated for loading the
/// config into the tile config register.
17*fe6060f1SDimitry Andric //
18*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
19*fe6060f1SDimitry Andric 
20*fe6060f1SDimitry Andric #include "X86.h"
21*fe6060f1SDimitry Andric #include "X86InstrBuilder.h"
22*fe6060f1SDimitry Andric #include "X86MachineFunctionInfo.h"
23*fe6060f1SDimitry Andric #include "X86RegisterInfo.h"
24*fe6060f1SDimitry Andric #include "X86Subtarget.h"
25*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
26*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
27*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
28*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
29*fe6060f1SDimitry Andric #include "llvm/CodeGen/Passes.h"
30*fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
31*fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
32*fe6060f1SDimitry Andric #include "llvm/InitializePasses.h"
33*fe6060f1SDimitry Andric 
34*fe6060f1SDimitry Andric using namespace llvm;
35*fe6060f1SDimitry Andric 
36*fe6060f1SDimitry Andric #define DEBUG_TYPE "fasttileconfig"
37*fe6060f1SDimitry Andric 
38*fe6060f1SDimitry Andric namespace {
39*fe6060f1SDimitry Andric 
40*fe6060f1SDimitry Andric class X86FastTileConfig : public MachineFunctionPass {
41*fe6060f1SDimitry Andric   // context
42*fe6060f1SDimitry Andric   MachineFunction *MF = nullptr;
43*fe6060f1SDimitry Andric   const X86Subtarget *ST = nullptr;
44*fe6060f1SDimitry Andric   const TargetRegisterInfo *TRI = nullptr;
45*fe6060f1SDimitry Andric   const TargetInstrInfo *TII = nullptr;
46*fe6060f1SDimitry Andric   MachineRegisterInfo *MRI = nullptr;
47*fe6060f1SDimitry Andric 
48*fe6060f1SDimitry Andric   MachineInstr *getTileConfigPoint();
49*fe6060f1SDimitry Andric   void tileConfig();
50*fe6060f1SDimitry Andric 
51*fe6060f1SDimitry Andric public:
52*fe6060f1SDimitry Andric   X86FastTileConfig() : MachineFunctionPass(ID) {}
53*fe6060f1SDimitry Andric 
54*fe6060f1SDimitry Andric   bool fastTileConfig();
55*fe6060f1SDimitry Andric   bool isTileLoad(MachineInstr &MI);
56*fe6060f1SDimitry Andric   bool isTileStore(MachineInstr &MI);
57*fe6060f1SDimitry Andric   bool isAMXInstr(MachineInstr &MI);
58*fe6060f1SDimitry Andric   void getTileStoreShape(MachineInstr &MI,
59*fe6060f1SDimitry Andric                          SmallVector<MachineOperand *> &ShapedTiles);
60*fe6060f1SDimitry Andric 
61*fe6060f1SDimitry Andric   MachineInstr *getKeyAMXInstr(MachineInstr *MI);
62*fe6060f1SDimitry Andric   void getTileShapesCfg(MachineInstr *MI,
63*fe6060f1SDimitry Andric                         SmallVector<MachineOperand *> &ShapedTiles);
64*fe6060f1SDimitry Andric   void getShapeCfgInstrs(MachineInstr *MI,
65*fe6060f1SDimitry Andric                          std::map<unsigned, MachineInstr *> &RowCfgs,
66*fe6060f1SDimitry Andric                          std::map<unsigned, MachineInstr *> &ColCfgs);
67*fe6060f1SDimitry Andric 
68*fe6060f1SDimitry Andric   /// Return the pass name.
69*fe6060f1SDimitry Andric   StringRef getPassName() const override {
70*fe6060f1SDimitry Andric     return "Fast Tile Register Configure";
71*fe6060f1SDimitry Andric   }
72*fe6060f1SDimitry Andric 
73*fe6060f1SDimitry Andric   void materializeTileCfg(MachineInstr *MI);
74*fe6060f1SDimitry Andric 
75*fe6060f1SDimitry Andric   void rewriteTileCfg(SmallVector<MachineOperand *> &ShapedTiles,
76*fe6060f1SDimitry Andric                       std::map<unsigned, MachineInstr *> &RowCfgs,
77*fe6060f1SDimitry Andric                       std::map<unsigned, MachineInstr *> &ColCfgs);
78*fe6060f1SDimitry Andric 
79*fe6060f1SDimitry Andric   /// Perform register allocation.
80*fe6060f1SDimitry Andric   bool runOnMachineFunction(MachineFunction &MFunc) override;
81*fe6060f1SDimitry Andric 
82*fe6060f1SDimitry Andric   MachineFunctionProperties getRequiredProperties() const override {
83*fe6060f1SDimitry Andric     return MachineFunctionProperties().set(
84*fe6060f1SDimitry Andric         MachineFunctionProperties::Property::NoPHIs);
85*fe6060f1SDimitry Andric   }
86*fe6060f1SDimitry Andric 
87*fe6060f1SDimitry Andric   static char ID;
88*fe6060f1SDimitry Andric };
89*fe6060f1SDimitry Andric 
90*fe6060f1SDimitry Andric } // end anonymous namespace
91*fe6060f1SDimitry Andric 
92*fe6060f1SDimitry Andric char X86FastTileConfig::ID = 0;
93*fe6060f1SDimitry Andric 
94*fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE,
95*fe6060f1SDimitry Andric                       "Fast Tile Register Configure", false, false)
96*fe6060f1SDimitry Andric INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE,
97*fe6060f1SDimitry Andric                     "Fast Tile Register Configure", false, false)
98*fe6060f1SDimitry Andric 
99*fe6060f1SDimitry Andric static bool isTilePhysReg(MachineOperand &Op) {
100*fe6060f1SDimitry Andric   if (!Op.isReg())
101*fe6060f1SDimitry Andric     return false;
102*fe6060f1SDimitry Andric 
103*fe6060f1SDimitry Andric   Register Reg = Op.getReg();
104*fe6060f1SDimitry Andric   if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
105*fe6060f1SDimitry Andric     return true;
106*fe6060f1SDimitry Andric   return false;
107*fe6060f1SDimitry Andric }
108*fe6060f1SDimitry Andric 
109*fe6060f1SDimitry Andric static unsigned getTilePhysRegIdx(MachineOperand *Op) {
110*fe6060f1SDimitry Andric   assert(isTilePhysReg(*Op) && "Tile Operand is invalid");
111*fe6060f1SDimitry Andric   return Op->getReg() - X86::TMM0;
112*fe6060f1SDimitry Andric }
113*fe6060f1SDimitry Andric 
114*fe6060f1SDimitry Andric static inline void adjustRowCfg(unsigned TIdx, MachineInstr *MI) {
115*fe6060f1SDimitry Andric   unsigned Offset = 48 + TIdx;
116*fe6060f1SDimitry Andric   MI->getOperand(3).ChangeToImmediate(Offset);
117*fe6060f1SDimitry Andric }
118*fe6060f1SDimitry Andric 
119*fe6060f1SDimitry Andric static inline void adjustColCfg(unsigned TIdx, MachineInstr *MI) {
120*fe6060f1SDimitry Andric   unsigned Offset = 16 + TIdx * 2;
121*fe6060f1SDimitry Andric   MI->getOperand(3).ChangeToImmediate(Offset);
122*fe6060f1SDimitry Andric }
123*fe6060f1SDimitry Andric 
124*fe6060f1SDimitry Andric bool X86FastTileConfig::isTileLoad(MachineInstr &MI) {
125*fe6060f1SDimitry Andric   return MI.getOpcode() == X86::PTILELOADDV ||
126*fe6060f1SDimitry Andric          MI.getOpcode() == X86::PTILELOADDT1V;
127*fe6060f1SDimitry Andric }
128*fe6060f1SDimitry Andric bool X86FastTileConfig::isTileStore(MachineInstr &MI) {
129*fe6060f1SDimitry Andric   return MI.getOpcode() == X86::PTILESTOREDV;
130*fe6060f1SDimitry Andric }
131*fe6060f1SDimitry Andric bool X86FastTileConfig::isAMXInstr(MachineInstr &MI) {
132*fe6060f1SDimitry Andric   // TODO: May need to handle some special nontile amx instrucion.
133*fe6060f1SDimitry Andric   if (MI.getOpcode() == X86::PLDTILECFGV || MI.isDebugInstr())
134*fe6060f1SDimitry Andric     return false;
135*fe6060f1SDimitry Andric 
136*fe6060f1SDimitry Andric   for (MachineOperand &MO : MI.operands())
137*fe6060f1SDimitry Andric     if (isTilePhysReg(MO))
138*fe6060f1SDimitry Andric       return true;
139*fe6060f1SDimitry Andric 
140*fe6060f1SDimitry Andric   return false;
141*fe6060f1SDimitry Andric }
142*fe6060f1SDimitry Andric 
143*fe6060f1SDimitry Andric MachineInstr *X86FastTileConfig::getKeyAMXInstr(MachineInstr *MI) {
144*fe6060f1SDimitry Andric   auto Cfg = MachineBasicBlock::iterator(MI);
145*fe6060f1SDimitry Andric   MachineBasicBlock *MBB = MI->getParent();
146*fe6060f1SDimitry Andric   MachineInstr *KeyMI = nullptr;
147*fe6060f1SDimitry Andric   int KeyAMXNum = 0;
148*fe6060f1SDimitry Andric 
149*fe6060f1SDimitry Andric   for (auto II = Cfg; II != MBB->end(); II++) {
150*fe6060f1SDimitry Andric     if (isTileLoad(*II)) {
151*fe6060f1SDimitry Andric       KeyMI = &*II;
152*fe6060f1SDimitry Andric       continue;
153*fe6060f1SDimitry Andric     }
154*fe6060f1SDimitry Andric 
155*fe6060f1SDimitry Andric     if (isTileStore(*II)) {
156*fe6060f1SDimitry Andric       assert(KeyMI && "Key AMX Should be found before!");
157*fe6060f1SDimitry Andric       break;
158*fe6060f1SDimitry Andric     }
159*fe6060f1SDimitry Andric 
160*fe6060f1SDimitry Andric     if (isAMXInstr(*II)) {
161*fe6060f1SDimitry Andric       assert((KeyAMXNum == 0) && "Too many Key AMX instruction!");
162*fe6060f1SDimitry Andric       KeyAMXNum++;
163*fe6060f1SDimitry Andric       KeyMI = &*II;
164*fe6060f1SDimitry Andric     }
165*fe6060f1SDimitry Andric   }
166*fe6060f1SDimitry Andric   assert(KeyMI && "There must be an AMX instruction.");
167*fe6060f1SDimitry Andric   return KeyMI;
168*fe6060f1SDimitry Andric }
169*fe6060f1SDimitry Andric 
170*fe6060f1SDimitry Andric // Orderly get the tiles in key amx instruction, uses before defs.
171*fe6060f1SDimitry Andric void X86FastTileConfig::getTileShapesCfg(
172*fe6060f1SDimitry Andric     MachineInstr *CfgMI, SmallVector<MachineOperand *> &ShapedTiles) {
173*fe6060f1SDimitry Andric   MachineInstr *KeyMI = getKeyAMXInstr(CfgMI);
174*fe6060f1SDimitry Andric 
175*fe6060f1SDimitry Andric   SmallVector<MachineOperand *> DefTiles;
176*fe6060f1SDimitry Andric   for (MachineOperand &MO : KeyMI->operands()) {
177*fe6060f1SDimitry Andric     if (!isTilePhysReg(MO))
178*fe6060f1SDimitry Andric       continue;
179*fe6060f1SDimitry Andric     if (MO.isDef())
180*fe6060f1SDimitry Andric       DefTiles.push_back(&MO);
181*fe6060f1SDimitry Andric     else
182*fe6060f1SDimitry Andric       ShapedTiles.push_back(&MO);
183*fe6060f1SDimitry Andric   }
184*fe6060f1SDimitry Andric   ShapedTiles.append(DefTiles);
185*fe6060f1SDimitry Andric }
186*fe6060f1SDimitry Andric 
187*fe6060f1SDimitry Andric // We pre-config the shapes at position named with "amx.tmm.N.shape.row* and
188*fe6060f1SDimitry Andric // amx.shape.N.col*" at pass "Pre AMX Tile Config".
189*fe6060f1SDimitry Andric // The 'N' implies the order of tiles in key amx intrinsic.
190*fe6060f1SDimitry Andric void X86FastTileConfig::getShapeCfgInstrs(
191*fe6060f1SDimitry Andric     MachineInstr *MI, std::map<unsigned, MachineInstr *> &RowCfgs,
192*fe6060f1SDimitry Andric     std::map<unsigned, MachineInstr *> &ColCfgs) {
193*fe6060f1SDimitry Andric   auto Cfg = MachineBasicBlock::iterator(MI);
194*fe6060f1SDimitry Andric   MachineBasicBlock *MBB = MI->getParent();
195*fe6060f1SDimitry Andric 
196*fe6060f1SDimitry Andric   for (auto II = Cfg; II != MBB->begin(); II--) {
197*fe6060f1SDimitry Andric     if (isAMXInstr(*II) || II->isTerminator() || II->isCall())
198*fe6060f1SDimitry Andric       break;
199*fe6060f1SDimitry Andric     if (!II->mayStore() || !II->hasOneMemOperand())
200*fe6060f1SDimitry Andric       continue;
201*fe6060f1SDimitry Andric     const Value *MemPtr = II->memoperands()[0]->getValue();
202*fe6060f1SDimitry Andric     if (!MemPtr)
203*fe6060f1SDimitry Andric       continue;
204*fe6060f1SDimitry Andric 
205*fe6060f1SDimitry Andric     StringRef Name = MemPtr->getName();
206*fe6060f1SDimitry Andric     if (!Name.startswith("amx.tmm."))
207*fe6060f1SDimitry Andric       continue;
208*fe6060f1SDimitry Andric 
209*fe6060f1SDimitry Andric     // Get the 'N'th tile shape config in key amx instruction.
210*fe6060f1SDimitry Andric     auto N = Name.find(".shape");
211*fe6060f1SDimitry Andric     StringRef STileIdx = Name.slice(8, N);
212*fe6060f1SDimitry Andric     unsigned Idx;
213*fe6060f1SDimitry Andric     STileIdx.getAsInteger(10, Idx);
214*fe6060f1SDimitry Andric 
215*fe6060f1SDimitry Andric     // And related them with their store instructions.
216*fe6060f1SDimitry Andric     if (Name.contains("row"))
217*fe6060f1SDimitry Andric       RowCfgs[Idx] = &*II;
218*fe6060f1SDimitry Andric     else if (Name.contains("col"))
219*fe6060f1SDimitry Andric       ColCfgs[Idx] = &*II;
220*fe6060f1SDimitry Andric     else
221*fe6060f1SDimitry Andric       llvm_unreachable("Invalid tile shape info!");
222*fe6060f1SDimitry Andric   }
223*fe6060f1SDimitry Andric   assert((RowCfgs.size() == ColCfgs.size()) &&
224*fe6060f1SDimitry Andric          "The number of tile row and col must be equal!");
225*fe6060f1SDimitry Andric }
226*fe6060f1SDimitry Andric 
227*fe6060f1SDimitry Andric // Here is the data format for the tile config.
228*fe6060f1SDimitry Andric // 0      palette   = 1 now.
229*fe6060f1SDimitry Andric // 1      start_row = 0 now.
230*fe6060f1SDimitry Andric // 2-15   reserved, must be zero
231*fe6060f1SDimitry Andric // 16-17  tile0.colsb Tile 0 bytes per row.
232*fe6060f1SDimitry Andric // 18-19  tile1.colsb Tile 1 bytes per row.
233*fe6060f1SDimitry Andric // 20-21  tile2.colsb Tile 2 bytes per row.
234*fe6060f1SDimitry Andric // ... (sequence continues)
235*fe6060f1SDimitry Andric // 30-31  tile7.colsb Tile 7 bytes per row.
236*fe6060f1SDimitry Andric // 32-47  reserved, must be zero
237*fe6060f1SDimitry Andric // 48     tile0.rows Tile 0 rows.
238*fe6060f1SDimitry Andric // 49     tile1.rows Tile 1 rows.
239*fe6060f1SDimitry Andric // 50     tile2.rows Tile 2 rows.
240*fe6060f1SDimitry Andric // ... (sequence continues)
241*fe6060f1SDimitry Andric // 55     tile7.rows Tile 7 rows.
242*fe6060f1SDimitry Andric // 56-63  reserved, must be zero
243*fe6060f1SDimitry Andric void X86FastTileConfig::rewriteTileCfg(
244*fe6060f1SDimitry Andric     SmallVector<MachineOperand *> &ShapedTiles,
245*fe6060f1SDimitry Andric     std::map<unsigned, MachineInstr *> &RowCfgs,
246*fe6060f1SDimitry Andric     std::map<unsigned, MachineInstr *> &ColCfgs) {
247*fe6060f1SDimitry Andric   assert((RowCfgs.size() == ShapedTiles.size()) &&
248*fe6060f1SDimitry Andric          "The number of tile shapes not equal with the number of tiles!");
249*fe6060f1SDimitry Andric 
250*fe6060f1SDimitry Andric   // Orderly get the tiles and adjust the shape config.
251*fe6060f1SDimitry Andric   for (unsigned I = 0, E = ShapedTiles.size(); I < E; I++) {
252*fe6060f1SDimitry Andric     MachineOperand *MO = ShapedTiles[I];
253*fe6060f1SDimitry Andric     unsigned TmmIdx = getTilePhysRegIdx(MO);
254*fe6060f1SDimitry Andric     if (I == TmmIdx)
255*fe6060f1SDimitry Andric       continue;
256*fe6060f1SDimitry Andric     adjustRowCfg(TmmIdx, RowCfgs[I]);
257*fe6060f1SDimitry Andric     adjustColCfg(TmmIdx, ColCfgs[I]);
258*fe6060f1SDimitry Andric   }
259*fe6060f1SDimitry Andric }
260*fe6060f1SDimitry Andric 
261*fe6060f1SDimitry Andric // We have already preconfig the shapes before fast register allocation at
262*fe6060f1SDimitry Andric // X86PreAMXConfig::preWriteTileCfg(). Now, we have done fast register
263*fe6060f1SDimitry Andric // allocation, the shapes pre-written before may not rightly corresponding
264*fe6060f1SDimitry Andric // to the correct tmm registers, so we need adjust them.
265*fe6060f1SDimitry Andric void X86FastTileConfig::materializeTileCfg(MachineInstr *CfgMI) {
266*fe6060f1SDimitry Andric   SmallVector<MachineOperand *> ShapedTiles;
267*fe6060f1SDimitry Andric   std::map<unsigned, MachineInstr *> RowCfgs;
268*fe6060f1SDimitry Andric   std::map<unsigned, MachineInstr *> ColCfgs;
269*fe6060f1SDimitry Andric 
270*fe6060f1SDimitry Andric   // Orderly keep the tile uses and def in ShapedTiles;
271*fe6060f1SDimitry Andric   getTileShapesCfg(CfgMI, ShapedTiles);
272*fe6060f1SDimitry Andric   assert(ShapedTiles.size() && "Not find shapes config!");
273*fe6060f1SDimitry Andric 
274*fe6060f1SDimitry Andric   getShapeCfgInstrs(CfgMI, RowCfgs, ColCfgs);
275*fe6060f1SDimitry Andric 
276*fe6060f1SDimitry Andric   rewriteTileCfg(ShapedTiles, RowCfgs, ColCfgs);
277*fe6060f1SDimitry Andric }
278*fe6060f1SDimitry Andric 
279*fe6060f1SDimitry Andric bool X86FastTileConfig::fastTileConfig() {
280*fe6060f1SDimitry Andric   bool Changed = false;
281*fe6060f1SDimitry Andric 
282*fe6060f1SDimitry Andric   for (MachineBasicBlock &MBB : *MF) {
283*fe6060f1SDimitry Andric     SmallVector<MachineInstr *, 2> CFGs;
284*fe6060f1SDimitry Andric     for (MachineInstr &MI : MBB)
285*fe6060f1SDimitry Andric       if (MI.getOpcode() == X86::PLDTILECFGV)
286*fe6060f1SDimitry Andric         CFGs.push_back(&MI);
287*fe6060f1SDimitry Andric     for (auto *MI : CFGs)
288*fe6060f1SDimitry Andric       materializeTileCfg(MI);
289*fe6060f1SDimitry Andric     if (!CFGs.empty())
290*fe6060f1SDimitry Andric       Changed = true;
291*fe6060f1SDimitry Andric   }
292*fe6060f1SDimitry Andric   return Changed;
293*fe6060f1SDimitry Andric }
294*fe6060f1SDimitry Andric 
295*fe6060f1SDimitry Andric bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
296*fe6060f1SDimitry Andric   MF = &MFunc;
297*fe6060f1SDimitry Andric   MRI = &MFunc.getRegInfo();
298*fe6060f1SDimitry Andric   ST = &MFunc.getSubtarget<X86Subtarget>();
299*fe6060f1SDimitry Andric   TRI = ST->getRegisterInfo();
300*fe6060f1SDimitry Andric   TII = MFunc.getSubtarget().getInstrInfo();
301*fe6060f1SDimitry Andric 
302*fe6060f1SDimitry Andric   return fastTileConfig();
303*fe6060f1SDimitry Andric }
304*fe6060f1SDimitry Andric 
305*fe6060f1SDimitry Andric FunctionPass *llvm::createX86FastTileConfigPass() {
306*fe6060f1SDimitry Andric   return new X86FastTileConfig();
307*fe6060f1SDimitry Andric }
308