xref: /llvm-project/llvm/lib/Target/X86/X86FastTileConfig.cpp (revision dfe43bd1ca46c59399b7cbbf81b09256232e27f9)
1d4bdeca5SXiang1 Zhang //===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===//
2d4bdeca5SXiang1 Zhang //
3d4bdeca5SXiang1 Zhang // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4d4bdeca5SXiang1 Zhang // See https://llvm.org/LICENSE.txt for license information.
5d4bdeca5SXiang1 Zhang // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6d4bdeca5SXiang1 Zhang //
7d4bdeca5SXiang1 Zhang //===----------------------------------------------------------------------===//
8d4bdeca5SXiang1 Zhang //
/// \file Pass to configure the shapes of the AMX physical tile registers.
/// AMX tile registers need to be configured before they are used. The
/// ldtilecfg instruction is inserted before the FastRegAllocation pass, but
/// at that point the shape of each physical tile register is not known yet
/// because register allocation has not been done. This pass runs after the
/// register allocation pass. It collects the shape information of each
/// physical tile register and stores the shapes in the stack slot that is
/// allocated for loading the configuration into the tile config register.
17d4bdeca5SXiang1 Zhang //
18d4bdeca5SXiang1 Zhang //===----------------------------------------------------------------------===//
19d4bdeca5SXiang1 Zhang 
20d4bdeca5SXiang1 Zhang #include "X86.h"
21d4bdeca5SXiang1 Zhang #include "X86InstrBuilder.h"
22d4bdeca5SXiang1 Zhang #include "X86MachineFunctionInfo.h"
23d4bdeca5SXiang1 Zhang #include "X86Subtarget.h"
24d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/MachineFrameInfo.h"
25d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/MachineFunctionPass.h"
26d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/MachineInstr.h"
27d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/MachineRegisterInfo.h"
28d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/Passes.h"
29d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/TargetInstrInfo.h"
30d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/TargetRegisterInfo.h"
31d4bdeca5SXiang1 Zhang 
32d4bdeca5SXiang1 Zhang using namespace llvm;
33d4bdeca5SXiang1 Zhang 
34d4bdeca5SXiang1 Zhang #define DEBUG_TYPE "fasttileconfig"
35d4bdeca5SXiang1 Zhang 
36d4bdeca5SXiang1 Zhang namespace {
37d4bdeca5SXiang1 Zhang 
38d4bdeca5SXiang1 Zhang class X86FastTileConfig : public MachineFunctionPass {
39d4bdeca5SXiang1 Zhang   // context
40d4bdeca5SXiang1 Zhang   MachineFunction *MF = nullptr;
41d4bdeca5SXiang1 Zhang   const TargetInstrInfo *TII = nullptr;
42d4bdeca5SXiang1 Zhang   MachineRegisterInfo *MRI = nullptr;
43496156acSLuo, Yuanke   const TargetRegisterInfo *TRI = nullptr;
44c4dba471SLuo, Yuanke   X86MachineFunctionInfo *X86FI = nullptr;
45d4bdeca5SXiang1 Zhang 
46496156acSLuo, Yuanke   bool configBasicBlock(MachineBasicBlock &MBB);
47d4bdeca5SXiang1 Zhang 
48d4bdeca5SXiang1 Zhang public:
49d4bdeca5SXiang1 Zhang   X86FastTileConfig() : MachineFunctionPass(ID) {}
50d4bdeca5SXiang1 Zhang 
51d4bdeca5SXiang1 Zhang   /// Return the pass name.
52d4bdeca5SXiang1 Zhang   StringRef getPassName() const override {
53d4bdeca5SXiang1 Zhang     return "Fast Tile Register Configure";
54d4bdeca5SXiang1 Zhang   }
55d4bdeca5SXiang1 Zhang 
56496156acSLuo, Yuanke   void getAnalysisUsage(AnalysisUsage &AU) const override {
57496156acSLuo, Yuanke     AU.setPreservesAll();
58496156acSLuo, Yuanke     MachineFunctionPass::getAnalysisUsage(AU);
59496156acSLuo, Yuanke   }
60d4bdeca5SXiang1 Zhang 
61d4bdeca5SXiang1 Zhang   /// Perform register allocation.
62d4bdeca5SXiang1 Zhang   bool runOnMachineFunction(MachineFunction &MFunc) override;
63d4bdeca5SXiang1 Zhang 
64d4bdeca5SXiang1 Zhang   MachineFunctionProperties getRequiredProperties() const override {
65d4bdeca5SXiang1 Zhang     return MachineFunctionProperties().set(
66d4bdeca5SXiang1 Zhang         MachineFunctionProperties::Property::NoPHIs);
67d4bdeca5SXiang1 Zhang   }
68d4bdeca5SXiang1 Zhang 
69d4bdeca5SXiang1 Zhang   static char ID;
70d4bdeca5SXiang1 Zhang };
71d4bdeca5SXiang1 Zhang 
72d4bdeca5SXiang1 Zhang } // end anonymous namespace
73d4bdeca5SXiang1 Zhang 
74d4bdeca5SXiang1 Zhang char X86FastTileConfig::ID = 0;
75d4bdeca5SXiang1 Zhang 
76d4bdeca5SXiang1 Zhang INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE,
77d4bdeca5SXiang1 Zhang                       "Fast Tile Register Configure", false, false)
78d4bdeca5SXiang1 Zhang INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE,
79d4bdeca5SXiang1 Zhang                     "Fast Tile Register Configure", false, false)
80d4bdeca5SXiang1 Zhang 
81*c72a751dSPhoebe Wang static unsigned getNumDefTiles(MachineRegisterInfo *MRI, MachineInstr &MI) {
82496156acSLuo, Yuanke   // There is no phi instruction after register allocation.
83496156acSLuo, Yuanke   assert(MI.isPHI() == false);
84496156acSLuo, Yuanke   // The instruction must have 3 operands: tile def, row, col.
85496156acSLuo, Yuanke   // It should be AMX pseudo instruction that have shape operand.
86496156acSLuo, Yuanke   if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 ||
87496156acSLuo, Yuanke       !MI.isPseudo())
88*c72a751dSPhoebe Wang     return 0;
89496156acSLuo, Yuanke   MachineOperand &MO = MI.getOperand(0);
90d4bdeca5SXiang1 Zhang 
91496156acSLuo, Yuanke   if (MO.isReg()) {
92496156acSLuo, Yuanke     Register Reg = MO.getReg();
93*c72a751dSPhoebe Wang     // FIXME: It may be used after Greedy RA and the physical
94496156acSLuo, Yuanke     // register is not rewritten yet.
95*c72a751dSPhoebe Wang     if (Reg.isVirtual()) {
96*c72a751dSPhoebe Wang       if (MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
97*c72a751dSPhoebe Wang         return 1;
98*c72a751dSPhoebe Wang       if (MRI->getRegClass(Reg)->getID() == X86::TILEPAIRRegClassID)
99*c72a751dSPhoebe Wang         return 2;
100*c72a751dSPhoebe Wang     }
101d4bdeca5SXiang1 Zhang     if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
102*c72a751dSPhoebe Wang       return 1;
103*c72a751dSPhoebe Wang     if (Reg >= X86::TMM0_TMM1 && Reg <= X86::TMM6_TMM7)
104*c72a751dSPhoebe Wang       return 2;
105496156acSLuo, Yuanke   }
106496156acSLuo, Yuanke 
107*c72a751dSPhoebe Wang   return 0;
108*c72a751dSPhoebe Wang }
109*c72a751dSPhoebe Wang 
110*c72a751dSPhoebe Wang static unsigned getTMMIndex(Register Reg) {
111*c72a751dSPhoebe Wang   if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
112*c72a751dSPhoebe Wang     return Reg - X86::TMM0;
113*c72a751dSPhoebe Wang   if (Reg >= X86::TMM0_TMM1 && Reg <= X86::TMM6_TMM7)
114*c72a751dSPhoebe Wang     return (Reg - X86::TMM0_TMM1) * 2;
115*c72a751dSPhoebe Wang   llvm_unreachable("Invalid Tmm Reg!");
116d4bdeca5SXiang1 Zhang }
117d4bdeca5SXiang1 Zhang 
118496156acSLuo, Yuanke // PreTileConfig should configure the tile registers based on basic
119496156acSLuo, Yuanke // block.
120496156acSLuo, Yuanke bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
121496156acSLuo, Yuanke   bool Change = false;
122496156acSLuo, Yuanke   SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos;
123496156acSLuo, Yuanke   for (MachineInstr &MI : reverse(MBB)) {
124*c72a751dSPhoebe Wang     unsigned DefNum = getNumDefTiles(MRI, MI);
125*c72a751dSPhoebe Wang     if (DefNum == 0 && MI.getOpcode() != X86::PLDTILECFGV)
126d4bdeca5SXiang1 Zhang       continue;
127496156acSLuo, Yuanke     // AMX instructions that define tile register.
128aaaf9cedSLuo, Yuanke     if (MI.getOpcode() != X86::PLDTILECFGV) {
129496156acSLuo, Yuanke       MachineOperand &Row = MI.getOperand(1);
130*c72a751dSPhoebe Wang       unsigned TMMIdx = getTMMIndex(MI.getOperand(0).getReg());
131*c72a751dSPhoebe Wang       for (unsigned I = 0; I < DefNum; I++) {
132*c72a751dSPhoebe Wang         MachineOperand &Col = MI.getOperand(2 + I);
133*c72a751dSPhoebe Wang         ShapeInfos.push_back({TMMIdx + I, ShapeT(&Row, &Col)});
134*c72a751dSPhoebe Wang       }
135aaaf9cedSLuo, Yuanke     } else { // PLDTILECFGV
136496156acSLuo, Yuanke       // Rewrite the shape information to memory. Stack slot should have
137496156acSLuo, Yuanke       // been initialized to zero in pre config.
138496156acSLuo, Yuanke       int SS = MI.getOperand(0).getIndex(); // tile config stack slot.
139496156acSLuo, Yuanke       for (auto &ShapeInfo : ShapeInfos) {
140496156acSLuo, Yuanke         DebugLoc DL;
141496156acSLuo, Yuanke         unsigned TMMIdx = ShapeInfo.first;
142496156acSLuo, Yuanke         Register RowReg = ShapeInfo.second.getRow()->getReg();
143496156acSLuo, Yuanke         Register ColReg = ShapeInfo.second.getCol()->getReg();
144d4bdeca5SXiang1 Zhang         // Here is the data format for the tile config.
145496156acSLuo, Yuanke         // 0      palette
146496156acSLuo, Yuanke         // 1      start_row
147d4bdeca5SXiang1 Zhang         // 2-15   reserved, must be zero
148d4bdeca5SXiang1 Zhang         // 16-17  tile0.colsb Tile 0 bytes per row.
149d4bdeca5SXiang1 Zhang         // 18-19  tile1.colsb Tile 1 bytes per row.
150d4bdeca5SXiang1 Zhang         // 20-21  tile2.colsb Tile 2 bytes per row.
151d4bdeca5SXiang1 Zhang         // ... (sequence continues)
152d4bdeca5SXiang1 Zhang         // 30-31  tile7.colsb Tile 7 bytes per row.
153d4bdeca5SXiang1 Zhang         // 32-47  reserved, must be zero
154d4bdeca5SXiang1 Zhang         // 48     tile0.rows Tile 0 rows.
155d4bdeca5SXiang1 Zhang         // 49     tile1.rows Tile 1 rows.
156d4bdeca5SXiang1 Zhang         // 50     tile2.rows Tile 2 rows.
157d4bdeca5SXiang1 Zhang         // ... (sequence continues)
158d4bdeca5SXiang1 Zhang         // 55     tile7.rows Tile 7 rows.
159d4bdeca5SXiang1 Zhang         // 56-63  reserved, must be zero
160496156acSLuo, Yuanke         int RowOffset = 48 + TMMIdx;
161496156acSLuo, Yuanke         int ColOffset = 16 + TMMIdx * 2;
162d4bdeca5SXiang1 Zhang 
163496156acSLuo, Yuanke         Register SubRowReg = TRI->getSubReg(RowReg, X86::sub_8bit);
164496156acSLuo, Yuanke         BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), SubRowReg);
165496156acSLuo, Yuanke         MachineInstrBuilder StoreRow =
166496156acSLuo, Yuanke             BuildMI(MBB, MI, DL, TII->get(X86::MOV8mr));
167496156acSLuo, Yuanke         addFrameReference(StoreRow, SS, RowOffset).addReg(SubRowReg);
168496156acSLuo, Yuanke 
169496156acSLuo, Yuanke         MachineInstrBuilder StoreCol =
170496156acSLuo, Yuanke             BuildMI(MBB, MI, DL, TII->get(X86::MOV16mr));
171496156acSLuo, Yuanke         addFrameReference(StoreCol, SS, ColOffset).addReg(ColReg);
172496156acSLuo, Yuanke       }
173496156acSLuo, Yuanke       ShapeInfos.clear();
174496156acSLuo, Yuanke       Change = true;
175d4bdeca5SXiang1 Zhang     }
176d4bdeca5SXiang1 Zhang   }
177d4bdeca5SXiang1 Zhang 
178496156acSLuo, Yuanke   return Change;
179d4bdeca5SXiang1 Zhang }
180d4bdeca5SXiang1 Zhang 
181d4bdeca5SXiang1 Zhang bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
1829a2c8418Saengelke   X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
1839a2c8418Saengelke   // Early exit in the common case of non-AMX code.
1849a2c8418Saengelke   if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
1859a2c8418Saengelke     return false;
1869a2c8418Saengelke 
187d4bdeca5SXiang1 Zhang   MF = &MFunc;
188d4bdeca5SXiang1 Zhang   MRI = &MFunc.getRegInfo();
189496156acSLuo, Yuanke   const TargetSubtargetInfo *ST = &MFunc.getSubtarget<X86Subtarget>();
190d4bdeca5SXiang1 Zhang   TRI = ST->getRegisterInfo();
191d4bdeca5SXiang1 Zhang   TII = MFunc.getSubtarget().getInstrInfo();
192496156acSLuo, Yuanke   bool Change = false;
193d4bdeca5SXiang1 Zhang 
194496156acSLuo, Yuanke   // Loop over all of the basic blocks, eliminating virtual register references
195496156acSLuo, Yuanke   for (MachineBasicBlock &MBB : MFunc)
196496156acSLuo, Yuanke     Change |= configBasicBlock(MBB);
197496156acSLuo, Yuanke 
198496156acSLuo, Yuanke   return Change;
199d4bdeca5SXiang1 Zhang }
200d4bdeca5SXiang1 Zhang 
201d4bdeca5SXiang1 Zhang FunctionPass *llvm::createX86FastTileConfigPass() {
202d4bdeca5SXiang1 Zhang   return new X86FastTileConfig();
203d4bdeca5SXiang1 Zhang }
204