xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/X86FastTileConfig.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1fe6060f1SDimitry Andric //===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===//
2fe6060f1SDimitry Andric //
3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fe6060f1SDimitry Andric //
7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8fe6060f1SDimitry Andric //
/// \file Pass to configure the shape of AMX physical registers.
/// AMX registers need to be configured before use. The ldtilecfg instruction
/// is inserted before the FastRegAllocation pass; however, at that time we
/// don't know the shape of each physical tile register, because register
/// allocation is not done yet. This pass runs after the register allocation
/// pass. It collects the shape information of each physical tile register
/// and stores the shape in the stack slot that is allocated for loading the
/// config into the tile config register.
17fe6060f1SDimitry Andric //
18fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
19fe6060f1SDimitry Andric 
20fe6060f1SDimitry Andric #include "X86.h"
21fe6060f1SDimitry Andric #include "X86InstrBuilder.h"
22fe6060f1SDimitry Andric #include "X86MachineFunctionInfo.h"
23fe6060f1SDimitry Andric #include "X86RegisterInfo.h"
24fe6060f1SDimitry Andric #include "X86Subtarget.h"
25fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
26fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
27fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
28fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
29fe6060f1SDimitry Andric #include "llvm/CodeGen/Passes.h"
30fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
31fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
32fe6060f1SDimitry Andric #include "llvm/InitializePasses.h"
33fe6060f1SDimitry Andric 
34fe6060f1SDimitry Andric using namespace llvm;
35fe6060f1SDimitry Andric 
36fe6060f1SDimitry Andric #define DEBUG_TYPE "fasttileconfig"
37fe6060f1SDimitry Andric 
38fe6060f1SDimitry Andric namespace {
39fe6060f1SDimitry Andric 
40fe6060f1SDimitry Andric class X86FastTileConfig : public MachineFunctionPass {
41fe6060f1SDimitry Andric   // context
42fe6060f1SDimitry Andric   MachineFunction *MF = nullptr;
43fe6060f1SDimitry Andric   const TargetInstrInfo *TII = nullptr;
44fe6060f1SDimitry Andric   MachineRegisterInfo *MRI = nullptr;
4581ad6265SDimitry Andric   const TargetRegisterInfo *TRI = nullptr;
46349cc55cSDimitry Andric   X86MachineFunctionInfo *X86FI = nullptr;
47fe6060f1SDimitry Andric 
4881ad6265SDimitry Andric   bool configBasicBlock(MachineBasicBlock &MBB);
49fe6060f1SDimitry Andric 
50fe6060f1SDimitry Andric public:
51fe6060f1SDimitry Andric   X86FastTileConfig() : MachineFunctionPass(ID) {}
52fe6060f1SDimitry Andric 
53fe6060f1SDimitry Andric   /// Return the pass name.
54fe6060f1SDimitry Andric   StringRef getPassName() const override {
55fe6060f1SDimitry Andric     return "Fast Tile Register Configure";
56fe6060f1SDimitry Andric   }
57fe6060f1SDimitry Andric 
5881ad6265SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
5981ad6265SDimitry Andric     AU.setPreservesAll();
6081ad6265SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
6181ad6265SDimitry Andric   }
62fe6060f1SDimitry Andric 
63fe6060f1SDimitry Andric   /// Perform register allocation.
64fe6060f1SDimitry Andric   bool runOnMachineFunction(MachineFunction &MFunc) override;
65fe6060f1SDimitry Andric 
66fe6060f1SDimitry Andric   MachineFunctionProperties getRequiredProperties() const override {
67fe6060f1SDimitry Andric     return MachineFunctionProperties().set(
68fe6060f1SDimitry Andric         MachineFunctionProperties::Property::NoPHIs);
69fe6060f1SDimitry Andric   }
70fe6060f1SDimitry Andric 
71fe6060f1SDimitry Andric   static char ID;
72fe6060f1SDimitry Andric };
73fe6060f1SDimitry Andric 
74fe6060f1SDimitry Andric } // end anonymous namespace
75fe6060f1SDimitry Andric 
76fe6060f1SDimitry Andric char X86FastTileConfig::ID = 0;
77fe6060f1SDimitry Andric 
78fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE,
79fe6060f1SDimitry Andric                       "Fast Tile Register Configure", false, false)
80fe6060f1SDimitry Andric INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE,
81fe6060f1SDimitry Andric                     "Fast Tile Register Configure", false, false)
82fe6060f1SDimitry Andric 
8381ad6265SDimitry Andric static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
8481ad6265SDimitry Andric   // There is no phi instruction after register allocation.
8581ad6265SDimitry Andric   assert(MI.isPHI() == false);
8681ad6265SDimitry Andric   // The instruction must have 3 operands: tile def, row, col.
8781ad6265SDimitry Andric   // It should be AMX pseudo instruction that have shape operand.
8881ad6265SDimitry Andric   if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 ||
8981ad6265SDimitry Andric       !MI.isPseudo())
90fe6060f1SDimitry Andric     return false;
9181ad6265SDimitry Andric   MachineOperand &MO = MI.getOperand(0);
92fe6060f1SDimitry Andric 
9381ad6265SDimitry Andric   if (MO.isReg()) {
9481ad6265SDimitry Andric     Register Reg = MO.getReg();
9581ad6265SDimitry Andric     // FIXME it may be used after Greedy RA and the physical
9681ad6265SDimitry Andric     // register is not rewritten yet.
9781ad6265SDimitry Andric     if (Reg.isVirtual() &&
9881ad6265SDimitry Andric         MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
9981ad6265SDimitry Andric       return true;
100fe6060f1SDimitry Andric     if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
101fe6060f1SDimitry Andric       return true;
10281ad6265SDimitry Andric   }
10381ad6265SDimitry Andric 
104fe6060f1SDimitry Andric   return false;
105fe6060f1SDimitry Andric }
106fe6060f1SDimitry Andric 
10781ad6265SDimitry Andric // PreTileConfig should configure the tile registers based on basic
10881ad6265SDimitry Andric // block.
10981ad6265SDimitry Andric bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
11081ad6265SDimitry Andric   bool Change = false;
11181ad6265SDimitry Andric   SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos;
11281ad6265SDimitry Andric   for (MachineInstr &MI : reverse(MBB)) {
11381ad6265SDimitry Andric     if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::PLDTILECFGV)
114fe6060f1SDimitry Andric       continue;
11581ad6265SDimitry Andric     // AMX instructions that define tile register.
11681ad6265SDimitry Andric     if (MI.getOpcode() != X86::PLDTILECFGV) {
11781ad6265SDimitry Andric       MachineOperand &Row = MI.getOperand(1);
11881ad6265SDimitry Andric       MachineOperand &Col = MI.getOperand(2);
11981ad6265SDimitry Andric       unsigned TMMIdx = MI.getOperand(0).getReg() - X86::TMM0;
12081ad6265SDimitry Andric       ShapeInfos.push_back({TMMIdx, ShapeT(&Row, &Col)});
12181ad6265SDimitry Andric     } else { // PLDTILECFGV
12281ad6265SDimitry Andric       // Rewrite the shape information to memory. Stack slot should have
12381ad6265SDimitry Andric       // been initialized to zero in pre config.
12481ad6265SDimitry Andric       int SS = MI.getOperand(0).getIndex(); // tile config stack slot.
12581ad6265SDimitry Andric       for (auto &ShapeInfo : ShapeInfos) {
12681ad6265SDimitry Andric         DebugLoc DL;
12781ad6265SDimitry Andric         unsigned TMMIdx = ShapeInfo.first;
12881ad6265SDimitry Andric         Register RowReg = ShapeInfo.second.getRow()->getReg();
12981ad6265SDimitry Andric         Register ColReg = ShapeInfo.second.getCol()->getReg();
130fe6060f1SDimitry Andric         // Here is the data format for the tile config.
13181ad6265SDimitry Andric         // 0      palette
13281ad6265SDimitry Andric         // 1      start_row
133fe6060f1SDimitry Andric         // 2-15   reserved, must be zero
134fe6060f1SDimitry Andric         // 16-17  tile0.colsb Tile 0 bytes per row.
135fe6060f1SDimitry Andric         // 18-19  tile1.colsb Tile 1 bytes per row.
136fe6060f1SDimitry Andric         // 20-21  tile2.colsb Tile 2 bytes per row.
137fe6060f1SDimitry Andric         // ... (sequence continues)
138fe6060f1SDimitry Andric         // 30-31  tile7.colsb Tile 7 bytes per row.
139fe6060f1SDimitry Andric         // 32-47  reserved, must be zero
140fe6060f1SDimitry Andric         // 48     tile0.rows Tile 0 rows.
141fe6060f1SDimitry Andric         // 49     tile1.rows Tile 1 rows.
142fe6060f1SDimitry Andric         // 50     tile2.rows Tile 2 rows.
143fe6060f1SDimitry Andric         // ... (sequence continues)
144fe6060f1SDimitry Andric         // 55     tile7.rows Tile 7 rows.
145fe6060f1SDimitry Andric         // 56-63  reserved, must be zero
14681ad6265SDimitry Andric         int RowOffset = 48 + TMMIdx;
14781ad6265SDimitry Andric         int ColOffset = 16 + TMMIdx * 2;
148fe6060f1SDimitry Andric 
14981ad6265SDimitry Andric         Register SubRowReg = TRI->getSubReg(RowReg, X86::sub_8bit);
15081ad6265SDimitry Andric         BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), SubRowReg);
15181ad6265SDimitry Andric         MachineInstrBuilder StoreRow =
15281ad6265SDimitry Andric             BuildMI(MBB, MI, DL, TII->get(X86::MOV8mr));
15381ad6265SDimitry Andric         addFrameReference(StoreRow, SS, RowOffset).addReg(SubRowReg);
15481ad6265SDimitry Andric 
15581ad6265SDimitry Andric         MachineInstrBuilder StoreCol =
15681ad6265SDimitry Andric             BuildMI(MBB, MI, DL, TII->get(X86::MOV16mr));
15781ad6265SDimitry Andric         addFrameReference(StoreCol, SS, ColOffset).addReg(ColReg);
15881ad6265SDimitry Andric       }
15981ad6265SDimitry Andric       ShapeInfos.clear();
16081ad6265SDimitry Andric       Change = true;
161fe6060f1SDimitry Andric     }
162fe6060f1SDimitry Andric   }
163fe6060f1SDimitry Andric 
16481ad6265SDimitry Andric   return Change;
165fe6060f1SDimitry Andric }
166fe6060f1SDimitry Andric 
167fe6060f1SDimitry Andric bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
168*0fca6ea1SDimitry Andric   X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
169*0fca6ea1SDimitry Andric   // Early exit in the common case of non-AMX code.
170*0fca6ea1SDimitry Andric   if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
171*0fca6ea1SDimitry Andric     return false;
172*0fca6ea1SDimitry Andric 
173fe6060f1SDimitry Andric   MF = &MFunc;
174fe6060f1SDimitry Andric   MRI = &MFunc.getRegInfo();
17581ad6265SDimitry Andric   const TargetSubtargetInfo *ST = &MFunc.getSubtarget<X86Subtarget>();
176fe6060f1SDimitry Andric   TRI = ST->getRegisterInfo();
177fe6060f1SDimitry Andric   TII = MFunc.getSubtarget().getInstrInfo();
17881ad6265SDimitry Andric   bool Change = false;
179fe6060f1SDimitry Andric 
18081ad6265SDimitry Andric   // Loop over all of the basic blocks, eliminating virtual register references
18181ad6265SDimitry Andric   for (MachineBasicBlock &MBB : MFunc)
18281ad6265SDimitry Andric     Change |= configBasicBlock(MBB);
18381ad6265SDimitry Andric 
18481ad6265SDimitry Andric   return Change;
185fe6060f1SDimitry Andric }
186fe6060f1SDimitry Andric 
187fe6060f1SDimitry Andric FunctionPass *llvm::createX86FastTileConfigPass() {
188fe6060f1SDimitry Andric   return new X86FastTileConfig();
189fe6060f1SDimitry Andric }
190