1fe6060f1SDimitry Andric //===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===// 2fe6060f1SDimitry Andric // 3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6fe6060f1SDimitry Andric // 7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 8fe6060f1SDimitry Andric // 9fe6060f1SDimitry Andric /// \file Pass to config the shape of AMX physical registers 10fe6060f1SDimitry Andric /// AMX register need to be configured before use. Before FastRegAllocation pass 11fe6060f1SDimitry Andric /// the ldtilecfg instruction is inserted, however at that time we don't 12fe6060f1SDimitry Andric /// know the shape of each physical tile registers, because the register 13fe6060f1SDimitry Andric /// allocation is not done yet. This pass runs after register allocation 14fe6060f1SDimitry Andric /// pass. It collects the shape information of each physical tile register 15fe6060f1SDimitry Andric /// and store the shape in the stack slot that is allocated for load config 16fe6060f1SDimitry Andric /// to tile config register. 17fe6060f1SDimitry Andric // 18fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 19fe6060f1SDimitry Andric 20fe6060f1SDimitry Andric #include "X86.h" 21fe6060f1SDimitry Andric #include "X86InstrBuilder.h" 22fe6060f1SDimitry Andric #include "X86MachineFunctionInfo.h" 23fe6060f1SDimitry Andric #include "X86RegisterInfo.h" 24fe6060f1SDimitry Andric #include "X86Subtarget.h" 25fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 26fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 27fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 28fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 29fe6060f1SDimitry Andric #include "llvm/CodeGen/Passes.h" 30fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 31fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h" 32fe6060f1SDimitry Andric #include "llvm/InitializePasses.h" 33fe6060f1SDimitry Andric 34fe6060f1SDimitry Andric using namespace llvm; 35fe6060f1SDimitry Andric 36fe6060f1SDimitry Andric #define DEBUG_TYPE "fasttileconfig" 37fe6060f1SDimitry Andric 38fe6060f1SDimitry Andric namespace { 39fe6060f1SDimitry Andric 40fe6060f1SDimitry Andric class X86FastTileConfig : public MachineFunctionPass { 41fe6060f1SDimitry Andric // context 42fe6060f1SDimitry Andric MachineFunction *MF = nullptr; 43fe6060f1SDimitry Andric const TargetInstrInfo *TII = nullptr; 44fe6060f1SDimitry Andric MachineRegisterInfo *MRI = nullptr; 4581ad6265SDimitry Andric const TargetRegisterInfo *TRI = nullptr; 46349cc55cSDimitry Andric X86MachineFunctionInfo *X86FI = nullptr; 47fe6060f1SDimitry Andric 4881ad6265SDimitry Andric bool configBasicBlock(MachineBasicBlock &MBB); 49fe6060f1SDimitry Andric 50fe6060f1SDimitry Andric public: 51fe6060f1SDimitry Andric X86FastTileConfig() : MachineFunctionPass(ID) {} 52fe6060f1SDimitry Andric 53fe6060f1SDimitry Andric /// Return the pass name. 54fe6060f1SDimitry Andric StringRef getPassName() const override { 55fe6060f1SDimitry Andric return "Fast Tile Register Configure"; 56fe6060f1SDimitry Andric } 57fe6060f1SDimitry Andric 5881ad6265SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 5981ad6265SDimitry Andric AU.setPreservesAll(); 6081ad6265SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 6181ad6265SDimitry Andric } 62fe6060f1SDimitry Andric 63fe6060f1SDimitry Andric /// Perform register allocation. 64fe6060f1SDimitry Andric bool runOnMachineFunction(MachineFunction &MFunc) override; 65fe6060f1SDimitry Andric 66fe6060f1SDimitry Andric MachineFunctionProperties getRequiredProperties() const override { 67fe6060f1SDimitry Andric return MachineFunctionProperties().set( 68fe6060f1SDimitry Andric MachineFunctionProperties::Property::NoPHIs); 69fe6060f1SDimitry Andric } 70fe6060f1SDimitry Andric 71fe6060f1SDimitry Andric static char ID; 72fe6060f1SDimitry Andric }; 73fe6060f1SDimitry Andric 74fe6060f1SDimitry Andric } // end anonymous namespace 75fe6060f1SDimitry Andric 76fe6060f1SDimitry Andric char X86FastTileConfig::ID = 0; 77fe6060f1SDimitry Andric 78fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE, 79fe6060f1SDimitry Andric "Fast Tile Register Configure", false, false) 80fe6060f1SDimitry Andric INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE, 81fe6060f1SDimitry Andric "Fast Tile Register Configure", false, false) 82fe6060f1SDimitry Andric 8381ad6265SDimitry Andric static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) { 8481ad6265SDimitry Andric // There is no phi instruction after register allocation. 8581ad6265SDimitry Andric assert(MI.isPHI() == false); 8681ad6265SDimitry Andric // The instruction must have 3 operands: tile def, row, col. 8781ad6265SDimitry Andric // It should be AMX pseudo instruction that have shape operand. 8881ad6265SDimitry Andric if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 || 8981ad6265SDimitry Andric !MI.isPseudo()) 90fe6060f1SDimitry Andric return false; 9181ad6265SDimitry Andric MachineOperand &MO = MI.getOperand(0); 92fe6060f1SDimitry Andric 9381ad6265SDimitry Andric if (MO.isReg()) { 9481ad6265SDimitry Andric Register Reg = MO.getReg(); 9581ad6265SDimitry Andric // FIXME it may be used after Greedy RA and the physical 9681ad6265SDimitry Andric // register is not rewritten yet. 9781ad6265SDimitry Andric if (Reg.isVirtual() && 9881ad6265SDimitry Andric MRI->getRegClass(Reg)->getID() == X86::TILERegClassID) 9981ad6265SDimitry Andric return true; 100fe6060f1SDimitry Andric if (Reg >= X86::TMM0 && Reg <= X86::TMM7) 101fe6060f1SDimitry Andric return true; 10281ad6265SDimitry Andric } 10381ad6265SDimitry Andric 104fe6060f1SDimitry Andric return false; 105fe6060f1SDimitry Andric } 106fe6060f1SDimitry Andric 10781ad6265SDimitry Andric // PreTileConfig should configure the tile registers based on basic 10881ad6265SDimitry Andric // block. 10981ad6265SDimitry Andric bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) { 11081ad6265SDimitry Andric bool Change = false; 11181ad6265SDimitry Andric SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos; 11281ad6265SDimitry Andric for (MachineInstr &MI : reverse(MBB)) { 11381ad6265SDimitry Andric if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::PLDTILECFGV) 114fe6060f1SDimitry Andric continue; 11581ad6265SDimitry Andric // AMX instructions that define tile register. 11681ad6265SDimitry Andric if (MI.getOpcode() != X86::PLDTILECFGV) { 11781ad6265SDimitry Andric MachineOperand &Row = MI.getOperand(1); 11881ad6265SDimitry Andric MachineOperand &Col = MI.getOperand(2); 11981ad6265SDimitry Andric unsigned TMMIdx = MI.getOperand(0).getReg() - X86::TMM0; 12081ad6265SDimitry Andric ShapeInfos.push_back({TMMIdx, ShapeT(&Row, &Col)}); 12181ad6265SDimitry Andric } else { // PLDTILECFGV 12281ad6265SDimitry Andric // Rewrite the shape information to memory. Stack slot should have 12381ad6265SDimitry Andric // been initialized to zero in pre config. 12481ad6265SDimitry Andric int SS = MI.getOperand(0).getIndex(); // tile config stack slot. 12581ad6265SDimitry Andric for (auto &ShapeInfo : ShapeInfos) { 12681ad6265SDimitry Andric DebugLoc DL; 12781ad6265SDimitry Andric unsigned TMMIdx = ShapeInfo.first; 12881ad6265SDimitry Andric Register RowReg = ShapeInfo.second.getRow()->getReg(); 12981ad6265SDimitry Andric Register ColReg = ShapeInfo.second.getCol()->getReg(); 130fe6060f1SDimitry Andric // Here is the data format for the tile config. 13181ad6265SDimitry Andric // 0 palette 13281ad6265SDimitry Andric // 1 start_row 133fe6060f1SDimitry Andric // 2-15 reserved, must be zero 134fe6060f1SDimitry Andric // 16-17 tile0.colsb Tile 0 bytes per row. 135fe6060f1SDimitry Andric // 18-19 tile1.colsb Tile 1 bytes per row. 136fe6060f1SDimitry Andric // 20-21 tile2.colsb Tile 2 bytes per row. 137fe6060f1SDimitry Andric // ... (sequence continues) 138fe6060f1SDimitry Andric // 30-31 tile7.colsb Tile 7 bytes per row. 139fe6060f1SDimitry Andric // 32-47 reserved, must be zero 140fe6060f1SDimitry Andric // 48 tile0.rows Tile 0 rows. 141fe6060f1SDimitry Andric // 49 tile1.rows Tile 1 rows. 142fe6060f1SDimitry Andric // 50 tile2.rows Tile 2 rows. 143fe6060f1SDimitry Andric // ... (sequence continues) 144fe6060f1SDimitry Andric // 55 tile7.rows Tile 7 rows. 145fe6060f1SDimitry Andric // 56-63 reserved, must be zero 14681ad6265SDimitry Andric int RowOffset = 48 + TMMIdx; 14781ad6265SDimitry Andric int ColOffset = 16 + TMMIdx * 2; 148fe6060f1SDimitry Andric 14981ad6265SDimitry Andric Register SubRowReg = TRI->getSubReg(RowReg, X86::sub_8bit); 15081ad6265SDimitry Andric BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), SubRowReg); 15181ad6265SDimitry Andric MachineInstrBuilder StoreRow = 15281ad6265SDimitry Andric BuildMI(MBB, MI, DL, TII->get(X86::MOV8mr)); 15381ad6265SDimitry Andric addFrameReference(StoreRow, SS, RowOffset).addReg(SubRowReg); 15481ad6265SDimitry Andric 15581ad6265SDimitry Andric MachineInstrBuilder StoreCol = 15681ad6265SDimitry Andric BuildMI(MBB, MI, DL, TII->get(X86::MOV16mr)); 15781ad6265SDimitry Andric addFrameReference(StoreCol, SS, ColOffset).addReg(ColReg); 15881ad6265SDimitry Andric } 15981ad6265SDimitry Andric ShapeInfos.clear(); 16081ad6265SDimitry Andric Change = true; 161fe6060f1SDimitry Andric } 162fe6060f1SDimitry Andric } 163fe6060f1SDimitry Andric 16481ad6265SDimitry Andric return Change; 165fe6060f1SDimitry Andric } 166fe6060f1SDimitry Andric 167fe6060f1SDimitry Andric bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) { 168*0fca6ea1SDimitry Andric X86FI = MFunc.getInfo<X86MachineFunctionInfo>(); 169*0fca6ea1SDimitry Andric // Early exit in the common case of non-AMX code. 170*0fca6ea1SDimitry Andric if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA) 171*0fca6ea1SDimitry Andric return false; 172*0fca6ea1SDimitry Andric 173fe6060f1SDimitry Andric MF = &MFunc; 174fe6060f1SDimitry Andric MRI = &MFunc.getRegInfo(); 17581ad6265SDimitry Andric const TargetSubtargetInfo *ST = &MFunc.getSubtarget<X86Subtarget>(); 176fe6060f1SDimitry Andric TRI = ST->getRegisterInfo(); 177fe6060f1SDimitry Andric TII = MFunc.getSubtarget().getInstrInfo(); 17881ad6265SDimitry Andric bool Change = false; 179fe6060f1SDimitry Andric 18081ad6265SDimitry Andric // Loop over all of the basic blocks, eliminating virtual register references 18181ad6265SDimitry Andric for (MachineBasicBlock &MBB : MFunc) 18281ad6265SDimitry Andric Change |= configBasicBlock(MBB); 18381ad6265SDimitry Andric 18481ad6265SDimitry Andric return Change; 185fe6060f1SDimitry Andric } 186fe6060f1SDimitry Andric 187fe6060f1SDimitry Andric FunctionPass *llvm::createX86FastTileConfigPass() { 188fe6060f1SDimitry Andric return new X86FastTileConfig(); 189fe6060f1SDimitry Andric } 190