1d4bdeca5SXiang1 Zhang //===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===// 2d4bdeca5SXiang1 Zhang // 3d4bdeca5SXiang1 Zhang // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4d4bdeca5SXiang1 Zhang // See https://llvm.org/LICENSE.txt for license information. 5d4bdeca5SXiang1 Zhang // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6d4bdeca5SXiang1 Zhang // 7d4bdeca5SXiang1 Zhang //===----------------------------------------------------------------------===// 8d4bdeca5SXiang1 Zhang // 9d4bdeca5SXiang1 Zhang /// \file Pass to config the shape of AMX physical registers 10d4bdeca5SXiang1 Zhang /// AMX register need to be configured before use. Before FastRegAllocation pass 11d4bdeca5SXiang1 Zhang /// the ldtilecfg instruction is inserted, however at that time we don't 12d4bdeca5SXiang1 Zhang /// know the shape of each physical tile registers, because the register 1363233da7SLuo, Yuanke /// allocation is not done yet. This pass runs after register allocation 14d4bdeca5SXiang1 Zhang /// pass. It collects the shape information of each physical tile register 15d4bdeca5SXiang1 Zhang /// and store the shape in the stack slot that is allocated for load config 16d4bdeca5SXiang1 Zhang /// to tile config register. 17d4bdeca5SXiang1 Zhang // 18d4bdeca5SXiang1 Zhang //===----------------------------------------------------------------------===// 19d4bdeca5SXiang1 Zhang 20d4bdeca5SXiang1 Zhang #include "X86.h" 21d4bdeca5SXiang1 Zhang #include "X86InstrBuilder.h" 22d4bdeca5SXiang1 Zhang #include "X86MachineFunctionInfo.h" 23d4bdeca5SXiang1 Zhang #include "X86Subtarget.h" 24d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/MachineFrameInfo.h" 25d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/MachineFunctionPass.h" 26d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/MachineInstr.h" 27d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/MachineRegisterInfo.h" 28d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/Passes.h" 29d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/TargetInstrInfo.h" 30d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/TargetRegisterInfo.h" 31d4bdeca5SXiang1 Zhang 32d4bdeca5SXiang1 Zhang using namespace llvm; 33d4bdeca5SXiang1 Zhang 34d4bdeca5SXiang1 Zhang #define DEBUG_TYPE "fasttileconfig" 35d4bdeca5SXiang1 Zhang 36d4bdeca5SXiang1 Zhang namespace { 37d4bdeca5SXiang1 Zhang 38d4bdeca5SXiang1 Zhang class X86FastTileConfig : public MachineFunctionPass { 39d4bdeca5SXiang1 Zhang // context 40d4bdeca5SXiang1 Zhang MachineFunction *MF = nullptr; 41d4bdeca5SXiang1 Zhang const TargetInstrInfo *TII = nullptr; 42d4bdeca5SXiang1 Zhang MachineRegisterInfo *MRI = nullptr; 43496156acSLuo, Yuanke const TargetRegisterInfo *TRI = nullptr; 44c4dba471SLuo, Yuanke X86MachineFunctionInfo *X86FI = nullptr; 45d4bdeca5SXiang1 Zhang 46496156acSLuo, Yuanke bool configBasicBlock(MachineBasicBlock &MBB); 47d4bdeca5SXiang1 Zhang 48d4bdeca5SXiang1 Zhang public: 49d4bdeca5SXiang1 Zhang X86FastTileConfig() : MachineFunctionPass(ID) {} 50d4bdeca5SXiang1 Zhang 51d4bdeca5SXiang1 Zhang /// Return the pass name. 52d4bdeca5SXiang1 Zhang StringRef getPassName() const override { 53d4bdeca5SXiang1 Zhang return "Fast Tile Register Configure"; 54d4bdeca5SXiang1 Zhang } 55d4bdeca5SXiang1 Zhang 56496156acSLuo, Yuanke void getAnalysisUsage(AnalysisUsage &AU) const override { 57496156acSLuo, Yuanke AU.setPreservesAll(); 58496156acSLuo, Yuanke MachineFunctionPass::getAnalysisUsage(AU); 59496156acSLuo, Yuanke } 60d4bdeca5SXiang1 Zhang 61d4bdeca5SXiang1 Zhang /// Perform register allocation. 62d4bdeca5SXiang1 Zhang bool runOnMachineFunction(MachineFunction &MFunc) override; 63d4bdeca5SXiang1 Zhang 64d4bdeca5SXiang1 Zhang MachineFunctionProperties getRequiredProperties() const override { 65d4bdeca5SXiang1 Zhang return MachineFunctionProperties().set( 66d4bdeca5SXiang1 Zhang MachineFunctionProperties::Property::NoPHIs); 67d4bdeca5SXiang1 Zhang } 68d4bdeca5SXiang1 Zhang 69d4bdeca5SXiang1 Zhang static char ID; 70d4bdeca5SXiang1 Zhang }; 71d4bdeca5SXiang1 Zhang 72d4bdeca5SXiang1 Zhang } // end anonymous namespace 73d4bdeca5SXiang1 Zhang 74d4bdeca5SXiang1 Zhang char X86FastTileConfig::ID = 0; 75d4bdeca5SXiang1 Zhang 76d4bdeca5SXiang1 Zhang INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE, 77d4bdeca5SXiang1 Zhang "Fast Tile Register Configure", false, false) 78d4bdeca5SXiang1 Zhang INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE, 79d4bdeca5SXiang1 Zhang "Fast Tile Register Configure", false, false) 80d4bdeca5SXiang1 Zhang 81*c72a751dSPhoebe Wang static unsigned getNumDefTiles(MachineRegisterInfo *MRI, MachineInstr &MI) { 82496156acSLuo, Yuanke // There is no phi instruction after register allocation. 83496156acSLuo, Yuanke assert(MI.isPHI() == false); 84496156acSLuo, Yuanke // The instruction must have 3 operands: tile def, row, col. 85496156acSLuo, Yuanke // It should be AMX pseudo instruction that have shape operand. 86496156acSLuo, Yuanke if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 || 87496156acSLuo, Yuanke !MI.isPseudo()) 88*c72a751dSPhoebe Wang return 0; 89496156acSLuo, Yuanke MachineOperand &MO = MI.getOperand(0); 90d4bdeca5SXiang1 Zhang 91496156acSLuo, Yuanke if (MO.isReg()) { 92496156acSLuo, Yuanke Register Reg = MO.getReg(); 93*c72a751dSPhoebe Wang // FIXME: It may be used after Greedy RA and the physical 94496156acSLuo, Yuanke // register is not rewritten yet. 95*c72a751dSPhoebe Wang if (Reg.isVirtual()) { 96*c72a751dSPhoebe Wang if (MRI->getRegClass(Reg)->getID() == X86::TILERegClassID) 97*c72a751dSPhoebe Wang return 1; 98*c72a751dSPhoebe Wang if (MRI->getRegClass(Reg)->getID() == X86::TILEPAIRRegClassID) 99*c72a751dSPhoebe Wang return 2; 100*c72a751dSPhoebe Wang } 101d4bdeca5SXiang1 Zhang if (Reg >= X86::TMM0 && Reg <= X86::TMM7) 102*c72a751dSPhoebe Wang return 1; 103*c72a751dSPhoebe Wang if (Reg >= X86::TMM0_TMM1 && Reg <= X86::TMM6_TMM7) 104*c72a751dSPhoebe Wang return 2; 105496156acSLuo, Yuanke } 106496156acSLuo, Yuanke 107*c72a751dSPhoebe Wang return 0; 108*c72a751dSPhoebe Wang } 109*c72a751dSPhoebe Wang 110*c72a751dSPhoebe Wang static unsigned getTMMIndex(Register Reg) { 111*c72a751dSPhoebe Wang if (Reg >= X86::TMM0 && Reg <= X86::TMM7) 112*c72a751dSPhoebe Wang return Reg - X86::TMM0; 113*c72a751dSPhoebe Wang if (Reg >= X86::TMM0_TMM1 && Reg <= X86::TMM6_TMM7) 114*c72a751dSPhoebe Wang return (Reg - X86::TMM0_TMM1) * 2; 115*c72a751dSPhoebe Wang llvm_unreachable("Invalid Tmm Reg!"); 116d4bdeca5SXiang1 Zhang } 117d4bdeca5SXiang1 Zhang 118496156acSLuo, Yuanke // PreTileConfig should configure the tile registers based on basic 119496156acSLuo, Yuanke // block. 120496156acSLuo, Yuanke bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) { 121496156acSLuo, Yuanke bool Change = false; 122496156acSLuo, Yuanke SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos; 123496156acSLuo, Yuanke for (MachineInstr &MI : reverse(MBB)) { 124*c72a751dSPhoebe Wang unsigned DefNum = getNumDefTiles(MRI, MI); 125*c72a751dSPhoebe Wang if (DefNum == 0 && MI.getOpcode() != X86::PLDTILECFGV) 126d4bdeca5SXiang1 Zhang continue; 127496156acSLuo, Yuanke // AMX instructions that define tile register. 128aaaf9cedSLuo, Yuanke if (MI.getOpcode() != X86::PLDTILECFGV) { 129496156acSLuo, Yuanke MachineOperand &Row = MI.getOperand(1); 130*c72a751dSPhoebe Wang unsigned TMMIdx = getTMMIndex(MI.getOperand(0).getReg()); 131*c72a751dSPhoebe Wang for (unsigned I = 0; I < DefNum; I++) { 132*c72a751dSPhoebe Wang MachineOperand &Col = MI.getOperand(2 + I); 133*c72a751dSPhoebe Wang ShapeInfos.push_back({TMMIdx + I, ShapeT(&Row, &Col)}); 134*c72a751dSPhoebe Wang } 135aaaf9cedSLuo, Yuanke } else { // PLDTILECFGV 136496156acSLuo, Yuanke // Rewrite the shape information to memory. Stack slot should have 137496156acSLuo, Yuanke // been initialized to zero in pre config. 138496156acSLuo, Yuanke int SS = MI.getOperand(0).getIndex(); // tile config stack slot. 139496156acSLuo, Yuanke for (auto &ShapeInfo : ShapeInfos) { 140496156acSLuo, Yuanke DebugLoc DL; 141496156acSLuo, Yuanke unsigned TMMIdx = ShapeInfo.first; 142496156acSLuo, Yuanke Register RowReg = ShapeInfo.second.getRow()->getReg(); 143496156acSLuo, Yuanke Register ColReg = ShapeInfo.second.getCol()->getReg(); 144d4bdeca5SXiang1 Zhang // Here is the data format for the tile config. 145496156acSLuo, Yuanke // 0 palette 146496156acSLuo, Yuanke // 1 start_row 147d4bdeca5SXiang1 Zhang // 2-15 reserved, must be zero 148d4bdeca5SXiang1 Zhang // 16-17 tile0.colsb Tile 0 bytes per row. 149d4bdeca5SXiang1 Zhang // 18-19 tile1.colsb Tile 1 bytes per row. 150d4bdeca5SXiang1 Zhang // 20-21 tile2.colsb Tile 2 bytes per row. 151d4bdeca5SXiang1 Zhang // ... (sequence continues) 152d4bdeca5SXiang1 Zhang // 30-31 tile7.colsb Tile 7 bytes per row. 153d4bdeca5SXiang1 Zhang // 32-47 reserved, must be zero 154d4bdeca5SXiang1 Zhang // 48 tile0.rows Tile 0 rows. 155d4bdeca5SXiang1 Zhang // 49 tile1.rows Tile 1 rows. 156d4bdeca5SXiang1 Zhang // 50 tile2.rows Tile 2 rows. 157d4bdeca5SXiang1 Zhang // ... (sequence continues) 158d4bdeca5SXiang1 Zhang // 55 tile7.rows Tile 7 rows. 159d4bdeca5SXiang1 Zhang // 56-63 reserved, must be zero 160496156acSLuo, Yuanke int RowOffset = 48 + TMMIdx; 161496156acSLuo, Yuanke int ColOffset = 16 + TMMIdx * 2; 162d4bdeca5SXiang1 Zhang 163496156acSLuo, Yuanke Register SubRowReg = TRI->getSubReg(RowReg, X86::sub_8bit); 164496156acSLuo, Yuanke BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), SubRowReg); 165496156acSLuo, Yuanke MachineInstrBuilder StoreRow = 166496156acSLuo, Yuanke BuildMI(MBB, MI, DL, TII->get(X86::MOV8mr)); 167496156acSLuo, Yuanke addFrameReference(StoreRow, SS, RowOffset).addReg(SubRowReg); 168496156acSLuo, Yuanke 169496156acSLuo, Yuanke MachineInstrBuilder StoreCol = 170496156acSLuo, Yuanke BuildMI(MBB, MI, DL, TII->get(X86::MOV16mr)); 171496156acSLuo, Yuanke addFrameReference(StoreCol, SS, ColOffset).addReg(ColReg); 172496156acSLuo, Yuanke } 173496156acSLuo, Yuanke ShapeInfos.clear(); 174496156acSLuo, Yuanke Change = true; 175d4bdeca5SXiang1 Zhang } 176d4bdeca5SXiang1 Zhang } 177d4bdeca5SXiang1 Zhang 178496156acSLuo, Yuanke return Change; 179d4bdeca5SXiang1 Zhang } 180d4bdeca5SXiang1 Zhang 181d4bdeca5SXiang1 Zhang bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) { 1829a2c8418Saengelke X86FI = MFunc.getInfo<X86MachineFunctionInfo>(); 1839a2c8418Saengelke // Early exit in the common case of non-AMX code. 1849a2c8418Saengelke if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA) 1859a2c8418Saengelke return false; 1869a2c8418Saengelke 187d4bdeca5SXiang1 Zhang MF = &MFunc; 188d4bdeca5SXiang1 Zhang MRI = &MFunc.getRegInfo(); 189496156acSLuo, Yuanke const TargetSubtargetInfo *ST = &MFunc.getSubtarget<X86Subtarget>(); 190d4bdeca5SXiang1 Zhang TRI = ST->getRegisterInfo(); 191d4bdeca5SXiang1 Zhang TII = MFunc.getSubtarget().getInstrInfo(); 192496156acSLuo, Yuanke bool Change = false; 193d4bdeca5SXiang1 Zhang 194496156acSLuo, Yuanke // Loop over all of the basic blocks, eliminating virtual register references 195496156acSLuo, Yuanke for (MachineBasicBlock &MBB : MFunc) 196496156acSLuo, Yuanke Change |= configBasicBlock(MBB); 197496156acSLuo, Yuanke 198496156acSLuo, Yuanke return Change; 199d4bdeca5SXiang1 Zhang } 200d4bdeca5SXiang1 Zhang 201d4bdeca5SXiang1 Zhang FunctionPass *llvm::createX86FastTileConfigPass() { 202d4bdeca5SXiang1 Zhang return new X86FastTileConfig(); 203d4bdeca5SXiang1 Zhang } 204