//===-- X86TileConfig.cpp - Tile Register Configure----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Pass to configure the shape of AMX physical registers
/// AMX registers need to be configured before use. In X86PreTileConfig pass
/// the pldtilecfg instruction is inserted, however at that time we don't
/// know the shape of each physical tile register, because the register
/// allocation is not done yet. This pass runs after register allocation
/// pass. It collects the shape information of each physical tile register
/// and stores the shape in the stack slot that is allocated for load config
/// to tile config register.
17e8d8bef9SDimitry Andric // 18e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 19e8d8bef9SDimitry Andric 20e8d8bef9SDimitry Andric #include "X86.h" 21e8d8bef9SDimitry Andric #include "X86InstrBuilder.h" 22e8d8bef9SDimitry Andric #include "X86MachineFunctionInfo.h" 23e8d8bef9SDimitry Andric #include "X86RegisterInfo.h" 24e8d8bef9SDimitry Andric #include "X86Subtarget.h" 25e8d8bef9SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h" 26e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 27e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 28e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 29e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 30e8d8bef9SDimitry Andric #include "llvm/CodeGen/Passes.h" 31e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 32e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h" 33e8d8bef9SDimitry Andric #include "llvm/CodeGen/TileShapeInfo.h" 34e8d8bef9SDimitry Andric #include "llvm/CodeGen/VirtRegMap.h" 35e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h" 36e8d8bef9SDimitry Andric 37e8d8bef9SDimitry Andric using namespace llvm; 38e8d8bef9SDimitry Andric 3981ad6265SDimitry Andric #define DEBUG_TYPE "tileconfig" 40e8d8bef9SDimitry Andric 41e8d8bef9SDimitry Andric namespace { 42e8d8bef9SDimitry Andric 43fe6060f1SDimitry Andric struct X86TileConfig : public MachineFunctionPass { 44e8d8bef9SDimitry Andric 45e8d8bef9SDimitry Andric X86TileConfig() : MachineFunctionPass(ID) {} 46e8d8bef9SDimitry Andric 47e8d8bef9SDimitry Andric /// Return the pass name. 48e8d8bef9SDimitry Andric StringRef getPassName() const override { return "Tile Register Configure"; } 49e8d8bef9SDimitry Andric 50e8d8bef9SDimitry Andric /// X86TileConfig analysis usage. 
51fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 52fe6060f1SDimitry Andric AU.setPreservesAll(); 53fe6060f1SDimitry Andric AU.addRequired<VirtRegMap>(); 54*0fca6ea1SDimitry Andric AU.addRequired<LiveIntervalsWrapperPass>(); 55fe6060f1SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 56fe6060f1SDimitry Andric } 57e8d8bef9SDimitry Andric 58e8d8bef9SDimitry Andric /// Perform register allocation. 59e8d8bef9SDimitry Andric bool runOnMachineFunction(MachineFunction &mf) override; 60e8d8bef9SDimitry Andric 61e8d8bef9SDimitry Andric MachineFunctionProperties getRequiredProperties() const override { 62e8d8bef9SDimitry Andric return MachineFunctionProperties().set( 63e8d8bef9SDimitry Andric MachineFunctionProperties::Property::NoPHIs); 64e8d8bef9SDimitry Andric } 65e8d8bef9SDimitry Andric 66e8d8bef9SDimitry Andric static char ID; 67e8d8bef9SDimitry Andric }; 68e8d8bef9SDimitry Andric 69e8d8bef9SDimitry Andric } // end anonymous namespace 70e8d8bef9SDimitry Andric 71e8d8bef9SDimitry Andric char X86TileConfig::ID = 0; 72e8d8bef9SDimitry Andric 7381ad6265SDimitry Andric INITIALIZE_PASS_BEGIN(X86TileConfig, DEBUG_TYPE, "Tile Register Configure", 74e8d8bef9SDimitry Andric false, false) 75e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 7681ad6265SDimitry Andric INITIALIZE_PASS_END(X86TileConfig, DEBUG_TYPE, "Tile Register Configure", false, 7781ad6265SDimitry Andric false) 78e8d8bef9SDimitry Andric 79fe6060f1SDimitry Andric bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) { 80*0fca6ea1SDimitry Andric X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 81*0fca6ea1SDimitry Andric // Early exit in the common case of non-AMX code. 
82*0fca6ea1SDimitry Andric if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA) 83*0fca6ea1SDimitry Andric return false; 84*0fca6ea1SDimitry Andric 85fe6060f1SDimitry Andric const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); 86fe6060f1SDimitry Andric const TargetRegisterInfo *TRI = ST.getRegisterInfo(); 87fe6060f1SDimitry Andric const TargetInstrInfo *TII = ST.getInstrInfo(); 88fe6060f1SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 89*0fca6ea1SDimitry Andric LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS(); 90fe6060f1SDimitry Andric VirtRegMap &VRM = getAnalysis<VirtRegMap>(); 91fe6060f1SDimitry Andric 92fe6060f1SDimitry Andric if (VRM.isShapeMapEmpty()) 93fe6060f1SDimitry Andric return false; 94fe6060f1SDimitry Andric 95fe6060f1SDimitry Andric int SS = INT_MAX; 96fe6060f1SDimitry Andric for (MachineBasicBlock &MBB : MF) { 97fe6060f1SDimitry Andric for (MachineInstr &MI : MBB) { 9881ad6265SDimitry Andric if (MI.getOpcode() == X86::PLDTILECFGV) { 99fe6060f1SDimitry Andric SS = MI.getOperand(0).getIndex(); 100fe6060f1SDimitry Andric break; 101fe6060f1SDimitry Andric } 102fe6060f1SDimitry Andric } 103fe6060f1SDimitry Andric if (SS != INT_MAX) 104fe6060f1SDimitry Andric break; 105e8d8bef9SDimitry Andric } 10681ad6265SDimitry Andric // Didn't find PLDTILECFGV, just return false; 10781ad6265SDimitry Andric if (SS == INT_MAX) 10881ad6265SDimitry Andric return false; 109e8d8bef9SDimitry Andric 110fe6060f1SDimitry Andric // Try to find a point to insert MIs for constant shapes. 111fe6060f1SDimitry Andric // Here we are leveraging the palette id inserted in PreRA pass. 
112fe6060f1SDimitry Andric unsigned ConstPos = 0; 113fe6060f1SDimitry Andric MachineInstr *ConstMI = nullptr; 114fe6060f1SDimitry Andric for (MachineInstr &MI : MF.front()) { 115fe6060f1SDimitry Andric if (MI.getOpcode() == X86::MOV8mi && SS == MI.getOperand(0).getIndex()) { 116fe6060f1SDimitry Andric ConstMI = &MI; 117fe6060f1SDimitry Andric break; 118fe6060f1SDimitry Andric } 119fe6060f1SDimitry Andric ++ConstPos; 120fe6060f1SDimitry Andric } 121fe6060f1SDimitry Andric assert(ConstMI && "Cannot find an insertion point"); 122fe6060f1SDimitry Andric 123fe6060f1SDimitry Andric unsigned AMXRegNum = TRI->getRegClass(X86::TILERegClassID)->getNumRegs(); 124fe6060f1SDimitry Andric SmallVector<Register, 8> Phys2Virt(AMXRegNum, 0); 125fe6060f1SDimitry Andric for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { 126fe6060f1SDimitry Andric Register VirtReg = Register::index2VirtReg(I); 127fe6060f1SDimitry Andric if (MRI.reg_nodbg_empty(VirtReg)) 128fe6060f1SDimitry Andric continue; 129fe6060f1SDimitry Andric if (MRI.getRegClass(VirtReg)->getID() != X86::TILERegClassID) 130fe6060f1SDimitry Andric continue; 13181ad6265SDimitry Andric if (VRM.getPhys(VirtReg) == VirtRegMap::NO_PHYS_REG) 13281ad6265SDimitry Andric continue; 133fe6060f1SDimitry Andric unsigned Index = VRM.getPhys(VirtReg) - X86::TMM0; 134fe6060f1SDimitry Andric if (!Phys2Virt[Index]) 135fe6060f1SDimitry Andric Phys2Virt[Index] = VirtReg; 136e8d8bef9SDimitry Andric } 137e8d8bef9SDimitry Andric 138e8d8bef9SDimitry Andric // Fill in the shape of each tile physical register. 
139fe6060f1SDimitry Andric for (unsigned I = 0; I < AMXRegNum; ++I) { 140fe6060f1SDimitry Andric if (!Phys2Virt[I]) 141e8d8bef9SDimitry Andric continue; 142fe6060f1SDimitry Andric DebugLoc DL; 143fe6060f1SDimitry Andric bool IsRow = true; 144fe6060f1SDimitry Andric MachineInstr *NewMI = nullptr; 145fe6060f1SDimitry Andric ShapeT Shape = VRM.getShape(Phys2Virt[I]); 146fe6060f1SDimitry Andric for (auto &R : {Shape.getRow()->getReg(), Shape.getCol()->getReg()}) { 147e8d8bef9SDimitry Andric // Here is the data format for the tile config. 148e8d8bef9SDimitry Andric // 0 palette 149e8d8bef9SDimitry Andric // 1 start_row 150e8d8bef9SDimitry Andric // 2-15 reserved, must be zero 151e8d8bef9SDimitry Andric // 16-17 tile0.colsb Tile 0 bytes per row. 152e8d8bef9SDimitry Andric // 18-19 tile1.colsb Tile 1 bytes per row. 153e8d8bef9SDimitry Andric // 20-21 tile2.colsb Tile 2 bytes per row. 154e8d8bef9SDimitry Andric // ... (sequence continues) 155e8d8bef9SDimitry Andric // 30-31 tile7.colsb Tile 7 bytes per row. 156e8d8bef9SDimitry Andric // 32-47 reserved, must be zero 157e8d8bef9SDimitry Andric // 48 tile0.rows Tile 0 rows. 158e8d8bef9SDimitry Andric // 49 tile1.rows Tile 1 rows. 159e8d8bef9SDimitry Andric // 50 tile2.rows Tile 2 rows. 160e8d8bef9SDimitry Andric // ... (sequence continues) 161e8d8bef9SDimitry Andric // 55 tile7.rows Tile 7 rows. 162e8d8bef9SDimitry Andric // 56-63 reserved, must be zero 163fe6060f1SDimitry Andric int64_t Imm = INT64_MAX; 164fe6060f1SDimitry Andric int Offset = IsRow ? 48 + I : 16 + I * 2; 165fe6060f1SDimitry Andric for (auto &DefMI : MRI.def_instructions(R)) { 166fe6060f1SDimitry Andric MachineBasicBlock &MBB = *DefMI.getParent(); 167fe6060f1SDimitry Andric if (DefMI.isMoveImmediate()) { 168fe6060f1SDimitry Andric if (Imm != INT64_MAX) { 169fe6060f1SDimitry Andric // FIXME: We should handle this case in future. 
170fe6060f1SDimitry Andric assert(Imm == DefMI.getOperand(1).getImm() && 171fe6060f1SDimitry Andric "Cannot initialize with different shapes"); 172fe6060f1SDimitry Andric continue; 173fe6060f1SDimitry Andric } 174fe6060f1SDimitry Andric Imm = DefMI.getOperand(1).getImm(); 175fe6060f1SDimitry Andric NewMI = addFrameReference( 176fe6060f1SDimitry Andric BuildMI(MF.front(), ++ConstMI->getIterator(), DL, 177fe6060f1SDimitry Andric TII->get(IsRow ? X86::MOV8mi : X86::MOV16mi)), 178fe6060f1SDimitry Andric SS, Offset) 179fe6060f1SDimitry Andric .addImm(Imm); 180fe6060f1SDimitry Andric ConstMI = NewMI; 181fe6060f1SDimitry Andric LIS.InsertMachineInstrInMaps(*NewMI); 182fe6060f1SDimitry Andric } else { 183fe6060f1SDimitry Andric unsigned SubIdx = IsRow ? X86::sub_8bit : X86::sub_16bit; 184fe6060f1SDimitry Andric unsigned RegSize = TRI->getRegSizeInBits(*MRI.getRegClass(R)); 185fe6060f1SDimitry Andric if ((IsRow && RegSize == 8) || (!IsRow && RegSize == 16)) 186fe6060f1SDimitry Andric SubIdx = 0; 187fe6060f1SDimitry Andric auto Iter = DefMI.getIterator(); 188fe6060f1SDimitry Andric if (&MBB == &MF.front() && 189fe6060f1SDimitry Andric (unsigned)std::distance(MBB.instr_begin(), Iter) < ConstPos) 190fe6060f1SDimitry Andric Iter = ConstMI->getIterator(); 191fe6060f1SDimitry Andric NewMI = addFrameReference( 192fe6060f1SDimitry Andric BuildMI(MBB, ++Iter, DL, 193fe6060f1SDimitry Andric TII->get(IsRow ? 
X86::MOV8mr : X86::MOV16mr)), 194fe6060f1SDimitry Andric SS, Offset) 195fe6060f1SDimitry Andric .addReg(R, 0, SubIdx); 196fe6060f1SDimitry Andric SlotIndex SIdx = LIS.InsertMachineInstrInMaps(*NewMI); 197fe6060f1SDimitry Andric LIS.extendToIndices(LIS.getInterval(R), {SIdx.getRegSlot()}); 198e8d8bef9SDimitry Andric } 199e8d8bef9SDimitry Andric } 200fe6060f1SDimitry Andric IsRow = false; 201e8d8bef9SDimitry Andric } 202e8d8bef9SDimitry Andric } 203e8d8bef9SDimitry Andric return true; 204e8d8bef9SDimitry Andric } 205e8d8bef9SDimitry Andric 206e8d8bef9SDimitry Andric FunctionPass *llvm::createX86TileConfigPass() { return new X86TileConfig(); } 207