//===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Pass to configure the shape of AMX physical registers.
/// AMX registers need to be configured before use. The ldtilecfg instruction
/// is inserted before the FastRegAllocation pass; however, at that time we
/// don't know the shape of each physical tile register, because register
/// allocation is not done yet. This pass runs after the register allocation
/// pass. It collects the shape information of each physical tile register
/// and stores the shape in the stack slot that is allocated for loading the
/// config into the tile config register.
17*fe6060f1SDimitry Andric // 18*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 19*fe6060f1SDimitry Andric 20*fe6060f1SDimitry Andric #include "X86.h" 21*fe6060f1SDimitry Andric #include "X86InstrBuilder.h" 22*fe6060f1SDimitry Andric #include "X86MachineFunctionInfo.h" 23*fe6060f1SDimitry Andric #include "X86RegisterInfo.h" 24*fe6060f1SDimitry Andric #include "X86Subtarget.h" 25*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 26*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 27*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 28*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 29*fe6060f1SDimitry Andric #include "llvm/CodeGen/Passes.h" 30*fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 31*fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h" 32*fe6060f1SDimitry Andric #include "llvm/InitializePasses.h" 33*fe6060f1SDimitry Andric 34*fe6060f1SDimitry Andric using namespace llvm; 35*fe6060f1SDimitry Andric 36*fe6060f1SDimitry Andric #define DEBUG_TYPE "fasttileconfig" 37*fe6060f1SDimitry Andric 38*fe6060f1SDimitry Andric namespace { 39*fe6060f1SDimitry Andric 40*fe6060f1SDimitry Andric class X86FastTileConfig : public MachineFunctionPass { 41*fe6060f1SDimitry Andric // context 42*fe6060f1SDimitry Andric MachineFunction *MF = nullptr; 43*fe6060f1SDimitry Andric const X86Subtarget *ST = nullptr; 44*fe6060f1SDimitry Andric const TargetRegisterInfo *TRI = nullptr; 45*fe6060f1SDimitry Andric const TargetInstrInfo *TII = nullptr; 46*fe6060f1SDimitry Andric MachineRegisterInfo *MRI = nullptr; 47*fe6060f1SDimitry Andric 48*fe6060f1SDimitry Andric MachineInstr *getTileConfigPoint(); 49*fe6060f1SDimitry Andric void tileConfig(); 50*fe6060f1SDimitry Andric 51*fe6060f1SDimitry Andric public: 52*fe6060f1SDimitry Andric X86FastTileConfig() : MachineFunctionPass(ID) {} 53*fe6060f1SDimitry Andric 54*fe6060f1SDimitry 
Andric bool fastTileConfig(); 55*fe6060f1SDimitry Andric bool isTileLoad(MachineInstr &MI); 56*fe6060f1SDimitry Andric bool isTileStore(MachineInstr &MI); 57*fe6060f1SDimitry Andric bool isAMXInstr(MachineInstr &MI); 58*fe6060f1SDimitry Andric void getTileStoreShape(MachineInstr &MI, 59*fe6060f1SDimitry Andric SmallVector<MachineOperand *> &ShapedTiles); 60*fe6060f1SDimitry Andric 61*fe6060f1SDimitry Andric MachineInstr *getKeyAMXInstr(MachineInstr *MI); 62*fe6060f1SDimitry Andric void getTileShapesCfg(MachineInstr *MI, 63*fe6060f1SDimitry Andric SmallVector<MachineOperand *> &ShapedTiles); 64*fe6060f1SDimitry Andric void getShapeCfgInstrs(MachineInstr *MI, 65*fe6060f1SDimitry Andric std::map<unsigned, MachineInstr *> &RowCfgs, 66*fe6060f1SDimitry Andric std::map<unsigned, MachineInstr *> &ColCfgs); 67*fe6060f1SDimitry Andric 68*fe6060f1SDimitry Andric /// Return the pass name. 69*fe6060f1SDimitry Andric StringRef getPassName() const override { 70*fe6060f1SDimitry Andric return "Fast Tile Register Configure"; 71*fe6060f1SDimitry Andric } 72*fe6060f1SDimitry Andric 73*fe6060f1SDimitry Andric void materializeTileCfg(MachineInstr *MI); 74*fe6060f1SDimitry Andric 75*fe6060f1SDimitry Andric void rewriteTileCfg(SmallVector<MachineOperand *> &ShapedTiles, 76*fe6060f1SDimitry Andric std::map<unsigned, MachineInstr *> &RowCfgs, 77*fe6060f1SDimitry Andric std::map<unsigned, MachineInstr *> &ColCfgs); 78*fe6060f1SDimitry Andric 79*fe6060f1SDimitry Andric /// Perform register allocation. 
80*fe6060f1SDimitry Andric bool runOnMachineFunction(MachineFunction &MFunc) override; 81*fe6060f1SDimitry Andric 82*fe6060f1SDimitry Andric MachineFunctionProperties getRequiredProperties() const override { 83*fe6060f1SDimitry Andric return MachineFunctionProperties().set( 84*fe6060f1SDimitry Andric MachineFunctionProperties::Property::NoPHIs); 85*fe6060f1SDimitry Andric } 86*fe6060f1SDimitry Andric 87*fe6060f1SDimitry Andric static char ID; 88*fe6060f1SDimitry Andric }; 89*fe6060f1SDimitry Andric 90*fe6060f1SDimitry Andric } // end anonymous namespace 91*fe6060f1SDimitry Andric 92*fe6060f1SDimitry Andric char X86FastTileConfig::ID = 0; 93*fe6060f1SDimitry Andric 94*fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE, 95*fe6060f1SDimitry Andric "Fast Tile Register Configure", false, false) 96*fe6060f1SDimitry Andric INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE, 97*fe6060f1SDimitry Andric "Fast Tile Register Configure", false, false) 98*fe6060f1SDimitry Andric 99*fe6060f1SDimitry Andric static bool isTilePhysReg(MachineOperand &Op) { 100*fe6060f1SDimitry Andric if (!Op.isReg()) 101*fe6060f1SDimitry Andric return false; 102*fe6060f1SDimitry Andric 103*fe6060f1SDimitry Andric Register Reg = Op.getReg(); 104*fe6060f1SDimitry Andric if (Reg >= X86::TMM0 && Reg <= X86::TMM7) 105*fe6060f1SDimitry Andric return true; 106*fe6060f1SDimitry Andric return false; 107*fe6060f1SDimitry Andric } 108*fe6060f1SDimitry Andric 109*fe6060f1SDimitry Andric static unsigned getTilePhysRegIdx(MachineOperand *Op) { 110*fe6060f1SDimitry Andric assert(isTilePhysReg(*Op) && "Tile Operand is invalid"); 111*fe6060f1SDimitry Andric return Op->getReg() - X86::TMM0; 112*fe6060f1SDimitry Andric } 113*fe6060f1SDimitry Andric 114*fe6060f1SDimitry Andric static inline void adjustRowCfg(unsigned TIdx, MachineInstr *MI) { 115*fe6060f1SDimitry Andric unsigned Offset = 48 + TIdx; 116*fe6060f1SDimitry Andric MI->getOperand(3).ChangeToImmediate(Offset); 117*fe6060f1SDimitry 
Andric } 118*fe6060f1SDimitry Andric 119*fe6060f1SDimitry Andric static inline void adjustColCfg(unsigned TIdx, MachineInstr *MI) { 120*fe6060f1SDimitry Andric unsigned Offset = 16 + TIdx * 2; 121*fe6060f1SDimitry Andric MI->getOperand(3).ChangeToImmediate(Offset); 122*fe6060f1SDimitry Andric } 123*fe6060f1SDimitry Andric 124*fe6060f1SDimitry Andric bool X86FastTileConfig::isTileLoad(MachineInstr &MI) { 125*fe6060f1SDimitry Andric return MI.getOpcode() == X86::PTILELOADDV || 126*fe6060f1SDimitry Andric MI.getOpcode() == X86::PTILELOADDT1V; 127*fe6060f1SDimitry Andric } 128*fe6060f1SDimitry Andric bool X86FastTileConfig::isTileStore(MachineInstr &MI) { 129*fe6060f1SDimitry Andric return MI.getOpcode() == X86::PTILESTOREDV; 130*fe6060f1SDimitry Andric } 131*fe6060f1SDimitry Andric bool X86FastTileConfig::isAMXInstr(MachineInstr &MI) { 132*fe6060f1SDimitry Andric // TODO: May need to handle some special nontile amx instrucion. 133*fe6060f1SDimitry Andric if (MI.getOpcode() == X86::PLDTILECFGV || MI.isDebugInstr()) 134*fe6060f1SDimitry Andric return false; 135*fe6060f1SDimitry Andric 136*fe6060f1SDimitry Andric for (MachineOperand &MO : MI.operands()) 137*fe6060f1SDimitry Andric if (isTilePhysReg(MO)) 138*fe6060f1SDimitry Andric return true; 139*fe6060f1SDimitry Andric 140*fe6060f1SDimitry Andric return false; 141*fe6060f1SDimitry Andric } 142*fe6060f1SDimitry Andric 143*fe6060f1SDimitry Andric MachineInstr *X86FastTileConfig::getKeyAMXInstr(MachineInstr *MI) { 144*fe6060f1SDimitry Andric auto Cfg = MachineBasicBlock::iterator(MI); 145*fe6060f1SDimitry Andric MachineBasicBlock *MBB = MI->getParent(); 146*fe6060f1SDimitry Andric MachineInstr *KeyMI = nullptr; 147*fe6060f1SDimitry Andric int KeyAMXNum = 0; 148*fe6060f1SDimitry Andric 149*fe6060f1SDimitry Andric for (auto II = Cfg; II != MBB->end(); II++) { 150*fe6060f1SDimitry Andric if (isTileLoad(*II)) { 151*fe6060f1SDimitry Andric KeyMI = &*II; 152*fe6060f1SDimitry Andric continue; 153*fe6060f1SDimitry Andric } 
154*fe6060f1SDimitry Andric 155*fe6060f1SDimitry Andric if (isTileStore(*II)) { 156*fe6060f1SDimitry Andric assert(KeyMI && "Key AMX Should be found before!"); 157*fe6060f1SDimitry Andric break; 158*fe6060f1SDimitry Andric } 159*fe6060f1SDimitry Andric 160*fe6060f1SDimitry Andric if (isAMXInstr(*II)) { 161*fe6060f1SDimitry Andric assert((KeyAMXNum == 0) && "Too many Key AMX instruction!"); 162*fe6060f1SDimitry Andric KeyAMXNum++; 163*fe6060f1SDimitry Andric KeyMI = &*II; 164*fe6060f1SDimitry Andric } 165*fe6060f1SDimitry Andric } 166*fe6060f1SDimitry Andric assert(KeyMI && "There must be an AMX instruction."); 167*fe6060f1SDimitry Andric return KeyMI; 168*fe6060f1SDimitry Andric } 169*fe6060f1SDimitry Andric 170*fe6060f1SDimitry Andric // Orderly get the tiles in key amx instruction, uses before defs. 171*fe6060f1SDimitry Andric void X86FastTileConfig::getTileShapesCfg( 172*fe6060f1SDimitry Andric MachineInstr *CfgMI, SmallVector<MachineOperand *> &ShapedTiles) { 173*fe6060f1SDimitry Andric MachineInstr *KeyMI = getKeyAMXInstr(CfgMI); 174*fe6060f1SDimitry Andric 175*fe6060f1SDimitry Andric SmallVector<MachineOperand *> DefTiles; 176*fe6060f1SDimitry Andric for (MachineOperand &MO : KeyMI->operands()) { 177*fe6060f1SDimitry Andric if (!isTilePhysReg(MO)) 178*fe6060f1SDimitry Andric continue; 179*fe6060f1SDimitry Andric if (MO.isDef()) 180*fe6060f1SDimitry Andric DefTiles.push_back(&MO); 181*fe6060f1SDimitry Andric else 182*fe6060f1SDimitry Andric ShapedTiles.push_back(&MO); 183*fe6060f1SDimitry Andric } 184*fe6060f1SDimitry Andric ShapedTiles.append(DefTiles); 185*fe6060f1SDimitry Andric } 186*fe6060f1SDimitry Andric 187*fe6060f1SDimitry Andric // We pre-config the shapes at position named with "amx.tmm.N.shape.row* and 188*fe6060f1SDimitry Andric // amx.shape.N.col*" at pass "Pre AMX Tile Config". 189*fe6060f1SDimitry Andric // The 'N' implies the order of tiles in key amx intrinsic. 
190*fe6060f1SDimitry Andric void X86FastTileConfig::getShapeCfgInstrs( 191*fe6060f1SDimitry Andric MachineInstr *MI, std::map<unsigned, MachineInstr *> &RowCfgs, 192*fe6060f1SDimitry Andric std::map<unsigned, MachineInstr *> &ColCfgs) { 193*fe6060f1SDimitry Andric auto Cfg = MachineBasicBlock::iterator(MI); 194*fe6060f1SDimitry Andric MachineBasicBlock *MBB = MI->getParent(); 195*fe6060f1SDimitry Andric 196*fe6060f1SDimitry Andric for (auto II = Cfg; II != MBB->begin(); II--) { 197*fe6060f1SDimitry Andric if (isAMXInstr(*II) || II->isTerminator() || II->isCall()) 198*fe6060f1SDimitry Andric break; 199*fe6060f1SDimitry Andric if (!II->mayStore() || !II->hasOneMemOperand()) 200*fe6060f1SDimitry Andric continue; 201*fe6060f1SDimitry Andric const Value *MemPtr = II->memoperands()[0]->getValue(); 202*fe6060f1SDimitry Andric if (!MemPtr) 203*fe6060f1SDimitry Andric continue; 204*fe6060f1SDimitry Andric 205*fe6060f1SDimitry Andric StringRef Name = MemPtr->getName(); 206*fe6060f1SDimitry Andric if (!Name.startswith("amx.tmm.")) 207*fe6060f1SDimitry Andric continue; 208*fe6060f1SDimitry Andric 209*fe6060f1SDimitry Andric // Get the 'N'th tile shape config in key amx instruction. 210*fe6060f1SDimitry Andric auto N = Name.find(".shape"); 211*fe6060f1SDimitry Andric StringRef STileIdx = Name.slice(8, N); 212*fe6060f1SDimitry Andric unsigned Idx; 213*fe6060f1SDimitry Andric STileIdx.getAsInteger(10, Idx); 214*fe6060f1SDimitry Andric 215*fe6060f1SDimitry Andric // And related them with their store instructions. 
216*fe6060f1SDimitry Andric if (Name.contains("row")) 217*fe6060f1SDimitry Andric RowCfgs[Idx] = &*II; 218*fe6060f1SDimitry Andric else if (Name.contains("col")) 219*fe6060f1SDimitry Andric ColCfgs[Idx] = &*II; 220*fe6060f1SDimitry Andric else 221*fe6060f1SDimitry Andric llvm_unreachable("Invalid tile shape info!"); 222*fe6060f1SDimitry Andric } 223*fe6060f1SDimitry Andric assert((RowCfgs.size() == ColCfgs.size()) && 224*fe6060f1SDimitry Andric "The number of tile row and col must be equal!"); 225*fe6060f1SDimitry Andric } 226*fe6060f1SDimitry Andric 227*fe6060f1SDimitry Andric // Here is the data format for the tile config. 228*fe6060f1SDimitry Andric // 0 palette = 1 now. 229*fe6060f1SDimitry Andric // 1 start_row = 0 now. 230*fe6060f1SDimitry Andric // 2-15 reserved, must be zero 231*fe6060f1SDimitry Andric // 16-17 tile0.colsb Tile 0 bytes per row. 232*fe6060f1SDimitry Andric // 18-19 tile1.colsb Tile 1 bytes per row. 233*fe6060f1SDimitry Andric // 20-21 tile2.colsb Tile 2 bytes per row. 234*fe6060f1SDimitry Andric // ... (sequence continues) 235*fe6060f1SDimitry Andric // 30-31 tile7.colsb Tile 7 bytes per row. 236*fe6060f1SDimitry Andric // 32-47 reserved, must be zero 237*fe6060f1SDimitry Andric // 48 tile0.rows Tile 0 rows. 238*fe6060f1SDimitry Andric // 49 tile1.rows Tile 1 rows. 239*fe6060f1SDimitry Andric // 50 tile2.rows Tile 2 rows. 240*fe6060f1SDimitry Andric // ... (sequence continues) 241*fe6060f1SDimitry Andric // 55 tile7.rows Tile 7 rows. 
242*fe6060f1SDimitry Andric // 56-63 reserved, must be zero 243*fe6060f1SDimitry Andric void X86FastTileConfig::rewriteTileCfg( 244*fe6060f1SDimitry Andric SmallVector<MachineOperand *> &ShapedTiles, 245*fe6060f1SDimitry Andric std::map<unsigned, MachineInstr *> &RowCfgs, 246*fe6060f1SDimitry Andric std::map<unsigned, MachineInstr *> &ColCfgs) { 247*fe6060f1SDimitry Andric assert((RowCfgs.size() == ShapedTiles.size()) && 248*fe6060f1SDimitry Andric "The number of tile shapes not equal with the number of tiles!"); 249*fe6060f1SDimitry Andric 250*fe6060f1SDimitry Andric // Orderly get the tiles and adjust the shape config. 251*fe6060f1SDimitry Andric for (unsigned I = 0, E = ShapedTiles.size(); I < E; I++) { 252*fe6060f1SDimitry Andric MachineOperand *MO = ShapedTiles[I]; 253*fe6060f1SDimitry Andric unsigned TmmIdx = getTilePhysRegIdx(MO); 254*fe6060f1SDimitry Andric if (I == TmmIdx) 255*fe6060f1SDimitry Andric continue; 256*fe6060f1SDimitry Andric adjustRowCfg(TmmIdx, RowCfgs[I]); 257*fe6060f1SDimitry Andric adjustColCfg(TmmIdx, ColCfgs[I]); 258*fe6060f1SDimitry Andric } 259*fe6060f1SDimitry Andric } 260*fe6060f1SDimitry Andric 261*fe6060f1SDimitry Andric // We have already preconfig the shapes before fast register allocation at 262*fe6060f1SDimitry Andric // X86PreAMXConfig::preWriteTileCfg(). Now, we have done fast register 263*fe6060f1SDimitry Andric // allocation, the shapes pre-written before may not rightly corresponding 264*fe6060f1SDimitry Andric // to the correct tmm registers, so we need adjust them. 
265*fe6060f1SDimitry Andric void X86FastTileConfig::materializeTileCfg(MachineInstr *CfgMI) { 266*fe6060f1SDimitry Andric SmallVector<MachineOperand *> ShapedTiles; 267*fe6060f1SDimitry Andric std::map<unsigned, MachineInstr *> RowCfgs; 268*fe6060f1SDimitry Andric std::map<unsigned, MachineInstr *> ColCfgs; 269*fe6060f1SDimitry Andric 270*fe6060f1SDimitry Andric // Orderly keep the tile uses and def in ShapedTiles; 271*fe6060f1SDimitry Andric getTileShapesCfg(CfgMI, ShapedTiles); 272*fe6060f1SDimitry Andric assert(ShapedTiles.size() && "Not find shapes config!"); 273*fe6060f1SDimitry Andric 274*fe6060f1SDimitry Andric getShapeCfgInstrs(CfgMI, RowCfgs, ColCfgs); 275*fe6060f1SDimitry Andric 276*fe6060f1SDimitry Andric rewriteTileCfg(ShapedTiles, RowCfgs, ColCfgs); 277*fe6060f1SDimitry Andric } 278*fe6060f1SDimitry Andric 279*fe6060f1SDimitry Andric bool X86FastTileConfig::fastTileConfig() { 280*fe6060f1SDimitry Andric bool Changed = false; 281*fe6060f1SDimitry Andric 282*fe6060f1SDimitry Andric for (MachineBasicBlock &MBB : *MF) { 283*fe6060f1SDimitry Andric SmallVector<MachineInstr *, 2> CFGs; 284*fe6060f1SDimitry Andric for (MachineInstr &MI : MBB) 285*fe6060f1SDimitry Andric if (MI.getOpcode() == X86::PLDTILECFGV) 286*fe6060f1SDimitry Andric CFGs.push_back(&MI); 287*fe6060f1SDimitry Andric for (auto *MI : CFGs) 288*fe6060f1SDimitry Andric materializeTileCfg(MI); 289*fe6060f1SDimitry Andric if (!CFGs.empty()) 290*fe6060f1SDimitry Andric Changed = true; 291*fe6060f1SDimitry Andric } 292*fe6060f1SDimitry Andric return Changed; 293*fe6060f1SDimitry Andric } 294*fe6060f1SDimitry Andric 295*fe6060f1SDimitry Andric bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) { 296*fe6060f1SDimitry Andric MF = &MFunc; 297*fe6060f1SDimitry Andric MRI = &MFunc.getRegInfo(); 298*fe6060f1SDimitry Andric ST = &MFunc.getSubtarget<X86Subtarget>(); 299*fe6060f1SDimitry Andric TRI = ST->getRegisterInfo(); 300*fe6060f1SDimitry Andric TII = 
MFunc.getSubtarget().getInstrInfo(); 301*fe6060f1SDimitry Andric 302*fe6060f1SDimitry Andric return fastTileConfig(); 303*fe6060f1SDimitry Andric } 304*fe6060f1SDimitry Andric 305*fe6060f1SDimitry Andric FunctionPass *llvm::createX86FastTileConfigPass() { 306*fe6060f1SDimitry Andric return new X86FastTileConfig(); 307*fe6060f1SDimitry Andric } 308