1*e8d8bef9SDimitry Andric //===-- X86PreTileConfig.cpp - Tile Register Configure---------------------===// 2*e8d8bef9SDimitry Andric // 3*e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*e8d8bef9SDimitry Andric // 7*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 8*e8d8bef9SDimitry Andric // 9*e8d8bef9SDimitry Andric /// \file Pass to pre-config the shape of AMX register 10*e8d8bef9SDimitry Andric /// AMX register need to be configured before use. The shape of AMX register 11*e8d8bef9SDimitry Andric /// is encoded in the 1st and 2nd machine operand of AMX pseudo instructions. 12*e8d8bef9SDimitry Andric /// The pldtilecfg is to config tile registers. It should dominator all AMX 13*e8d8bef9SDimitry Andric /// instructions. The pldtilecfg produce a virtual cfg register and the cfg 14*e8d8bef9SDimitry Andric /// register is used by all AMX instructions. 15*e8d8bef9SDimitry Andric /// This pass is to find the common dominator of all AMX instructions and 16*e8d8bef9SDimitry Andric /// insert the pldtilecfg instruction. Besides the cfg register that pldtilecfg 17*e8d8bef9SDimitry Andric /// produces is inserted as the last operand of each AMX instruction. We use 18*e8d8bef9SDimitry Andric /// this scheme to model the def-use relationship between AMX config instruction 19*e8d8bef9SDimitry Andric /// and other AMX instructions. Below is an example. 20*e8d8bef9SDimitry Andric /// 21*e8d8bef9SDimitry Andric /// ----B1---- 22*e8d8bef9SDimitry Andric /// / \ 23*e8d8bef9SDimitry Andric /// / \ 24*e8d8bef9SDimitry Andric /// B2 B3 25*e8d8bef9SDimitry Andric /// %1:tile = PTILELOADDV %2:tile = PTILELOADDV 26*e8d8bef9SDimitry Andric /// 27*e8d8bef9SDimitry Andric /// is transformed to 28*e8d8bef9SDimitry Andric /// 29*e8d8bef9SDimitry Andric /// B1 30*e8d8bef9SDimitry Andric /// %25:tilecfg = PLDTILECFG 31*e8d8bef9SDimitry Andric /// / \ 32*e8d8bef9SDimitry Andric /// / \ 33*e8d8bef9SDimitry Andric /// %1:tile = PTILELOADDV %25 %2:tile = PTILELOADDV %25 34*e8d8bef9SDimitry Andric // 35*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 36*e8d8bef9SDimitry Andric 37*e8d8bef9SDimitry Andric #include "X86.h" 38*e8d8bef9SDimitry Andric #include "X86InstrBuilder.h" 39*e8d8bef9SDimitry Andric #include "X86RegisterInfo.h" 40*e8d8bef9SDimitry Andric #include "X86Subtarget.h" 41*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 42*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 43*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 44*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 45*e8d8bef9SDimitry Andric #include "llvm/CodeGen/Passes.h" 46*e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 47*e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h" 48*e8d8bef9SDimitry Andric #include "llvm/CodeGen/TileShapeInfo.h" 49*e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h" 50*e8d8bef9SDimitry Andric 51*e8d8bef9SDimitry Andric using namespace llvm; 52*e8d8bef9SDimitry Andric 53*e8d8bef9SDimitry Andric #define DEBUG_TYPE "tile-pre-config" 54*e8d8bef9SDimitry Andric 55*e8d8bef9SDimitry Andric namespace { 56*e8d8bef9SDimitry Andric 57*e8d8bef9SDimitry Andric class X86PreTileConfig : public MachineFunctionPass { 58*e8d8bef9SDimitry Andric // context 59*e8d8bef9SDimitry Andric MachineFunction *MF = nullptr; 60*e8d8bef9SDimitry Andric const X86Subtarget *ST = nullptr; 61*e8d8bef9SDimitry Andric const TargetRegisterInfo *TRI; 62*e8d8bef9SDimitry Andric const TargetInstrInfo *TII; 63*e8d8bef9SDimitry Andric MachineDominatorTree *DomTree = nullptr; 64*e8d8bef9SDimitry Andric MachineRegisterInfo *MRI = nullptr; 65*e8d8bef9SDimitry Andric 66*e8d8bef9SDimitry Andric MachineInstr *getTileConfigPoint(); 67*e8d8bef9SDimitry Andric 68*e8d8bef9SDimitry Andric public: 69*e8d8bef9SDimitry Andric X86PreTileConfig() : MachineFunctionPass(ID) {} 70*e8d8bef9SDimitry Andric 71*e8d8bef9SDimitry Andric /// Return the pass name. 72*e8d8bef9SDimitry Andric StringRef getPassName() const override { 73*e8d8bef9SDimitry Andric return "Tile Register Pre-configure"; 74*e8d8bef9SDimitry Andric } 75*e8d8bef9SDimitry Andric 76*e8d8bef9SDimitry Andric /// X86PreTileConfig analysis usage. 77*e8d8bef9SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override; 78*e8d8bef9SDimitry Andric 79*e8d8bef9SDimitry Andric /// Perform register allocation. 80*e8d8bef9SDimitry Andric bool runOnMachineFunction(MachineFunction &mf) override; 81*e8d8bef9SDimitry Andric 82*e8d8bef9SDimitry Andric static char ID; 83*e8d8bef9SDimitry Andric }; 84*e8d8bef9SDimitry Andric 85*e8d8bef9SDimitry Andric } // end anonymous namespace 86*e8d8bef9SDimitry Andric 87*e8d8bef9SDimitry Andric char X86PreTileConfig::ID = 0; 88*e8d8bef9SDimitry Andric 89*e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(X86PreTileConfig, "tilepreconfig", 90*e8d8bef9SDimitry Andric "Tile Register Configure", false, false) 91*e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 92*e8d8bef9SDimitry Andric INITIALIZE_PASS_END(X86PreTileConfig, "tilepreconfig", 93*e8d8bef9SDimitry Andric "Tile Register Configure", false, false) 94*e8d8bef9SDimitry Andric 95*e8d8bef9SDimitry Andric void X86PreTileConfig::getAnalysisUsage(AnalysisUsage &AU) const { 96*e8d8bef9SDimitry Andric AU.setPreservesAll(); 97*e8d8bef9SDimitry Andric AU.addRequired<MachineDominatorTree>(); 98*e8d8bef9SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 99*e8d8bef9SDimitry Andric } 100*e8d8bef9SDimitry Andric 101*e8d8bef9SDimitry Andric static Register buildConfigMI(MachineBasicBlock::iterator MI, int FrameIdx, 102*e8d8bef9SDimitry Andric const TargetInstrInfo *TII, 103*e8d8bef9SDimitry Andric MachineRegisterInfo *MRI, 104*e8d8bef9SDimitry Andric const X86Subtarget *ST) { 105*e8d8bef9SDimitry Andric auto *MBB = MI->getParent(); 106*e8d8bef9SDimitry Andric 107*e8d8bef9SDimitry Andric // FIXME: AMX should assume AVX512 enabled. 108*e8d8bef9SDimitry Andric if (ST->hasAVX512()) { 109*e8d8bef9SDimitry Andric // Zero stack slot. 110*e8d8bef9SDimitry Andric Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass); 111*e8d8bef9SDimitry Andric BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VPXORDZrr), Zmm) 112*e8d8bef9SDimitry Andric .addReg(Zmm, RegState::Undef) 113*e8d8bef9SDimitry Andric .addReg(Zmm, RegState::Undef); 114*e8d8bef9SDimitry Andric addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VMOVUPSZmr)), 115*e8d8bef9SDimitry Andric FrameIdx) 116*e8d8bef9SDimitry Andric .addReg(Zmm); 117*e8d8bef9SDimitry Andric } 118*e8d8bef9SDimitry Andric 119*e8d8bef9SDimitry Andric // build psuedo ldtilecfg 120*e8d8bef9SDimitry Andric Register VReg = MRI->createVirtualRegister(&X86::TILECFGRegClass); 121*e8d8bef9SDimitry Andric 122*e8d8bef9SDimitry Andric addFrameReference( 123*e8d8bef9SDimitry Andric BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::PLDTILECFG), VReg), FrameIdx); 124*e8d8bef9SDimitry Andric 125*e8d8bef9SDimitry Andric return VReg; 126*e8d8bef9SDimitry Andric } 127*e8d8bef9SDimitry Andric 128*e8d8bef9SDimitry Andric static ShapeT getShape(const MachineInstr &MI, MachineRegisterInfo *MRI) { 129*e8d8bef9SDimitry Andric unsigned Opcode = MI.getOpcode(); 130*e8d8bef9SDimitry Andric switch (Opcode) { 131*e8d8bef9SDimitry Andric default: 132*e8d8bef9SDimitry Andric llvm_unreachable("Unexpected machine instruction on tile"); 133*e8d8bef9SDimitry Andric case X86::PTILELOADDV: 134*e8d8bef9SDimitry Andric case X86::PTDPBSSDV: 135*e8d8bef9SDimitry Andric case X86::PTILEZEROV: 136*e8d8bef9SDimitry Andric MachineOperand &MO1 = const_cast<MachineOperand &>(MI.getOperand(1)); 137*e8d8bef9SDimitry Andric MachineOperand &MO2 = const_cast<MachineOperand &>(MI.getOperand(2)); 138*e8d8bef9SDimitry Andric ShapeT Shape(&MO1, &MO2, MRI); 139*e8d8bef9SDimitry Andric return Shape; 140*e8d8bef9SDimitry Andric } 141*e8d8bef9SDimitry Andric } 142*e8d8bef9SDimitry Andric 143*e8d8bef9SDimitry Andric MachineInstr *X86PreTileConfig::getTileConfigPoint() { 144*e8d8bef9SDimitry Andric DenseMap<Register, ShapeT> PhysShapeInfo; 145*e8d8bef9SDimitry Andric MachineBasicBlock *MBB = nullptr; 146*e8d8bef9SDimitry Andric DenseSet<const MachineInstr *> MIs; 147*e8d8bef9SDimitry Andric for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { 148*e8d8bef9SDimitry Andric Register VirtReg = Register::index2VirtReg(i); 149*e8d8bef9SDimitry Andric if (MRI->reg_nodbg_empty(VirtReg)) 150*e8d8bef9SDimitry Andric continue; 151*e8d8bef9SDimitry Andric const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); 152*e8d8bef9SDimitry Andric if (RC.getID() != X86::TILERegClassID) 153*e8d8bef9SDimitry Andric continue; 154*e8d8bef9SDimitry Andric 155*e8d8bef9SDimitry Andric // Find the common dominator for all MI that define tile register. 156*e8d8bef9SDimitry Andric for (const MachineOperand &MO : MRI->def_operands(VirtReg)) { 157*e8d8bef9SDimitry Andric if (MO.isUndef()) 158*e8d8bef9SDimitry Andric continue; 159*e8d8bef9SDimitry Andric const auto *MI = MO.getParent(); 160*e8d8bef9SDimitry Andric // PHI or IMPLICIT_DEF instructiion. 161*e8d8bef9SDimitry Andric // There must be a input tile before PHI instruction. 162*e8d8bef9SDimitry Andric if (MI->isTransient()) 163*e8d8bef9SDimitry Andric continue; 164*e8d8bef9SDimitry Andric if (!MBB) 165*e8d8bef9SDimitry Andric MBB = const_cast<MachineBasicBlock *>(MI->getParent()); 166*e8d8bef9SDimitry Andric MBB = DomTree->findNearestCommonDominator( 167*e8d8bef9SDimitry Andric MBB, const_cast<MachineBasicBlock *>(MI->getParent())); 168*e8d8bef9SDimitry Andric 169*e8d8bef9SDimitry Andric // Collect the instructions that define shape. 170*e8d8bef9SDimitry Andric ShapeT Shape = getShape(*MI, MRI); 171*e8d8bef9SDimitry Andric std::array<MachineOperand *, 2> ShapeMOs = {Shape.getRow(), 172*e8d8bef9SDimitry Andric Shape.getCol()}; 173*e8d8bef9SDimitry Andric for (auto *ShapeMO : ShapeMOs) { 174*e8d8bef9SDimitry Andric Register ShapeReg = ShapeMO->getReg(); 175*e8d8bef9SDimitry Andric for (const MachineOperand &MO : MRI->def_operands(ShapeReg)) { 176*e8d8bef9SDimitry Andric const auto *ShapeMI = MO.getParent(); 177*e8d8bef9SDimitry Andric MIs.insert(ShapeMI); 178*e8d8bef9SDimitry Andric } 179*e8d8bef9SDimitry Andric } 180*e8d8bef9SDimitry Andric } 181*e8d8bef9SDimitry Andric } 182*e8d8bef9SDimitry Andric if (!MBB) 183*e8d8bef9SDimitry Andric return nullptr; 184*e8d8bef9SDimitry Andric // This pass is before the pass of eliminating PHI node, so it 185*e8d8bef9SDimitry Andric // is in SSA form. 186*e8d8bef9SDimitry Andric assert(MRI->isSSA() && "Not SSA form in pre-tile config"); 187*e8d8bef9SDimitry Andric // Shape def should dominate tile config MBB. 188*e8d8bef9SDimitry Andric // def s s1 s2 189*e8d8bef9SDimitry Andric // / \ \ / 190*e8d8bef9SDimitry Andric // / \ \ / 191*e8d8bef9SDimitry Andric // conf s3=phi(s1,s2) 192*e8d8bef9SDimitry Andric // | 193*e8d8bef9SDimitry Andric // c 194*e8d8bef9SDimitry Andric // 195*e8d8bef9SDimitry Andric for (const auto *MI : MIs) { 196*e8d8bef9SDimitry Andric const MachineBasicBlock *ShapeMBB = MI->getParent(); 197*e8d8bef9SDimitry Andric if (DomTree->dominates(ShapeMBB, MBB)) 198*e8d8bef9SDimitry Andric continue; 199*e8d8bef9SDimitry Andric if (MI->isMoveImmediate()) 200*e8d8bef9SDimitry Andric continue; 201*e8d8bef9SDimitry Andric report_fatal_error(MF->getName() + ": Failed to config tile register, " 202*e8d8bef9SDimitry Andric "please define the shape earlier"); 203*e8d8bef9SDimitry Andric } 204*e8d8bef9SDimitry Andric 205*e8d8bef9SDimitry Andric // ldtilecfg should be inserted after the MI that define the shape. 206*e8d8bef9SDimitry Andric MachineBasicBlock::reverse_instr_iterator I, E; 207*e8d8bef9SDimitry Andric for (I = MBB->instr_rbegin(), E = MBB->instr_rend(); I != E; ++I) { 208*e8d8bef9SDimitry Andric auto *MI = &*I; 209*e8d8bef9SDimitry Andric if (MIs.count(MI) && (!MI->isMoveImmediate())) 210*e8d8bef9SDimitry Andric break; 211*e8d8bef9SDimitry Andric } 212*e8d8bef9SDimitry Andric MachineBasicBlock::iterator MII; 213*e8d8bef9SDimitry Andric if (I == E) 214*e8d8bef9SDimitry Andric MII = MBB->getFirstNonPHI(); 215*e8d8bef9SDimitry Andric else { 216*e8d8bef9SDimitry Andric MII = MachineBasicBlock::iterator(&*I); 217*e8d8bef9SDimitry Andric MII++; 218*e8d8bef9SDimitry Andric } 219*e8d8bef9SDimitry Andric return &*MII; 220*e8d8bef9SDimitry Andric } 221*e8d8bef9SDimitry Andric 222*e8d8bef9SDimitry Andric static void addTileCFGUse(MachineFunction &MF, Register CFG) { 223*e8d8bef9SDimitry Andric for (MachineBasicBlock &MBB : MF) { 224*e8d8bef9SDimitry Andric 225*e8d8bef9SDimitry Andric // Traverse the basic block. 226*e8d8bef9SDimitry Andric for (MachineInstr &MI : MBB) { 227*e8d8bef9SDimitry Andric unsigned Opcode = MI.getOpcode(); 228*e8d8bef9SDimitry Andric switch (Opcode) { 229*e8d8bef9SDimitry Andric default: 230*e8d8bef9SDimitry Andric break; 231*e8d8bef9SDimitry Andric case X86::PTILELOADDV: 232*e8d8bef9SDimitry Andric case X86::PTILESTOREDV: 233*e8d8bef9SDimitry Andric case X86::PTDPBSSDV: 234*e8d8bef9SDimitry Andric case X86::PTILEZEROV: 235*e8d8bef9SDimitry Andric unsigned NumOperands = MI.getNumOperands(); 236*e8d8bef9SDimitry Andric MI.RemoveOperand(NumOperands - 1); 237*e8d8bef9SDimitry Andric MI.addOperand(MF, MachineOperand::CreateReg(CFG, false)); 238*e8d8bef9SDimitry Andric break; 239*e8d8bef9SDimitry Andric } 240*e8d8bef9SDimitry Andric } 241*e8d8bef9SDimitry Andric } 242*e8d8bef9SDimitry Andric } 243*e8d8bef9SDimitry Andric 244*e8d8bef9SDimitry Andric bool X86PreTileConfig::runOnMachineFunction(MachineFunction &mf) { 245*e8d8bef9SDimitry Andric MF = &mf; 246*e8d8bef9SDimitry Andric MRI = &mf.getRegInfo(); 247*e8d8bef9SDimitry Andric ST = &mf.getSubtarget<X86Subtarget>(); 248*e8d8bef9SDimitry Andric TRI = ST->getRegisterInfo(); 249*e8d8bef9SDimitry Andric TII = mf.getSubtarget().getInstrInfo(); 250*e8d8bef9SDimitry Andric DomTree = &getAnalysis<MachineDominatorTree>(); 251*e8d8bef9SDimitry Andric 252*e8d8bef9SDimitry Andric MachineInstr *MI = getTileConfigPoint(); 253*e8d8bef9SDimitry Andric if (!MI) 254*e8d8bef9SDimitry Andric return false; 255*e8d8bef9SDimitry Andric unsigned Size = ST->getTileConfigSize(); 256*e8d8bef9SDimitry Andric Align Alignment = ST->getTileConfigAlignment(); 257*e8d8bef9SDimitry Andric int SS = mf.getFrameInfo().CreateStackObject(Size, Alignment, false); 258*e8d8bef9SDimitry Andric Register CFG = buildConfigMI(MI, SS, TII, MRI, ST); 259*e8d8bef9SDimitry Andric addTileCFGUse(mf, CFG); 260*e8d8bef9SDimitry Andric return true; 261*e8d8bef9SDimitry Andric } 262*e8d8bef9SDimitry Andric 263*e8d8bef9SDimitry Andric FunctionPass *llvm::createX86PreTileConfigPass() { 264*e8d8bef9SDimitry Andric return new X86PreTileConfig(); 265*e8d8bef9SDimitry Andric } 266