//===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Pass to preconfig the shape of physical tile registers
/// It inserts ldtilecfg ahead of each group of tile registers. The algorithm
/// walks each instruction of a basic block in reverse order. All the tile
/// registers that live out of the basic block are spilled and reloaded
/// before their users. It also checks the dependency of the shape to ensure
/// the shape is defined before ldtilecfg.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "fastpretileconfig"

STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads, "Number of loads added");

namespace {

/// Pass that walks each basic block and inserts ldtilecfg pseudos so that
/// every group of tile-register definitions executes under a matching tile
/// configuration. Tile values that cross a configuration boundary are
/// spilled and reloaded around it.
class X86FastPreTileConfig : public MachineFunctionPass {
  MachineFunction *MF = nullptr;
  const X86Subtarget *ST = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  X86MachineFunctionInfo *X86FI = nullptr;
  MachineFrameInfo *MFI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  /// The basic block currently being configured.
  MachineBasicBlock *MBB = nullptr;
  /// Frame index of the stack slot holding the tile configuration data;
  /// -1 until the slot is created on first use.
  int CfgSS = -1;
  /// Row/column shape registers and spill-slot address register recorded
  /// for a tile PHI while it is being converted.
  struct PHIInfo {
    Register Row;
    Register Col;
    Register StackAddr;
  };
  /// Tile PHIs already (or currently being) converted, used to break
  /// circular PHI references during recursion in convertPHI().
  DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;

  /// Maps virtual regs to the frame index where these values are spilled.
  IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;

  /// Has a bit set for tile virtual register for which it was determined
  /// that it is alive across blocks.
  BitVector MayLiveAcrossBlocks;

  int getStackSpaceFor(Register VirtReg);
  void InitializeTileConfigStackSpace();
  bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
  void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
  void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
              MachineOperand *RowMO, MachineOperand *ColMO);
  void canonicalizePHIs(MachineBasicBlock &MBB);
  void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
  void convertPHIs(MachineBasicBlock &MBB);
  bool configBasicBlock(MachineBasicBlock &MBB);

public:
  // StackSlotForVirtReg defaults to -1, meaning "no spill slot assigned yet";
  // see getStackSpaceFor().
  X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}

  /// Return the pass name.
  StringRef getPassName() const override {
    return "Fast Tile Register Preconfigure";
  }

  /// Perform tile register configure.
  bool runOnMachineFunction(MachineFunction &MFunc) override;

  static char ID;
};

} // end anonymous namespace

char X86FastPreTileConfig::ID = 0;

INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
                      "Fast Tile Register Preconfigure", false, false)
INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
                    "Fast Tile Register Preconfigure", false, false)

/// Returns true if \p A is ordered before \p B within \p MBB (or if \p B is
/// the end iterator). Determined by a single linear scan from the beginning
/// of the block until either iterator is reached, so this is O(block size);
/// both iterators are assumed to point into \p MBB.
static bool dominates(MachineBasicBlock &MBB,
                      MachineBasicBlock::const_iterator A,
                      MachineBasicBlock::const_iterator B) {
  auto MBBEnd = MBB.end();
  if (B == MBBEnd)
    return true;

  // Walk forward until we hit A or B; whichever comes first dominates.
  MachineBasicBlock::const_iterator I = MBB.begin();
  for (; &*I != A && &*I != B; ++I)
    ;

  return &*I == A;
}

/// This allocates space for the specified virtual register to be held on the
/// stack.
int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
  // Find the location Reg would belong...
  int SS = StackSlotForVirtReg[VirtReg];
  // Already has space allocated?  (-1 is the map's "unassigned" default.)
  if (SS != -1)
    return SS;

  // Allocate a new stack object for this spill location...
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);
  int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);

  // Assign the slot.
  StackSlotForVirtReg[VirtReg] = FrameIdx;
  return FrameIdx;
}

/// Returns false if \p VirtReg is known to not live out of the current config.
/// If \p VirtReg lives out of the current MBB, it must live out of the current
/// config. The result is cached in MayLiveAcrossBlocks so the use scan runs
/// at most once per register.
bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
  if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
    return true;

  for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
    // Any use in another block means the value crosses a block boundary.
    if (UseInst.getParent() != MBB) {
      MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
      return true;
    }

    // The use and def are in the same MBB. If the tile register is
    // reconfigured, it is clobbered and we need to spill and reload
    // tile register.
    if (CfgMI) {
      if (dominates(*MBB, *CfgMI, UseInst)) {
        MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
        return true;
      }
    }
  }

  return false;
}

/// Zero-initialize the tile-config stack slot (CfgSS) at the top of the entry
/// block using the widest available vector stores (1 zmm, 2 ymm, or 4 xmm —
/// 64 bytes either way), then write palette byte 1 at offset 0. CfgSS must
/// already have been created by configBasicBlock().
void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
  MachineBasicBlock &MBB = MF->front();
  MachineInstr *MI = &*MBB.getFirstNonPHI();
  DebugLoc DL;
  if (ST->hasAVX512()) {
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
        .addReg(Zmm);
  } else if (ST->hasAVX2()) {
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
        .addReg(Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
                      32)
        .addReg(Ymm);
  } else {
    assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
    unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
        .addReg(Xmm);
  }
  // Fill in the palette first.
  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
      .addImm(1);
}

/// Insert spill instruction for \p VirtReg before \p Before.
/// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
                                 Register VirtReg, bool Kill) {
  LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
  int FI = getStackSpaceFor(VirtReg);
  LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');

  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  // Don't need shape information for tile store, because it is adjacent to
  // the tile def instruction.
  TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI);
  ++NumStores;

  // TODO: update DBG_VALUEs
}

/// Insert a reload of \p OrigReg (from its spill slot) before \p UseMI,
/// using \p RowMO / \p ColMO as the tile shape. If \p UseMI is a COPY, the
/// reload is folded into it: the copy's destination becomes the tileload
/// result and the copy is erased; otherwise \p UseMI is rewritten to use the
/// freshly loaded register.
void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
                                  Register OrigReg, MachineOperand *RowMO,
                                  MachineOperand *ColMO) {
  int FI = getStackSpaceFor(OrigReg);
  const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
  Register TileReg;
  // Fold copy to tileload
  // BB1:
  // spill src to s
  //
  // BB2:
  // t = copy src
  // -->
  // t = tileload (s)
  if (UseMI->isCopy())
    TileReg = UseMI->getOperand(0).getReg();
  else
    TileReg = MRI->createVirtualRegister(&RC);
  // Can't use TII->loadRegFromStackSlot(), because we need the shape
  // information for reload.
  // tileloadd (%sp, %idx), %tmm
  unsigned Opc = X86::PTILELOADDV;
  // The tileload stride is the fixed 64-byte row pitch of the spill slot.
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  // FIXME: MBB is not the parent of UseMI.
  MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
                                TII->get(X86::MOV64ri), StrideReg)
                            .addImm(64);
  NewMI = addFrameReference(
      BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
          .addReg(RowMO->getReg())
          .addReg(ColMO->getReg()),
      FI);
  // Operand 5 is the index register of the 5-operand x86 memory reference
  // appended by addFrameReference (base/scale/index/disp/segment after the
  // def, row and col operands); patch it to hold the stride.
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  // The shape registers now have an extra user (the tileload), so they must
  // not be marked killed at their original use.
  RowMO->setIsKill(false);
  ColMO->setIsKill(false);
  // Erase copy instruction after it is folded.
  if (UseMI->isCopy()) {
    UseMI->eraseFromParent();
  } else {
    // Replace the register in the user MI.
    for (auto &MO : UseMI->operands()) {
      if (MO.isReg() && MO.getReg() == OrigReg)
        MO.setReg(TileReg);
    }
  }

  ++NumLoads;
  LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
                    << printReg(TileReg, TRI) << '\n');
}

/// Return true if \p MI is a pseudo that defines a tile register (virtual
/// TILE-class register, or physical TMM0-TMM7).
static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  // The instruction must have 3 operands: tile def, row, col.
  if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
    return false;
  MachineOperand &MO = MI.getOperand(0);

  if (MO.isReg()) {
    Register Reg = MO.getReg();
    // FIXME it may be used after Greedy RA and the physical
    // register is not rewritten yet.
    if (Reg.isVirtual() &&
        MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
      return true;
    if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
      return true;
  }

  return false;
}

/// Return the (row, col) shape of \p TileReg, found by walking its def:
/// a tile def carries the shape as operands 1 and 2; a COPY is followed
/// through to its source. Anything else (in particular a PHI, which should
/// already have been converted) is a bug.
static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
  MachineInstr *MI = MRI->getVRegDef(TileReg);
  if (isTileDef(MRI, *MI)) {
    MachineOperand *RowMO = &MI->getOperand(1);
    MachineOperand *ColMO = &MI->getOperand(2);
    return ShapeT(RowMO, ColMO, MRI);
  } else if (MI->isCopy()) {
    TileReg = MI->getOperand(1).getReg();
    return getShape(MRI, TileReg);
  }

  // The def should not be PHI node, because we walk the MBB in reverse post
  // order.
  assert(MI->isPHI() && "Unexpected PHI when get shape.");
  llvm_unreachable("Unexpected MI when get shape.");
}

// BB0:
// spill t0 to s0
// BB1:
// spill t1 to s1
//
// BB2:
// t = phi [t0, bb0] [t1, bb1]
// -->
// row = phi [r0, bb0] [r1, bb1]
// col = phi [c0, bb0] [c1, bb1]
// s = phi [s0, bb0] [s1, bb1]
// t = tileload row, col, s
// The new instruction is inserted at the end of the phi node. The order
// of the original phi node is not ensured.
void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
                                      MachineInstr &PHI) {
  // 1. Create instruction to get stack slot address of each incoming block.
  // 2. Create PHI node for the stack address.
  // 3. Create PHI node for shape. If one of the incoming shape is immediate
  //    use the immediate and delete the PHI node.
  // 4. Create tileload instruction from the stack address.
  Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                        TII->get(X86::PHI), StackAddrReg);
  Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), RowReg);
  Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), ColReg);
  // Record the mapping of phi node and its row/column information.
  // This must happen before the incoming-operand loop so that recursive
  // calls can detect circular PHI references through VisitedPHIs.
  VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};

  // PHI operands come in (value, block) pairs starting at index 1.
  for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
    // Get the 2 incoming value of tile register and MBB.
    Register InTileReg = PHI.getOperand(I).getReg();
    // Mark it as liveout, so that it will be spilled when visit
    // the incoming MBB. Otherwise since phi will be deleted, it
    // would miss spill when visit incoming MBB.
    MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));
    MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();

    MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
    MachineBasicBlock::iterator InsertPos;
    if (TileDefMI->isPHI()) {
      InsertPos = TileDefMI->getParent()->getFirstNonPHI();
      if (VisitedPHIs.count(TileDefMI)) { // circular phi reference
        // def t1
        // / \
        // def t2 t3 = phi(t1, t4) <--
        // \ / |
        // t4 = phi(t2, t3)-------------
        //
        // For each (row, column and stack address) append phi incoming value.
        // Create r3 = phi(r1, r4)
        // Create r4 = phi(r2, r3)
        Register InRowReg = VisitedPHIs[TileDefMI].Row;
        Register InColReg = VisitedPHIs[TileDefMI].Col;
        Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
        continue;
      } else {
        // Recursively convert PHI to tileload
        convertPHI(TileDefMI->getParent(), *TileDefMI);
        // The PHI node is converted to tileload instruction. Get the stack
        // address from tileload operands.
        MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
        assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
        // PTILELOADDV layout: 0 = def, 1 = row, 2 = col, 3 = base address.
        Register InRowReg = TileLoad->getOperand(1).getReg();
        Register InColReg = TileLoad->getOperand(2).getReg();
        Register InStackAddrReg = TileLoad->getOperand(3).getReg();
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
      }
    } else {
      InsertPos = TileDefMI->getIterator();

      // Fill the incoming operand of row/column phi instruction.
      ShapeT Shape = getShape(MRI, InTileReg);
      Shape.getRow()->setIsKill(false);
      Shape.getCol()->setIsKill(false);
      RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
      ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);

      // The incoming tile register live out of its def BB, it would be
      // spilled. Create MI to get the spill stack slot address for the
      // tile register.
      int FI = getStackSpaceFor(InTileReg);
      Register InStackAddrReg =
          MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
      addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
                        TII->get(X86::LEA64r), InStackAddrReg)
                    .addFrameIndex(FI),
                0);
      AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
    }
  }

  // Materialize the tile value: load it back from the PHI-selected stack
  // address with the PHI-selected shape, right after the PHI section.
  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
      .addImm(64);
  Register TileReg = PHI.getOperand(0).getReg();
  MachineInstr *NewMI = addDirectMem(
      BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
          .addReg(RowReg)
          .addReg(ColReg),
      StackAddrReg);
  // Operand 5 is the index register of the appended memory reference; use it
  // to carry the 64-byte stride (same pattern as reload()).
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  PHI.eraseFromParent();
  VisitedPHIs.erase(&PHI);
}

/// Return true if operand 0 of \p MI defines a virtual TILE-class register.
static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  MachineOperand &MO = MI.getOperand(0);
  if (MO.isReg() && MO.getReg().isVirtual() &&
      MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID)
    return true;
  return false;
}

void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;

  // Collect the tile-defining PHIs at the top of the block.
  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  // Canonicalize the phi node first. One tile phi may depends previous
  // phi node. For below case, we need convert %t4.
  //
  // BB0:
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t3 BB0)
  // -->
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t2 BB0)
  //
  while (!PHIs.empty()) {
    MachineInstr *PHI = PHIs.pop_back_val();

    // Find the operand that is incoming from the same MBB and the def
    // is also phi node.
    MachineOperand *InMO = nullptr;
    MachineInstr *DefMI = nullptr;
    for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
      Register InTileReg = PHI->getOperand(I).getReg();
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      DefMI = MRI->getVRegDef(InTileReg);
      if (InMBB != &MBB || !DefMI->isPHI())
        continue;

      InMO = &PHI->getOperand(I);
      break;
    }
    // If can't find such operand, do nothing.
    if (!InMO)
      continue;

    // Current phi node depends on previous phi node. Break the
    // dependency: replace the operand with the defining PHI's own
    // incoming value for this block.
    Register DefTileReg;
    for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
      // NOTE(review): this reads the block operand from PHI, not DefMI,
      // while iterating DefMI's operand pairs — it appears to assume both
      // PHIs list their incoming blocks in the same order. TODO confirm
      // this is intended.
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      if (InMBB != &MBB)
        continue;
      DefTileReg = DefMI->getOperand(I).getReg();
      InMO->setReg(DefTileReg);
      break;
    }
  }
}

/// Convert every tile-defining PHI in \p MBB into shape/address PHIs plus a
/// tileload (see convertPHI).
void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;
  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  while (!PHIs.empty()) {
    MachineInstr *MI = PHIs.pop_back_val();
    // Reset recursion state for each root PHI conversion.
    VisitedPHIs.clear();
    convertPHI(&MBB, *MI);
  }
}

// PreTileConfig should configure the tile registers based on basic
// block.
/// Insert ldtilecfg instructions into \p MBB so that every group of tile
/// instructions is preceded by a configuration whose shapes are defined
/// before it. Walks the block in reverse, tracking the last seen shape
/// definition (LastShapeMI) and the last inserted config (LastTileCfg).
/// Returns true if the block was changed.
bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
  this->MBB = &MBB;
  bool Change = false;
  MachineInstr *LastShapeMI = nullptr;
  MachineInstr *LastTileCfg = nullptr;
  bool HasUnconfigTile = false;

  // Insert a PLDTILECFGV before \p Before, lazily creating the shared
  // config stack slot (CfgSS) on first use.
  auto Config = [&](MachineInstr &Before) {
    if (CfgSS == -1)
      CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
                                     ST->getTileConfigAlignment(), false);
    LastTileCfg = addFrameReference(
        BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
    // Shapes seen so far are consumed by this config; start tracking anew.
    LastShapeMI = nullptr;
    Change = true;
  };
  // True if MI reads or writes any virtual tile register.
  auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg.isVirtual() &&
          MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
        return true;
    }
    return false;
  };
  for (MachineInstr &MI : reverse(MBB)) {
    // We have transformed phi node before configuring BB.
    if (MI.isPHI())
      break;
    // Don't collect the shape of used tile, the tile should be defined
    // before the tile use. Spill and reload would happen if there is only
    // tile use after ldtilecfg, so the shape can be collected from reload.
    // Take below code for example. %t would be reloaded before tilestore
    // call
    // ....
    // tilestore %r, %c, %t
    // -->
    // call
    // ldtilecfg
    // %t = tileload %r, %c
    // tilestore %r, %c, %t
    if (HasTileOperand(MRI, MI))
      HasUnconfigTile = true;
    // According to AMX ABI, all the tile registers including config register
    // are volatile. Caller need to save/restore config register.
    if (MI.isCall() && HasUnconfigTile) {
      MachineBasicBlock::iterator I;
      // Place the config after the last shape def when it follows the call;
      // otherwise place it right after the call itself.
      if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
        I = ++LastShapeMI->getIterator();
      else
        I = ++MI.getIterator();
      Config(*I);
      HasUnconfigTile = false;
      continue;
    }
    if (!isTileDef(MRI, MI))
      continue;
    //
    //---------------------------------------------------------------------
    // Don't handle COPY instruction. If the src and dst of the COPY can be
    // in the same config in below case, we just check the shape of t0.
    // def row0
    // def col0
    // ldtilecfg
    // t0 = tilezero(row0, col0)
    // t1 = copy t0
    // ...
    // If the src and dst of the COPY can NOT be in the same config in below
    // case. Reload would be generated before the copy instruction.
    // def row0
    // def col0
    // t0 = tilezero(row0, col0)
    // spill t0
    // ...
    // def row1
    // def col1
    // ldtilecfg
    // t1 = tilezero(row1, col1)
    // reload t0
    // t1 = copy t0
    //---------------------------------------------------------------------
    //
    // If MI dominate the last shape def instruction, we need insert
    // ldtilecfg after LastShapeMI now. The config doesn't include
    // current MI.
    // def row0
    // def col0
    // tilezero(row0, col0) <- MI
    // def row1
    // def col1
    // ldtilecfg <- insert
    // tilezero(row1, col1)
    if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
      Config(*(++LastShapeMI->getIterator()));
    // Tile defs carry their shape as operands 1 (row) and 2 (column).
    MachineOperand *RowMO = &MI.getOperand(1);
    MachineOperand *ColMO = &MI.getOperand(2);
    MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
    MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
    // If the shape is defined in current MBB, check the domination.
    // FIXME how about loop?
    // Track whichever shape def occurs latest in the block, since the
    // config must be inserted after all shapes it depends on.
    if (RowMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = RowMI;
      else if (dominates(MBB, LastShapeMI, RowMI))
        LastShapeMI = RowMI;
    }
    if (ColMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = ColMI;
      else if (dominates(MBB, LastShapeMI, ColMI))
        LastShapeMI = ColMI;
    }
    // If there is user live out of the tilecfg, spill it and reload in
    // before the user.
    Register TileReg = MI.getOperand(0).getReg();
    if (mayLiveOut(TileReg, LastTileCfg))
      spill(++MI.getIterator(), TileReg, false);
    for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
      if (UseMI.getParent() == &MBB) {
        // check user should not across ldtilecfg
        if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
          continue;
        // reload before UseMI
        reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      } else {
        // Don't reload for phi instruction, we handle phi reload separately.
        // TODO: merge the reload for the same user MBB.
        if (!UseMI.isPHI())
          reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      }
    }
  }

  // Configure tile registers at the head of the MBB
  if (HasUnconfigTile) {
    MachineInstr *Before;
    // A PHI-defined shape is handled elsewhere; in that case (or with no
    // shape def at all) configure right after the PHIs.
    if (LastShapeMI == nullptr || LastShapeMI->isPHI())
      Before = &*MBB.getFirstNonPHI();
    else
      Before = &*(++LastShapeMI->getIterator());

    Config(*Before);
  }

  return Change;
}

/// Pass entry point: canonicalize and lower tile PHIs, then configure each
/// basic block in reverse post order. Returns true if the function changed.
bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
  MF = &MFunc;
  MRI = &MFunc.getRegInfo();
  ST = &MFunc.getSubtarget<X86Subtarget>();
  TII = ST->getInstrInfo();
  X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
  MFI = &MFunc.getFrameInfo();
  TRI = ST->getRegisterInfo();
  // -1 marks the config stack slot as not-yet-allocated.
  CfgSS = -1;

  unsigned NumVirtRegs = MRI->getNumVirtRegs();
  // Abandon early if there is no tile register to config.
  bool HasVirtTileReg = false;
  for (unsigned I = 0, E = NumVirtRegs; I != E; ++I) {
    Register VirtReg = Register::index2VirtReg(I);
    if (MRI->getRegClass(VirtReg)->getID() == X86::TILERegClassID) {
      HasVirtTileReg = true;
      break;
    }
  }
  if (!HasVirtTileReg)
    return false;

  StackSlotForVirtReg.resize(NumVirtRegs);
  MayLiveAcrossBlocks.clear();
  // We will create register during config. *3 is to make sure
  // the virtual register number doesn't exceed the size of
  // the bit vector.
  MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
  bool Change = false;
  // This pass runs before register allocation and relies on SSA form.
  assert(MRI->isSSA());

  // Canonicalize the phi node first.
  for (MachineBasicBlock &MBB : MFunc)
    canonicalizePHIs(MBB);

  // Loop over all of the basic blocks in reverse post order and insert
  // ldtilecfg for tile registers. The reverse post order is to facilitate
  // PHI node convert.
  ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
  for (MachineBasicBlock *MBB : RPOT) {
    convertPHIs(*MBB);
    Change |= configBasicBlock(*MBB);
  }

  // Only pay for zero-initializing the config area if a config was added.
  if (Change)
    InitializeTileConfigStackSpace();

  StackSlotForVirtReg.clear();
  return Change;
}

/// Factory function used by the X86 pass pipeline.
FunctionPass *llvm::createX86FastPreTileConfigPass() {
  return new X86FastPreTileConfig();
}