1496156acSLuo, Yuanke //===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===// 2496156acSLuo, Yuanke // 3496156acSLuo, Yuanke // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4496156acSLuo, Yuanke // See https://llvm.org/LICENSE.txt for license information. 5496156acSLuo, Yuanke // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6496156acSLuo, Yuanke // 7496156acSLuo, Yuanke //===----------------------------------------------------------------------===// 8496156acSLuo, Yuanke // 9496156acSLuo, Yuanke /// \file Pass to preconfig the shape of physical tile registers 10496156acSLuo, Yuanke /// It inserts ldtilecfg ahead of each group of tile registers. The algorithm 11496156acSLuo, Yuanke /// walks each instruction of the basic block in reverse order. All the tile 12496156acSLuo, Yuanke /// registers that live out of the basic block would be spilled and reloaded 13496156acSLuo, Yuanke /// before their users. It also checks the dependency of the shape to ensure 14496156acSLuo, Yuanke /// the shape is defined before ldtilecfg.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "fastpretileconfig"

STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads, "Number of loads added");

namespace {

/// Pass that walks each basic block (in reverse) and inserts ldtilecfg
/// instructions so that every group of AMX tile defs is covered by a tile
/// configuration whose shapes are defined before the config point. Tile
/// values that live across a config point (or across blocks) are spilled
/// to the stack and reloaded before their users.
class X86FastPreTileConfig : public MachineFunctionPass {
  MachineFunction *MF = nullptr;
  const X86Subtarget *ST = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  X86MachineFunctionInfo *X86FI = nullptr;
  MachineFrameInfo *MFI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  // Basic block currently being configured (set by configBasicBlock).
  MachineBasicBlock *MBB = nullptr;
  // Stack slot holding the tile configuration data; -1 until allocated.
  int CfgSS = -1;
  // Row/column shape registers and spill-slot address associated with a
  // converted tile PHI.
  struct PHIInfo {
    Register Row;
    Register Col;
    Register StackAddr;
  };
  // PHIs already converted (or in flight) during convertPHI recursion; used
  // to break circular PHI references.
  DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;

  /// Maps virtual regs to the frame index where these values are spilled.
  IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;

  /// Has a bit set for tile virtual register for which it was determined
  /// that it is alive across blocks.
  BitVector MayLiveAcrossBlocks;

  int getStackSpaceFor(Register VirtReg);
  void InitializeTileConfigStackSpace();
  bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
  void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
  void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
              MachineOperand *RowMO, MachineOperand *ColMO);
  void canonicalizePHIs(MachineBasicBlock &MBB);
  void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
  void convertPHIs(MachineBasicBlock &MBB);
  bool configBasicBlock(MachineBasicBlock &MBB);

public:
  X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}

  /// Return the pass name.
  StringRef getPassName() const override {
    return "Fast Tile Register Preconfigure";
  }

  /// Perform tile register configure.
  bool runOnMachineFunction(MachineFunction &MFunc) override;

  static char ID;
};

} // end anonymous namespace

char X86FastPreTileConfig::ID = 0;

INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
                      "Fast Tile Register Preconfigure", false, false)
INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
                    "Fast Tile Register Preconfigure", false, false)

/// Return true if \p A dominates \p B inside the single block \p MBB, i.e.
/// \p A appears before \p B in instruction order. Implemented as a linear
/// scan from the block start; \p B == end() counts as dominated by anything.
static bool dominates(MachineBasicBlock &MBB,
                      MachineBasicBlock::const_iterator A,
                      MachineBasicBlock::const_iterator B) {
  auto MBBEnd = MBB.end();
  if (B == MBBEnd)
    return true;

  // Walk forward until we hit either iterator; whichever is found first
  // dominates the other.
  MachineBasicBlock::const_iterator I = MBB.begin();
  for (; &*I != A && &*I != B; ++I)
    ;

  return &*I == A;
}

/// This allocates space for the specified virtual register to be held on the
/// stack.
int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
  // Find the location Reg would belong...
  int SS = StackSlotForVirtReg[VirtReg];
  // Already has space allocated?
  if (SS != -1)
    return SS;

  // Allocate a new stack object for this spill location...
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);
  int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);

  // Assign the slot.
  StackSlotForVirtReg[VirtReg] = FrameIdx;
  return FrameIdx;
}

/// Returns false if \p VirtReg is known to not live out of the current config.
/// If \p VirtReg live out of the current MBB, it must live out of the current
/// config
bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
  // Cached result from a previous query.
  if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
    return true;

  for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
    // Any use outside the current block means the value crosses a block
    // boundary, hence it crosses a config.
    if (UseInst.getParent() != MBB) {
      MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
      return true;
    }

    // The use and def are in the same MBB. If the tile register is
    // reconfigured, it is clobbered and we need to spill and reload
    // tile register.
    if (CfgMI) {
      if (dominates(*MBB, *CfgMI, UseInst)) {
        MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
        return true;
      }
    }
  }

  return false;
}

/// Zero-initialize the tile config stack object (slot CfgSS, 64 bytes) in
/// the entry block using the widest vector stores available, then write the
/// palette byte. Must run after CfgSS has been allocated.
void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
  MachineBasicBlock &MBB = MF->front();
  MachineInstr *MI = &*MBB.getFirstNonPHI();
  DebugLoc DL;
  if (ST->hasAVX512()) {
    // One 64-byte zmm store covers the whole config.
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
        .addReg(Zmm);
  } else if (ST->hasAVX2()) {
    // Two 32-byte ymm stores.
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
        .addReg(Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
                      32)
        .addReg(Ymm);
  } else {
    // Four 16-byte xmm stores.
    assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
    unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
        .addReg(Xmm);
  }
  // Fill in the palette first.
  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
      .addImm(1);
}

/// Insert spill instruction for \p VirtReg before \p Before.
/// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
                                 Register VirtReg, bool Kill) {
  LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
  int FI = getStackSpaceFor(VirtReg);
  LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');

  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  // Don't need shape information for tile store, because it is adjacent to
  // the tile def instruction.
  TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI,
                           Register());
  ++NumStores;

  // TODO: update DBG_VALUEs
}

/// Insert reload instruction for \p OrigReg before \p UseMI, using the shape
/// operands \p RowMO / \p ColMO for the generated PTILELOADDV.
void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
                                  Register OrigReg, MachineOperand *RowMO,
                                  MachineOperand *ColMO) {
  int FI = getStackSpaceFor(OrigReg);
  const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
  Register TileReg;
  // Fold copy to tileload
  // BB1:
  // spill src to s
  //
  // BB2:
  // t = copy src
  // -->
  // t = tileload (s)
  if (UseMI->isCopy())
    TileReg = UseMI->getOperand(0).getReg();
  else
    TileReg = MRI->createVirtualRegister(&RC);
  // Can't use TII->loadRegFromStackSlot(), because we need the shape
  // information for reload.
  // tileloadd (%sp, %idx), %tmm
  unsigned Opc = X86::PTILELOADDV;
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  // FIXME: MBB is not the parent of UseMI.
  // Materialize the tile stride (64 bytes per row).
  MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
                                TII->get(X86::MOV64ri), StrideReg)
                            .addImm(64);
  NewMI = addFrameReference(
      BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
          .addReg(RowMO->getReg())
          .addReg(ColMO->getReg()),
      FI);
  // Operand 5 is the index register of the frame address; replace it with
  // the stride.
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  // The shape registers now have an extra user (the reload), so they must
  // not be marked killed at the original use.
  RowMO->setIsKill(false);
  ColMO->setIsKill(false);
  // Erase copy instruction after it is folded.
  if (UseMI->isCopy()) {
    UseMI->eraseFromParent();
  } else {
    // Replace the register in the user MI.
    for (auto &MO : UseMI->operands()) {
      if (MO.isReg() && MO.getReg() == OrigReg)
        MO.setReg(TileReg);
    }
  }

  ++NumLoads;
  LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
                    << printReg(TileReg, TRI) << '\n');
}

/// Return how many physical tile registers \p Reg represents: 1 for a plain
/// tile (TILE class / TMM0-TMM7), 2 for a tile pair (TILEPAIR class /
/// TMM0_TMM1-TMM6_TMM7), 0 if \p Reg is not a tile register at all.
static unsigned getTileDefNum(MachineRegisterInfo *MRI, Register Reg) {
  if (Reg.isVirtual()) {
    unsigned RegClassID = MRI->getRegClass(Reg)->getID();
    if (RegClassID == X86::TILERegClassID)
      return 1;
    if (RegClassID == X86::TILEPAIRRegClassID)
      return 2;
  } else {
    if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
      return 1;
    if (Reg >= X86::TMM0_TMM1 && Reg <= X86::TMM6_TMM7)
      return 2;
  }
  return 0;
}

/// Return true if \p VirtReg is a tile (or tile-pair) register.
static bool isTileRegister(MachineRegisterInfo *MRI, Register VirtReg) {
  return getTileDefNum(MRI, VirtReg) > 0;
}

/// Return true if \p MI is a pseudo instruction that defines a tile register
/// with explicit shape operands (operand 0 = tile def, 1 = row, 2 = col).
static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  // The instruction must have 3 operands: tile def, row, col.
  if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
    return false;
  MachineOperand &MO = MI.getOperand(0);

  if (!MO.isReg())
    return false;

  return getTileDefNum(MRI, MO.getReg()) > 0;
}

/// Return the (row, col) shape of \p TileReg by walking its def chain:
/// a tile def carries the shape in operands 1 and 2, and copies are looked
/// through recursively.
static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
  MachineInstr *MI = MRI->getVRegDef(TileReg);
  if (isTileDef(MRI, *MI)) {
    MachineOperand *RowMO = &MI->getOperand(1);
    MachineOperand *ColMO = &MI->getOperand(2);
    return ShapeT(RowMO, ColMO, MRI);
  } else if (MI->isCopy()) {
    TileReg = MI->getOperand(1).getReg();
    return getShape(MRI, TileReg);
  }

  // The def should not be PHI node, because we walk the MBB in reverse post
  // order.
  // NOTE(review): the assert condition `MI->isPHI()` looks inverted relative
  // to the comment above (it fires for every non-PHI def in release-assert
  // builds before reaching llvm_unreachable) — confirm the intended condition.
  assert(MI->isPHI() && "Unexpected PHI when get shape.");
  llvm_unreachable("Unexpected MI when get shape.");
}

// BB0:
// spill t0 to s0
// BB1:
// spill t1 to s1
//
// BB2:
// t = phi [t0, bb0] [t1, bb1]
// -->
// row = phi [r0, bb0] [r1, bb1]
// col = phi [c0, bb0] [c1, bb1]
//   s = phi [s0, bb0] [s1, bb1]
//   t = tileload row, col, s
// The new instruction is inserted at the end of the phi node. The order
// of the original phi node is not ensured.
void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
                                      MachineInstr &PHI) {
  // 1. Create instruction to get stack slot address of each incoming block.
  // 2. Create PHI node for the stack address.
  // 3. Create PHI node for shape. If one of the incoming shape is immediate
  //    use the immediate and delete the PHI node.
  // 4. Create tileload instruction from the stack address.
  Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                        TII->get(X86::PHI), StackAddrReg);
  Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), RowReg);
  Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), ColReg);
  // Record the mapping of phi node and its row/column information.
  VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};

  for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
    // Get the 2 incoming value of tile register and MBB.
    Register InTileReg = PHI.getOperand(I).getReg();
    // Mark it as liveout, so that it will be spilled when visit
    // the incoming MBB. Otherwise since phi will be deleted, it
    // would miss spill when visit incoming MBB.
    MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));
    MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();

    MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
    MachineBasicBlock::iterator InsertPos;
    if (TileDefMI->isPHI()) {
      InsertPos = TileDefMI->getParent()->getFirstNonPHI();
      if (VisitedPHIs.count(TileDefMI)) { // circular phi reference
        //        def t1
        //       /       \
        //  def t2       t3 = phi(t1, t4) <--
        //       \       /                  |
        //      t4 = phi(t2, t3)-------------
        //
        // For each (row, column and stack address) append phi incoming value.
        // Create r3 = phi(r1, r4)
        // Create r4 = phi(r2, r3)
        Register InRowReg = VisitedPHIs[TileDefMI].Row;
        Register InColReg = VisitedPHIs[TileDefMI].Col;
        Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
        continue;
      } else {
        // Recursively convert PHI to tileload
        convertPHI(TileDefMI->getParent(), *TileDefMI);
        // The PHI node is converted to tileload instruction. Get the stack
        // address from tileload operands.
        MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
        assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
        Register InRowReg = TileLoad->getOperand(1).getReg();
        Register InColReg = TileLoad->getOperand(2).getReg();
        Register InStackAddrReg = TileLoad->getOperand(3).getReg();
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
      }
    } else {
      InsertPos = TileDefMI->getIterator();

      // Fill the incoming operand of row/column phi instruction.
      ShapeT Shape = getShape(MRI, InTileReg);
      Shape.getRow()->setIsKill(false);
      Shape.getCol()->setIsKill(false);
      RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
      ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);

      // The incoming tile register live out of its def BB, it would be spilled.
      // Create MI to get the spill stack slot address for the tile register
      int FI = getStackSpaceFor(InTileReg);
      Register InStackAddrReg =
          MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
      addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
                        TII->get(X86::LEA64r), InStackAddrReg)
                    .addFrameIndex(FI),
                0);
      AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
    }
  }

  // Materialize the reload of the merged tile value right after the PHIs.
  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
      .addImm(64);
  Register TileReg = PHI.getOperand(0).getReg();
  MachineInstr *NewMI = addDirectMem(
      BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
          .addReg(RowReg)
          .addReg(ColReg),
      StackAddrReg);
  // Operand 5 is the index register of the direct-mem address; reuse it for
  // the stride.
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  PHI.eraseFromParent();
  VisitedPHIs.erase(&PHI);
}

/// Return true if operand 0 of \p MI is a virtual tile register def.
static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  MachineOperand &MO = MI.getOperand(0);
  if (MO.isReg() && MO.getReg().isVirtual() && isTileRegister(MRI, MO.getReg()))
    return true;
  return false;
}
442496156acSLuo, Yuanke 443496156acSLuo, Yuanke void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) { 444496156acSLuo, Yuanke SmallVector<MachineInstr *, 8> PHIs; 445496156acSLuo, Yuanke 446496156acSLuo, Yuanke for (MachineInstr &MI : MBB) { 447496156acSLuo, Yuanke if (!MI.isPHI()) 448496156acSLuo, Yuanke break; 449496156acSLuo, Yuanke if (!isTileRegDef(MRI, MI)) 450496156acSLuo, Yuanke continue; 451496156acSLuo, Yuanke PHIs.push_back(&MI); 452496156acSLuo, Yuanke } 453496156acSLuo, Yuanke // Canonicalize the phi node first. One tile phi may depeneds previous 454496156acSLuo, Yuanke // phi node. For below case, we need convert %t4. 455496156acSLuo, Yuanke // 456496156acSLuo, Yuanke // BB0: 457496156acSLuo, Yuanke // %t3 = phi (t1 BB1, t2 BB0) 458496156acSLuo, Yuanke // %t4 = phi (t5 BB1, t3 BB0) 459496156acSLuo, Yuanke // --> 460496156acSLuo, Yuanke // %t3 = phi (t1 BB1, t2 BB0) 461496156acSLuo, Yuanke // %t4 = phi (t5 BB1, t2 BB0) 462496156acSLuo, Yuanke // 463496156acSLuo, Yuanke while (!PHIs.empty()) { 464496156acSLuo, Yuanke MachineInstr *PHI = PHIs.pop_back_val(); 465496156acSLuo, Yuanke 466496156acSLuo, Yuanke // Find the operand that is incoming from the same MBB and the def 467496156acSLuo, Yuanke // is also phi node. 468496156acSLuo, Yuanke MachineOperand *InMO = nullptr; 469496156acSLuo, Yuanke MachineInstr *DefMI = nullptr; 470496156acSLuo, Yuanke for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) { 471496156acSLuo, Yuanke Register InTileReg = PHI->getOperand(I).getReg(); 472496156acSLuo, Yuanke MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB(); 473496156acSLuo, Yuanke DefMI = MRI->getVRegDef(InTileReg); 474496156acSLuo, Yuanke if (InMBB != &MBB || !DefMI->isPHI()) 475496156acSLuo, Yuanke continue; 476496156acSLuo, Yuanke 477496156acSLuo, Yuanke InMO = &PHI->getOperand(I); 478496156acSLuo, Yuanke break; 479496156acSLuo, Yuanke } 480496156acSLuo, Yuanke // If can't find such operand, do nothing. 
481496156acSLuo, Yuanke if (!InMO) 482496156acSLuo, Yuanke continue; 483496156acSLuo, Yuanke 484496156acSLuo, Yuanke // Current phi node depends on previous phi node. Break the 485496156acSLuo, Yuanke // dependency. 486496156acSLuo, Yuanke Register DefTileReg; 487496156acSLuo, Yuanke for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) { 488496156acSLuo, Yuanke MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB(); 489496156acSLuo, Yuanke if (InMBB != &MBB) 490496156acSLuo, Yuanke continue; 491496156acSLuo, Yuanke DefTileReg = DefMI->getOperand(I).getReg(); 492496156acSLuo, Yuanke InMO->setReg(DefTileReg); 493496156acSLuo, Yuanke break; 494496156acSLuo, Yuanke } 495496156acSLuo, Yuanke } 496496156acSLuo, Yuanke } 497496156acSLuo, Yuanke 498496156acSLuo, Yuanke void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) { 499496156acSLuo, Yuanke SmallVector<MachineInstr *, 8> PHIs; 500496156acSLuo, Yuanke for (MachineInstr &MI : MBB) { 501496156acSLuo, Yuanke if (!MI.isPHI()) 502496156acSLuo, Yuanke break; 503496156acSLuo, Yuanke if (!isTileRegDef(MRI, MI)) 504496156acSLuo, Yuanke continue; 505496156acSLuo, Yuanke PHIs.push_back(&MI); 506496156acSLuo, Yuanke } 507496156acSLuo, Yuanke while (!PHIs.empty()) { 508496156acSLuo, Yuanke MachineInstr *MI = PHIs.pop_back_val(); 509496156acSLuo, Yuanke VisitedPHIs.clear(); 510496156acSLuo, Yuanke convertPHI(&MBB, *MI); 511496156acSLuo, Yuanke } 512496156acSLuo, Yuanke } 513496156acSLuo, Yuanke 514496156acSLuo, Yuanke // PreTileConfig should configure the tile registers based on basic 515496156acSLuo, Yuanke // block. 
516496156acSLuo, Yuanke bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) { 517496156acSLuo, Yuanke this->MBB = &MBB; 518496156acSLuo, Yuanke bool Change = false; 519496156acSLuo, Yuanke MachineInstr *LastShapeMI = nullptr; 520496156acSLuo, Yuanke MachineInstr *LastTileCfg = nullptr; 521496156acSLuo, Yuanke bool HasUnconfigTile = false; 522496156acSLuo, Yuanke 523496156acSLuo, Yuanke auto Config = [&](MachineInstr &Before) { 524496156acSLuo, Yuanke if (CfgSS == -1) 525496156acSLuo, Yuanke CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(), 526496156acSLuo, Yuanke ST->getTileConfigAlignment(), false); 527496156acSLuo, Yuanke LastTileCfg = addFrameReference( 528aaaf9cedSLuo, Yuanke BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS); 529496156acSLuo, Yuanke LastShapeMI = nullptr; 530496156acSLuo, Yuanke Change = true; 531496156acSLuo, Yuanke }; 532496156acSLuo, Yuanke auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) { 533496156acSLuo, Yuanke for (const MachineOperand &MO : MI.operands()) { 534496156acSLuo, Yuanke if (!MO.isReg()) 535496156acSLuo, Yuanke continue; 536496156acSLuo, Yuanke Register Reg = MO.getReg(); 537*c72a751dSPhoebe Wang if (Reg.isVirtual() && isTileRegister(MRI, Reg)) 538496156acSLuo, Yuanke return true; 539496156acSLuo, Yuanke } 540496156acSLuo, Yuanke return false; 541496156acSLuo, Yuanke }; 542496156acSLuo, Yuanke for (MachineInstr &MI : reverse(MBB)) { 543496156acSLuo, Yuanke // We have transformed phi node before configuring BB. 544496156acSLuo, Yuanke if (MI.isPHI()) 545496156acSLuo, Yuanke break; 546496156acSLuo, Yuanke // Don't collect the shape of used tile, the tile should be defined 547496156acSLuo, Yuanke // before the tile use. Spill and reload would happen if there is only 548496156acSLuo, Yuanke // tile use after ldtilecfg, so the shape can be collected from reload. 549496156acSLuo, Yuanke // Take below code for example. 
%t would be reloaded before tilestore 550496156acSLuo, Yuanke // call 551496156acSLuo, Yuanke // .... 552496156acSLuo, Yuanke // tilestore %r, %c, %t 553496156acSLuo, Yuanke // --> 554496156acSLuo, Yuanke // call 555496156acSLuo, Yuanke // ldtilecfg 556496156acSLuo, Yuanke // %t = tileload %r, %c 557496156acSLuo, Yuanke // tilestore %r, %c, %t 558496156acSLuo, Yuanke if (HasTileOperand(MRI, MI)) 559496156acSLuo, Yuanke HasUnconfigTile = true; 560496156acSLuo, Yuanke // According to AMX ABI, all the tile registers including config register 561496156acSLuo, Yuanke // are volatile. Caller need to save/restore config register. 562496156acSLuo, Yuanke if (MI.isCall() && HasUnconfigTile) { 563496156acSLuo, Yuanke MachineBasicBlock::iterator I; 564496156acSLuo, Yuanke if (LastShapeMI && dominates(MBB, MI, LastShapeMI)) 565496156acSLuo, Yuanke I = ++LastShapeMI->getIterator(); 566496156acSLuo, Yuanke else 567496156acSLuo, Yuanke I = ++MI.getIterator(); 568496156acSLuo, Yuanke Config(*I); 569496156acSLuo, Yuanke HasUnconfigTile = false; 570496156acSLuo, Yuanke continue; 571496156acSLuo, Yuanke } 572496156acSLuo, Yuanke if (!isTileDef(MRI, MI)) 573496156acSLuo, Yuanke continue; 574496156acSLuo, Yuanke // 575496156acSLuo, Yuanke //--------------------------------------------------------------------- 576496156acSLuo, Yuanke // Don't handle COPY instruction. If the src and dst of the COPY can be 577496156acSLuo, Yuanke // in the same config in below case, we just check the shape of t0. 578496156acSLuo, Yuanke // def row0 579496156acSLuo, Yuanke // def col0 580496156acSLuo, Yuanke // ldtilecfg 581496156acSLuo, Yuanke // t0 = tielzero(row0, col0) 582496156acSLuo, Yuanke // t1 = copy t0 583496156acSLuo, Yuanke // ... 584496156acSLuo, Yuanke // If the src and dst of the COPY can NOT be in the same config in below 585496156acSLuo, Yuanke // case. Reload would be generated befor the copy instruction. 
586496156acSLuo, Yuanke // def row0 587496156acSLuo, Yuanke // def col0 588496156acSLuo, Yuanke // t0 = tielzero(row0, col0) 589496156acSLuo, Yuanke // spill t0 590496156acSLuo, Yuanke // ... 591496156acSLuo, Yuanke // def row1 592496156acSLuo, Yuanke // def col1 593496156acSLuo, Yuanke // ldtilecfg 594496156acSLuo, Yuanke // t1 = tilezero(row1, col1) 595496156acSLuo, Yuanke // reload t0 596496156acSLuo, Yuanke // t1 = copy t0 597496156acSLuo, Yuanke //--------------------------------------------------------------------- 598496156acSLuo, Yuanke // 599496156acSLuo, Yuanke // If MI dominate the last shape def instruction, we need insert 600496156acSLuo, Yuanke // ldtilecfg after LastShapeMI now. The config doesn't include 601496156acSLuo, Yuanke // current MI. 602496156acSLuo, Yuanke // def row0 603496156acSLuo, Yuanke // def col0 604496156acSLuo, Yuanke // tilezero(row0, col0) <- MI 605496156acSLuo, Yuanke // def row1 606496156acSLuo, Yuanke // def col1 607496156acSLuo, Yuanke // ldtilecfg <- insert 608496156acSLuo, Yuanke // tilezero(row1, col1) 609496156acSLuo, Yuanke if (LastShapeMI && dominates(MBB, MI, LastShapeMI)) 610496156acSLuo, Yuanke Config(*(++LastShapeMI->getIterator())); 611496156acSLuo, Yuanke MachineOperand *RowMO = &MI.getOperand(1); 612496156acSLuo, Yuanke MachineOperand *ColMO = &MI.getOperand(2); 613496156acSLuo, Yuanke MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg()); 614496156acSLuo, Yuanke MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg()); 615496156acSLuo, Yuanke // If the shape is defined in current MBB, check the domination. 616496156acSLuo, Yuanke // FIXME how about loop? 
617496156acSLuo, Yuanke if (RowMI->getParent() == &MBB) { 618496156acSLuo, Yuanke if (!LastShapeMI) 619496156acSLuo, Yuanke LastShapeMI = RowMI; 620496156acSLuo, Yuanke else if (dominates(MBB, LastShapeMI, RowMI)) 621496156acSLuo, Yuanke LastShapeMI = RowMI; 622496156acSLuo, Yuanke } 623496156acSLuo, Yuanke if (ColMI->getParent() == &MBB) { 624496156acSLuo, Yuanke if (!LastShapeMI) 625496156acSLuo, Yuanke LastShapeMI = ColMI; 626496156acSLuo, Yuanke else if (dominates(MBB, LastShapeMI, ColMI)) 627496156acSLuo, Yuanke LastShapeMI = ColMI; 628496156acSLuo, Yuanke } 629*c72a751dSPhoebe Wang unsigned TileDefNum = getTileDefNum(MRI, MI.getOperand(0).getReg()); 630*c72a751dSPhoebe Wang if (TileDefNum > 1) { 631*c72a751dSPhoebe Wang for (unsigned I = 1; I < TileDefNum; I++) { 632*c72a751dSPhoebe Wang MachineOperand *ColxMO = &MI.getOperand(2 + I); 633*c72a751dSPhoebe Wang MachineInstr *ColxMI = MRI->getVRegDef(ColxMO->getReg()); 634*c72a751dSPhoebe Wang if (ColxMI->getParent() == &MBB) { 635*c72a751dSPhoebe Wang if (!LastShapeMI) 636*c72a751dSPhoebe Wang LastShapeMI = ColxMI; 637*c72a751dSPhoebe Wang else if (dominates(MBB, LastShapeMI, ColxMI)) 638*c72a751dSPhoebe Wang LastShapeMI = ColxMI; 639*c72a751dSPhoebe Wang } 640*c72a751dSPhoebe Wang } 641*c72a751dSPhoebe Wang } 642496156acSLuo, Yuanke // If there is user live out of the tilecfg, spill it and reload in 643496156acSLuo, Yuanke // before the user. 
644496156acSLuo, Yuanke Register TileReg = MI.getOperand(0).getReg(); 645496156acSLuo, Yuanke if (mayLiveOut(TileReg, LastTileCfg)) 646496156acSLuo, Yuanke spill(++MI.getIterator(), TileReg, false); 647496156acSLuo, Yuanke for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) { 648496156acSLuo, Yuanke if (UseMI.getParent() == &MBB) { 649496156acSLuo, Yuanke // check user should not across ldtilecfg 650496156acSLuo, Yuanke if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI)) 651496156acSLuo, Yuanke continue; 652496156acSLuo, Yuanke // reload befor UseMI 653496156acSLuo, Yuanke reload(UseMI.getIterator(), TileReg, RowMO, ColMO); 654496156acSLuo, Yuanke } else { 655496156acSLuo, Yuanke // Don't reload for phi instruction, we handle phi reload separately. 656496156acSLuo, Yuanke // TODO: merge the reload for the same user MBB. 657496156acSLuo, Yuanke if (!UseMI.isPHI()) 658496156acSLuo, Yuanke reload(UseMI.getIterator(), TileReg, RowMO, ColMO); 659496156acSLuo, Yuanke } 660496156acSLuo, Yuanke } 661496156acSLuo, Yuanke } 662496156acSLuo, Yuanke 663496156acSLuo, Yuanke // Configure tile registers at the head of the MBB 664496156acSLuo, Yuanke if (HasUnconfigTile) { 665496156acSLuo, Yuanke MachineInstr *Before; 666496156acSLuo, Yuanke if (LastShapeMI == nullptr || LastShapeMI->isPHI()) 667496156acSLuo, Yuanke Before = &*MBB.getFirstNonPHI(); 668496156acSLuo, Yuanke else 669496156acSLuo, Yuanke Before = &*(++LastShapeMI->getIterator()); 670496156acSLuo, Yuanke 671496156acSLuo, Yuanke Config(*Before); 672496156acSLuo, Yuanke } 673496156acSLuo, Yuanke 674496156acSLuo, Yuanke return Change; 675496156acSLuo, Yuanke } 676496156acSLuo, Yuanke 677496156acSLuo, Yuanke bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) { 6789a2c8418Saengelke X86FI = MFunc.getInfo<X86MachineFunctionInfo>(); 6799a2c8418Saengelke // Early exit in the common case of non-AMX code. 
6809a2c8418Saengelke if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA) 6819a2c8418Saengelke return false; 6829a2c8418Saengelke 683496156acSLuo, Yuanke MF = &MFunc; 684496156acSLuo, Yuanke MRI = &MFunc.getRegInfo(); 685496156acSLuo, Yuanke ST = &MFunc.getSubtarget<X86Subtarget>(); 686496156acSLuo, Yuanke TII = ST->getInstrInfo(); 687496156acSLuo, Yuanke MFI = &MFunc.getFrameInfo(); 688496156acSLuo, Yuanke TRI = ST->getRegisterInfo(); 689496156acSLuo, Yuanke CfgSS = -1; 690496156acSLuo, Yuanke 691496156acSLuo, Yuanke unsigned NumVirtRegs = MRI->getNumVirtRegs(); 6923b1de7abSLuo, Yuanke 693496156acSLuo, Yuanke StackSlotForVirtReg.resize(NumVirtRegs); 694496156acSLuo, Yuanke MayLiveAcrossBlocks.clear(); 695496156acSLuo, Yuanke // We will create register during config. *3 is to make sure 696496156acSLuo, Yuanke // the virtual register number doesn't exceed the size of 697496156acSLuo, Yuanke // the bit vector. 698496156acSLuo, Yuanke MayLiveAcrossBlocks.resize(NumVirtRegs * 3); 699496156acSLuo, Yuanke bool Change = false; 700496156acSLuo, Yuanke assert(MRI->isSSA()); 701496156acSLuo, Yuanke 702496156acSLuo, Yuanke // Canonicalize the phi node first. 703496156acSLuo, Yuanke for (MachineBasicBlock &MBB : MFunc) 704496156acSLuo, Yuanke canonicalizePHIs(MBB); 705496156acSLuo, Yuanke 706496156acSLuo, Yuanke // Loop over all of the basic blocks in reverse post order and insert 707496156acSLuo, Yuanke // ldtilecfg for tile registers. The reserse post order is to facilitate 708496156acSLuo, Yuanke // PHI node convert. 
709496156acSLuo, Yuanke ReversePostOrderTraversal<MachineFunction *> RPOT(MF); 710496156acSLuo, Yuanke for (MachineBasicBlock *MBB : RPOT) { 711496156acSLuo, Yuanke convertPHIs(*MBB); 712496156acSLuo, Yuanke Change |= configBasicBlock(*MBB); 713496156acSLuo, Yuanke } 714496156acSLuo, Yuanke 715496156acSLuo, Yuanke if (Change) 716496156acSLuo, Yuanke InitializeTileConfigStackSpace(); 717496156acSLuo, Yuanke 718496156acSLuo, Yuanke StackSlotForVirtReg.clear(); 719496156acSLuo, Yuanke return Change; 720496156acSLuo, Yuanke } 721496156acSLuo, Yuanke 722496156acSLuo, Yuanke FunctionPass *llvm::createX86FastPreTileConfigPass() { 723496156acSLuo, Yuanke return new X86FastPreTileConfig(); 724496156acSLuo, Yuanke } 725