//===-- X86PreTileConfig.cpp - Tile Register Pre-configure ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Pass to pre-configure the shapes of AMX registers.
/// AMX registers must be configured before use. The shapes of an AMX register
/// are encoded in the 1st and 2nd machine operands of AMX pseudo instructions.
///
/// The ldtilecfg instruction is used to configure the shapes. The definitions
/// of all variable shapes must reach it, and it is inserted more than once if
/// we cannot find a single point that dominates all AMX instructions.
///
/// The tile config register is caller-saved according to the ABI, so we need
/// to insert ldtilecfg again after a call instruction if the callee clobbers
/// any AMX register.
///
/// This pass calculates all points where ldtilecfg needs to be inserted and
/// inserts it there. It reports an error if the reachability conditions
/// cannot be met.
//
//===----------------------------------------------------------------------===//
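// A rough sketch of the operand convention the pass relies on. The virtual
// register names and the MOV16ri/PTILEZEROV opcodes below are illustrative
// only, not actual output of this pass:
//
//   %row:gr16 = MOV16ri 16               ; shape def: rows
//   %col:gr16 = MOV16ri 64               ; shape def: columns (in bytes)
//   %t:tile = PTILEZEROV %row, %col      ; AMX pseudo, shapes in operands 1-2
//
// ldtilecfg has to end up after the last shape def that reaches an AMX
// instruction and before the AMX instruction itself.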
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "tile-pre-config"
#define REPORT_CONFIG_FAIL                                                     \
  report_fatal_error(                                                          \
      MF.getName() +                                                           \
      ": Failed to config tile register, please define the shape earlier");

namespace {

struct MIRef {
  MachineInstr *MI = nullptr;
  MachineBasicBlock *MBB = nullptr;
  // A virtual position for an instruction that will be inserted after MI.
  size_t Pos = 0;
  MIRef() = default;
  MIRef(MachineBasicBlock *MBB) : MBB(MBB) {
    for (auto I = MBB->begin(), E = MBB->end(); I != E && I->isPHI();
         ++I, ++Pos)
      MI = &*I;
  }
  MIRef(MachineInstr *MI)
      : MI(MI), MBB(MI->getParent()),
        Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}
  MIRef(MachineInstr *MI, MachineBasicBlock *MBB)
      : MI(MI), MBB(MBB),
        Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}
  MIRef(MachineInstr *MI, MachineBasicBlock *MBB, size_t Pos)
      : MI(MI), MBB(MBB), Pos(Pos) {}
  operator bool() const { return MBB != nullptr; }
  bool operator==(const MIRef &RHS) const {
    return MI == RHS.MI && MBB == RHS.MBB;
  }
  bool operator!=(const MIRef &RHS) const { return !(*this == RHS); }
  bool operator<(const MIRef &RHS) const {
    // Comparison between different BBs happens when a MIRef is inserted into
    // a set, so compare MBB first to keep the ordering well-defined.
    return MBB < RHS.MBB || (MBB == RHS.MBB && Pos < RHS.Pos);
  }
  bool operator>(const MIRef &RHS) const {
    // Comparison between different BBs happens when a MIRef is inserted into
    // a set, so compare MBB first to keep the ordering well-defined.
    return MBB > RHS.MBB || (MBB == RHS.MBB && Pos > RHS.Pos);
  }
};

struct BBInfo {
  MIRef FirstAMX;
  MIRef LastCall;
  bool HasAMXRegLiveIn = false;
  bool TileCfgForbidden = false;
  bool NeedTileCfgLiveIn = false;
};

class X86PreTileConfig : public MachineFunctionPass {
  MachineRegisterInfo *MRI;
  const MachineLoopInfo *MLI;
  SmallSet<MachineInstr *, 8> DefVisited;
  DenseMap<MachineBasicBlock *, BBInfo> BBVisitedInfo;
  DenseMap<MachineBasicBlock *, SmallVector<MIRef, 8>> ShapeBBs;

  /// Check if the callee will clobber AMX registers.
  bool isDestructiveCall(MachineInstr &MI, BitVector UsableRegs) {
    auto Iter = llvm::find_if(
        MI.operands(), [](MachineOperand &MO) { return MO.isRegMask(); });
    if (Iter == MI.operands_end())
      return false;
    UsableRegs.clearBitsInMask(Iter->getRegMask());
    return !UsableRegs.none();
  }

  /// Check if MI is an AMX pseudo instruction.
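  /// An instruction is treated as AMX when it defines a virtual register of
  /// the TILE class; PTILESTOREDV, which only reads a tile, is the one
  /// exception handled explicitly. As a side effect, the shape operands of
  /// instructions recognized through their tile def are recorded via
  /// collectShapeInfo().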
  bool isAMXInstruction(MachineInstr &MI) {
    if (MI.isPHI() || MI.isDebugInstr() || MI.getNumOperands() < 3)
      return false;
    MachineOperand &MO = MI.getOperand(0);
    // We can simply check whether it is an AMX instruction by its def,
    // but we should exclude the old API which uses physical registers.
    if (MO.isReg() && MO.getReg().isVirtual() &&
        MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID) {
      collectShapeInfo(MI);
      return true;
    }
    // PTILESTOREDV is the only exception that doesn't def an AMX register.
    return MI.getOpcode() == X86::PTILESTOREDV;
  }

  /// Check if it is an edge from loop bottom to loop head.
  bool isLoopBackEdge(MachineBasicBlock *Header, MachineBasicBlock *Bottom) {
    if (!MLI->isLoopHeader(Header))
      return false;
    auto *ML = MLI->getLoopFor(Header);
    if (ML->contains(Bottom) && ML->isLoopLatch(Bottom))
      return true;

    return false;
  }

  /// Collect the shape def information for later use.
  void collectShapeInfo(MachineInstr &MI);

  /// Try to hoist shapes defined below AMX instructions.
  bool hoistShapesInBB(MachineBasicBlock *MBB, SmallVectorImpl<MIRef> &Shapes) {
    MIRef &FirstAMX = BBVisitedInfo[MBB].FirstAMX;
    auto FirstShapeBelowAMX = llvm::lower_bound(Shapes, FirstAMX);
    auto InsertPoint = FirstAMX.MI->getIterator();
    for (auto I = FirstShapeBelowAMX, E = Shapes.end(); I != E; ++I) {
      // Do not hoist instructions that access memory.
      if (I->MI->mayLoadOrStore())
        return false;
      for (auto &MO : I->MI->operands()) {
        if (MO.isDef())
          continue;
        // Do not hoist an instruction if any of its sources is defined below
        // the first AMX instruction.
        // TODO: We can handle isMoveImmediate MI here.
        if (MO.isReg() && MIRef(MRI->getVRegDef(MO.getReg())) > FirstAMX)
          return false;
        // TODO: Maybe need more checks here.
      }
      MBB->insert(InsertPoint, I->MI->removeFromParent());
    }
    // We only need to mark the last shape in the BB now.
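    // All shape defs that used to sit below the first AMX instruction have
    // just been moved directly above it, so the instruction right before
    // InsertPoint is the last shape def in this BB. Recording only that one
    // is enough for the later "insert ldtilecfg after the last shape def"
    // check.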
    Shapes.clear();
    Shapes.push_back(MIRef(&*--InsertPoint, MBB));
    return true;
  }

public:
  X86PreTileConfig() : MachineFunctionPass(ID) {}

  /// Return the pass name.
  StringRef getPassName() const override {
    return "Tile Register Pre-configure";
  }

  /// X86PreTileConfig analysis usage.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    AU.addRequired<MachineLoopInfo>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  /// Clear MF-related structures.
  void releaseMemory() override {
    ShapeBBs.clear();
    DefVisited.clear();
    BBVisitedInfo.clear();
  }

  /// Perform the insertion of ldtilecfg instructions.
  bool runOnMachineFunction(MachineFunction &MF) override;

  static char ID;
};

} // end anonymous namespace

char X86PreTileConfig::ID = 0;

INITIALIZE_PASS_BEGIN(X86PreTileConfig, "tilepreconfig",
                      "Tile Register Pre-configure", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(X86PreTileConfig, "tilepreconfig",
                    "Tile Register Pre-configure", false, false)

void X86PreTileConfig::collectShapeInfo(MachineInstr &MI) {
  auto RecordShape = [&](MachineInstr *MI, MachineBasicBlock *MBB) {
    MIRef MIR(MI, MBB);
    auto I = llvm::lower_bound(ShapeBBs[MBB], MIR);
    if (I == ShapeBBs[MBB].end() || *I != MIR)
      ShapeBBs[MBB].insert(I, MIR);
  };

  SmallVector<Register, 8> WorkList(
      {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
  while (!WorkList.empty()) {
    Register R = WorkList.pop_back_val();
    MachineInstr *DefMI = MRI->getVRegDef(R);
    assert(DefMI && "R must have one defining instruction");
    MachineBasicBlock *DefMBB = DefMI->getParent();
    if (DefMI->isMoveImmediate() || !DefVisited.insert(DefMI).second)
      continue;
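    // Values that flow in over a loop back edge are defined inside the loop,
    // so they cannot be traced to a def that dominates the loop. In that case
    // the PHI itself is treated as the shape def; values from the other
    // predecessors are traced further up the def chain.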
    if (DefMI->isPHI()) {
      for (unsigned I = 1; I < DefMI->getNumOperands(); I += 2)
        if (isLoopBackEdge(DefMBB, DefMI->getOperand(I + 1).getMBB()))
          RecordShape(DefMI, DefMBB); // In this case, PHI is also a shape def.
        else
          WorkList.push_back(DefMI->getOperand(I).getReg());
    } else {
      RecordShape(DefMI, DefMBB);
    }
  }
}

bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetInstrInfo *TII = ST.getInstrInfo();
  const TargetRegisterInfo *TRI = ST.getRegisterInfo();
  const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);

  BitVector AMXRegs(TRI->getNumRegs());
  for (unsigned I = 0; I < RC->getNumRegs(); I++)
    AMXRegs.set(X86::TMM0 + I);

  // Iterate over MF to collect information.
  MRI = &MF.getRegInfo();
  MLI = &getAnalysis<MachineLoopInfo>();
  SmallSet<MIRef, 8> CfgNeedInsert;
  SmallVector<MachineBasicBlock *, 8> CfgLiveInBBs;
  for (auto &MBB : MF) {
    size_t Pos = 0;
    for (auto &MI : MBB) {
      ++Pos;
      if (isAMXInstruction(MI)) {
        // If there's a call before the AMX, we need to reload the tile config.
        if (BBVisitedInfo[&MBB].LastCall)
          CfgNeedInsert.insert(BBVisitedInfo[&MBB].LastCall);
        else // Otherwise, the tile config needs to be live in to this BB.
          BBVisitedInfo[&MBB].NeedTileCfgLiveIn = true;
        // Always record the first AMX in case there's a shape def after it.
        if (!BBVisitedInfo[&MBB].FirstAMX)
          BBVisitedInfo[&MBB].FirstAMX = MIRef(&MI, &MBB, Pos);
      } else if (MI.isCall() && isDestructiveCall(MI, AMXRegs)) {
        // Record the call only if the callee clobbers AMX registers.
        BBVisitedInfo[&MBB].LastCall = MIRef(&MI, &MBB, Pos);
      }
    }
    if (BBVisitedInfo[&MBB].NeedTileCfgLiveIn) {
      if (&MBB == &MF.front())
        CfgNeedInsert.insert(MIRef(&MBB));
      else
        CfgLiveInBBs.push_back(&MBB);
    }
    if (BBVisitedInfo[&MBB].FirstAMX || BBVisitedInfo[&MBB].HasAMXRegLiveIn)
      for (auto *Succ : MBB.successors())
        if (!isLoopBackEdge(Succ, &MBB))
          BBVisitedInfo[Succ].HasAMXRegLiveIn = true;
  }

  // Update NeedTileCfgLiveIn for predecessors.
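  // The requirement is propagated backwards: a predecessor that contains an
  // AMX-clobbering call becomes a reload point itself; otherwise it needs the
  // tile config live in as well, and the walk continues until it reaches the
  // entry block.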
  while (!CfgLiveInBBs.empty()) {
    MachineBasicBlock *MBB = CfgLiveInBBs.pop_back_val();
    for (auto *Pred : MBB->predecessors()) {
      if (BBVisitedInfo[Pred].LastCall) {
        CfgNeedInsert.insert(BBVisitedInfo[Pred].LastCall);
      } else if (!BBVisitedInfo[Pred].NeedTileCfgLiveIn) {
        BBVisitedInfo[Pred].NeedTileCfgLiveIn = true;
        if (Pred == &MF.front())
          CfgNeedInsert.insert(MIRef(Pred));
        else
          CfgLiveInBBs.push_back(Pred);
      }
    }
  }

  // There's no AMX instruction if we didn't find a tile config live-in point.
  if (CfgNeedInsert.empty())
    return false;

  // Avoid inserting ldtilecfg before any shape defs.
  SmallVector<MachineBasicBlock *, 8> WorkList;
  for (auto &I : ShapeBBs) {
    // TODO: We can hoist shapes across BBs here.
    if (BBVisitedInfo[I.first].HasAMXRegLiveIn)
      REPORT_CONFIG_FAIL
    if (BBVisitedInfo[I.first].FirstAMX &&
        BBVisitedInfo[I.first].FirstAMX < I.second.back() &&
        !hoistShapesInBB(I.first, I.second))
      REPORT_CONFIG_FAIL
    WorkList.push_back(I.first);
  }
  while (!WorkList.empty()) {
    MachineBasicBlock *MBB = WorkList.pop_back_val();
    for (auto *Pred : MBB->predecessors()) {
      if (!BBVisitedInfo[Pred].TileCfgForbidden && !isLoopBackEdge(MBB, Pred)) {
        BBVisitedInfo[Pred].TileCfgForbidden = true;
        WorkList.push_back(Pred);
      }
    }
  }
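  // Every block that precedes a shape def (transitively, ignoring loop back
  // edges) is now marked TileCfgForbidden: an ldtilecfg placed there could
  // execute before the shape is defined. Each config live-in point collected
  // above is therefore sunk along its successors until a non-forbidden block
  // is reached, where LDTILECFG is emitted loading from a dedicated stack
  // slot.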
  DebugLoc DL;
  SmallSet<MIRef, 8> VisitedOrInserted;
  int SS = MF.getFrameInfo().CreateStackObject(
      ST.getTileConfigSize(), ST.getTileConfigAlignment(), false);

  // Try to insert ldtilecfg for the tile config live-in points.
  for (auto I : CfgNeedInsert) {
    SmallSet<MIRef, 8> InsertPoints;
    SmallVector<MIRef, 8> WorkList({I});
    while (!WorkList.empty()) {
      MIRef I = WorkList.pop_back_val();
      if (!VisitedOrInserted.count(I)) {
        if (!BBVisitedInfo[I.MBB].TileCfgForbidden) {
          // If all shapes are reachable in this BB, stop sinking and try to
          // insert here.
          InsertPoints.insert(I);
        } else {
          // Avoid visiting the BB more than once.
          VisitedOrInserted.insert(I);
          // Sink the insertion point along successors that need the tile
          // config live in, since not all shapes are reachable in this MBB.
          for (auto *Succ : I.MBB->successors())
            if (BBVisitedInfo[Succ].NeedTileCfgLiveIn)
              WorkList.push_back(MIRef(Succ));
        }
      }
    }

    // A given point might fork into several insertion points when the shape
    // conditions are not met.
    for (MIRef I : InsertPoints) {
      // Make sure we insert ldtilecfg after the last shape def in the MBB.
      if (ShapeBBs.count(I.MBB) && I < ShapeBBs[I.MBB].back())
        I = ShapeBBs[I.MBB].back();
      // The same MBB may be reached more than once during sinking. Record it
      // to avoid inserting more than once.
      if (VisitedOrInserted.insert(I).second) {
        auto II = I.MI ? I.MI->getIterator() : I.MBB->instr_begin();
        addFrameReference(BuildMI(*I.MBB, ++II, DL, TII->get(X86::LDTILECFG)),
                          SS);
      }
    }
  }
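  // The in-memory tile configuration is a 64-byte block; roughly, byte 0
  // holds the palette and later bytes hold the per-tile columns and rows
  // (see the Intel AMX documentation for the exact layout). Zeroing the slot
  // marks every tile as unconfigured, and the actual shapes are expected to
  // be filled in by a later pass once tile registers have been assigned. The
  // stores below cover all 64 bytes with the widest vector register
  // available.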
  // Zero stack slot.
  MachineBasicBlock &MBB = MF.front();
  MachineInstr *MI = &*MBB.begin();
  if (ST.hasAVX512()) {
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::VPXORDZrr), Zmm)
        .addReg(Zmm, RegState::Undef)
        .addReg(Zmm, RegState::Undef);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), SS)
        .addReg(Zmm);
  } else if (ST.hasAVX2()) {
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::VPXORYrr), Ymm)
        .addReg(Ymm, RegState::Undef)
        .addReg(Ymm, RegState::Undef);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), SS)
        .addReg(Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), SS, 32)
        .addReg(Ymm);
  } else {
    assert(ST.hasSSE2() && "AMX should assume SSE2 enabled");
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::PXORrr), Xmm)
        .addReg(Xmm, RegState::Undef)
        .addReg(Xmm, RegState::Undef);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS, 16)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS, 32)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS, 48)
        .addReg(Xmm);
  }
  // Fill in the palette first.
  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), SS).addImm(1);

  return true;
}

FunctionPass *llvm::createX86PreTileConfigPass() {
  return new X86PreTileConfig();
}