1fe6060f1SDimitry Andric //===-- X86PreTileConfig.cpp - Tile Register Pre-configure-----------------===// 2e8d8bef9SDimitry Andric // 3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e8d8bef9SDimitry Andric // 7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 8e8d8bef9SDimitry Andric // 9fe6060f1SDimitry Andric /// \file Pass to pre-config the shapes of AMX registers 10fe6060f1SDimitry Andric /// AMX register needs to be configured before use. The shapes of AMX register 11fe6060f1SDimitry Andric /// are encoded in the 1st and 2nd machine operand of AMX pseudo instructions. 12e8d8bef9SDimitry Andric /// 13fe6060f1SDimitry Andric /// The instruction ldtilecfg is used to config the shapes. It must be reachable 14fe6060f1SDimitry Andric /// for all variable shapes. ldtilecfg will be inserted more than once if we 15fe6060f1SDimitry Andric /// cannot find a dominating point for all AMX instructions. 16e8d8bef9SDimitry Andric /// 17fe6060f1SDimitry Andric /// The configure register is caller saved according to ABI. We need to insert 18fe6060f1SDimitry Andric /// ldtilecfg again after the call instruction if callee clobbers any AMX 19fe6060f1SDimitry Andric /// registers. 20e8d8bef9SDimitry Andric /// 21fe6060f1SDimitry Andric /// This pass calculates all points that ldtilecfg need to be inserted to and 22fe6060f1SDimitry Andric /// insert them. It reports error if the reachability conditions aren't met. 23e8d8bef9SDimitry Andric // 24e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 25e8d8bef9SDimitry Andric 26e8d8bef9SDimitry Andric #include "X86.h" 27e8d8bef9SDimitry Andric #include "X86InstrBuilder.h" 28349cc55cSDimitry Andric #include "X86MachineFunctionInfo.h" 29e8d8bef9SDimitry Andric #include "X86RegisterInfo.h" 30e8d8bef9SDimitry Andric #include "X86Subtarget.h" 315f757f3fSDimitry Andric #include "llvm/ADT/SmallSet.h" 32e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 33e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 34fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h" 3581ad6265SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h" 36e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 37e8d8bef9SDimitry Andric #include "llvm/CodeGen/Passes.h" 38e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 39e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h" 40*0fca6ea1SDimitry Andric #include "llvm/IR/Module.h" 41e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h" 42e8d8bef9SDimitry Andric 43e8d8bef9SDimitry Andric using namespace llvm; 44e8d8bef9SDimitry Andric 45e8d8bef9SDimitry Andric #define DEBUG_TYPE "tile-pre-config" 4681ad6265SDimitry Andric 4781ad6265SDimitry Andric static void emitErrorMsg(MachineFunction &MF) { 48*0fca6ea1SDimitry Andric LLVMContext &Context = MF.getFunction().getContext(); 4906c3fb27SDimitry Andric Context.emitError( 5006c3fb27SDimitry Andric MF.getName() + 5106c3fb27SDimitry Andric ": Failed to config tile register, please define the shape earlier"); 5281ad6265SDimitry Andric } 53e8d8bef9SDimitry Andric 54e8d8bef9SDimitry Andric namespace { 55e8d8bef9SDimitry Andric 56fe6060f1SDimitry Andric struct MIRef { 57fe6060f1SDimitry Andric MachineInstr *MI = nullptr; 58fe6060f1SDimitry Andric MachineBasicBlock *MBB = nullptr; 59fe6060f1SDimitry Andric // A virtual position for instruction that will be inserted after MI. 60fe6060f1SDimitry Andric size_t Pos = 0; 61fe6060f1SDimitry Andric MIRef() = default; 62fe6060f1SDimitry Andric MIRef(MachineBasicBlock *MBB) : MBB(MBB) { 63fe6060f1SDimitry Andric for (auto I = MBB->begin(), E = MBB->end(); I != E && I->isPHI(); 64fe6060f1SDimitry Andric ++I, ++Pos) 65fe6060f1SDimitry Andric MI = &*I; 66fe6060f1SDimitry Andric } 67fe6060f1SDimitry Andric MIRef(MachineInstr *MI) 68fe6060f1SDimitry Andric : MI(MI), MBB(MI->getParent()), 69fe6060f1SDimitry Andric Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {} 70fe6060f1SDimitry Andric MIRef(MachineInstr *MI, MachineBasicBlock *MBB) 71fe6060f1SDimitry Andric : MI(MI), MBB(MBB), 72fe6060f1SDimitry Andric Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {} 73fe6060f1SDimitry Andric MIRef(MachineInstr *MI, MachineBasicBlock *MBB, size_t Pos) 74fe6060f1SDimitry Andric : MI(MI), MBB(MBB), Pos(Pos) {} 75fe6060f1SDimitry Andric operator bool() const { return MBB != nullptr; } 76fe6060f1SDimitry Andric bool operator==(const MIRef &RHS) const { 77fe6060f1SDimitry Andric return MI == RHS.MI && MBB == RHS.MBB; 78fe6060f1SDimitry Andric } 79fe6060f1SDimitry Andric bool operator!=(const MIRef &RHS) const { return !(*this == RHS); } 80fe6060f1SDimitry Andric bool operator<(const MIRef &RHS) const { 81fe6060f1SDimitry Andric // Comparison between different BBs happens when inserting a MIRef into set. 82fe6060f1SDimitry Andric // So we compare MBB first to make the insertion happy. 83fe6060f1SDimitry Andric return MBB < RHS.MBB || (MBB == RHS.MBB && Pos < RHS.Pos); 84fe6060f1SDimitry Andric } 85fe6060f1SDimitry Andric bool operator>(const MIRef &RHS) const { 86fe6060f1SDimitry Andric // Comparison between different BBs happens when inserting a MIRef into set. 87fe6060f1SDimitry Andric // So we compare MBB first to make the insertion happy. 88fe6060f1SDimitry Andric return MBB > RHS.MBB || (MBB == RHS.MBB && Pos > RHS.Pos); 89fe6060f1SDimitry Andric } 90fe6060f1SDimitry Andric }; 91e8d8bef9SDimitry Andric 92fe6060f1SDimitry Andric struct BBInfo { 93fe6060f1SDimitry Andric MIRef FirstAMX; 94fe6060f1SDimitry Andric MIRef LastCall; 95fe6060f1SDimitry Andric bool HasAMXRegLiveIn = false; 96fe6060f1SDimitry Andric bool TileCfgForbidden = false; 97fe6060f1SDimitry Andric bool NeedTileCfgLiveIn = false; 98fe6060f1SDimitry Andric }; 99fe6060f1SDimitry Andric 100fe6060f1SDimitry Andric class X86PreTileConfig : public MachineFunctionPass { 10106c3fb27SDimitry Andric MachineRegisterInfo *MRI = nullptr; 10206c3fb27SDimitry Andric const MachineLoopInfo *MLI = nullptr; 103fe6060f1SDimitry Andric SmallSet<MachineInstr *, 8> DefVisited; 104fe6060f1SDimitry Andric DenseMap<MachineBasicBlock *, BBInfo> BBVisitedInfo; 105fe6060f1SDimitry Andric DenseMap<MachineBasicBlock *, SmallVector<MIRef, 8>> ShapeBBs; 106fe6060f1SDimitry Andric 107fe6060f1SDimitry Andric /// Check if the callee will clobber AMX registers. 108fe6060f1SDimitry Andric bool isDestructiveCall(MachineInstr &MI, BitVector UsableRegs) { 109fe6060f1SDimitry Andric auto Iter = llvm::find_if( 110fe6060f1SDimitry Andric MI.operands(), [](MachineOperand &MO) { return MO.isRegMask(); }); 111fe6060f1SDimitry Andric if (Iter == MI.operands_end()) 112fe6060f1SDimitry Andric return false; 113fe6060f1SDimitry Andric UsableRegs.clearBitsInMask(Iter->getRegMask()); 114fe6060f1SDimitry Andric return !UsableRegs.none(); 115fe6060f1SDimitry Andric } 116fe6060f1SDimitry Andric 117fe6060f1SDimitry Andric /// Check if MI is AMX pseudo instruction. 118fe6060f1SDimitry Andric bool isAMXInstruction(MachineInstr &MI) { 119fe6060f1SDimitry Andric if (MI.isPHI() || MI.isDebugInstr() || MI.getNumOperands() < 3) 120fe6060f1SDimitry Andric return false; 121fe6060f1SDimitry Andric MachineOperand &MO = MI.getOperand(0); 122fe6060f1SDimitry Andric // We can simply check if it is AMX instruction by its def. 123fe6060f1SDimitry Andric // But we should exclude old API which uses physical registers. 124fe6060f1SDimitry Andric if (MO.isReg() && MO.getReg().isVirtual() && 125fe6060f1SDimitry Andric MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID) { 126fe6060f1SDimitry Andric collectShapeInfo(MI); 127fe6060f1SDimitry Andric return true; 128fe6060f1SDimitry Andric } 129fe6060f1SDimitry Andric // PTILESTOREDV is the only exception that doesn't def a AMX register. 130fe6060f1SDimitry Andric return MI.getOpcode() == X86::PTILESTOREDV; 131fe6060f1SDimitry Andric } 132fe6060f1SDimitry Andric 133fe6060f1SDimitry Andric /// Check if it is an edge from loop bottom to loop head. 134fe6060f1SDimitry Andric bool isLoopBackEdge(MachineBasicBlock *Header, MachineBasicBlock *Bottom) { 135fe6060f1SDimitry Andric if (!MLI->isLoopHeader(Header)) 136fe6060f1SDimitry Andric return false; 137fe6060f1SDimitry Andric auto *ML = MLI->getLoopFor(Header); 138fe6060f1SDimitry Andric if (ML->contains(Bottom) && ML->isLoopLatch(Bottom)) 139fe6060f1SDimitry Andric return true; 140fe6060f1SDimitry Andric 141fe6060f1SDimitry Andric return false; 142fe6060f1SDimitry Andric } 143fe6060f1SDimitry Andric 144fe6060f1SDimitry Andric /// Collect the shape def information for later use. 145fe6060f1SDimitry Andric void collectShapeInfo(MachineInstr &MI); 146fe6060f1SDimitry Andric 147fe6060f1SDimitry Andric /// Try to hoist shapes definded below AMX instructions. 148fe6060f1SDimitry Andric bool hoistShapesInBB(MachineBasicBlock *MBB, SmallVectorImpl<MIRef> &Shapes) { 149fe6060f1SDimitry Andric MIRef &FirstAMX = BBVisitedInfo[MBB].FirstAMX; 150fe6060f1SDimitry Andric auto FirstShapeBelowAMX = llvm::lower_bound(Shapes, FirstAMX); 151fe6060f1SDimitry Andric auto InsertPoint = FirstAMX.MI->getIterator(); 152fe6060f1SDimitry Andric for (auto I = FirstShapeBelowAMX, E = Shapes.end(); I != E; ++I) { 153fe6060f1SDimitry Andric // Do not hoist instructions that access memory. 154fe6060f1SDimitry Andric if (I->MI->mayLoadOrStore()) 155fe6060f1SDimitry Andric return false; 156fe6060f1SDimitry Andric for (auto &MO : I->MI->operands()) { 157fe6060f1SDimitry Andric if (MO.isDef()) 158fe6060f1SDimitry Andric continue; 159fe6060f1SDimitry Andric // Do not hoist instructions if the sources' def under AMX instruction. 160fe6060f1SDimitry Andric // TODO: We can handle isMoveImmediate MI here. 161fe6060f1SDimitry Andric if (MO.isReg() && MIRef(MRI->getVRegDef(MO.getReg())) > FirstAMX) 162fe6060f1SDimitry Andric return false; 163fe6060f1SDimitry Andric // TODO: Maybe need more checks here. 164fe6060f1SDimitry Andric } 165fe6060f1SDimitry Andric MBB->insert(InsertPoint, I->MI->removeFromParent()); 166fe6060f1SDimitry Andric } 167fe6060f1SDimitry Andric // We only need to mark the last shape in the BB now. 168fe6060f1SDimitry Andric Shapes.clear(); 169fe6060f1SDimitry Andric Shapes.push_back(MIRef(&*--InsertPoint, MBB)); 170fe6060f1SDimitry Andric return true; 171fe6060f1SDimitry Andric } 172e8d8bef9SDimitry Andric 173e8d8bef9SDimitry Andric public: 174e8d8bef9SDimitry Andric X86PreTileConfig() : MachineFunctionPass(ID) {} 175e8d8bef9SDimitry Andric 176e8d8bef9SDimitry Andric /// Return the pass name. 177e8d8bef9SDimitry Andric StringRef getPassName() const override { 178e8d8bef9SDimitry Andric return "Tile Register Pre-configure"; 179e8d8bef9SDimitry Andric } 180e8d8bef9SDimitry Andric 181e8d8bef9SDimitry Andric /// X86PreTileConfig analysis usage. 182fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 183fe6060f1SDimitry Andric AU.setPreservesAll(); 184*0fca6ea1SDimitry Andric AU.addRequired<MachineLoopInfoWrapperPass>(); 185fe6060f1SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 186fe6060f1SDimitry Andric } 187e8d8bef9SDimitry Andric 188fe6060f1SDimitry Andric /// Clear MF related structures. 189fe6060f1SDimitry Andric void releaseMemory() override { 190fe6060f1SDimitry Andric ShapeBBs.clear(); 191fe6060f1SDimitry Andric DefVisited.clear(); 192fe6060f1SDimitry Andric BBVisitedInfo.clear(); 193fe6060f1SDimitry Andric } 194fe6060f1SDimitry Andric 195fe6060f1SDimitry Andric /// Perform ldtilecfg instructions inserting. 196fe6060f1SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 197e8d8bef9SDimitry Andric 198e8d8bef9SDimitry Andric static char ID; 199e8d8bef9SDimitry Andric }; 200e8d8bef9SDimitry Andric 201e8d8bef9SDimitry Andric } // end anonymous namespace 202e8d8bef9SDimitry Andric 203e8d8bef9SDimitry Andric char X86PreTileConfig::ID = 0; 204e8d8bef9SDimitry Andric 205e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(X86PreTileConfig, "tilepreconfig", 206fe6060f1SDimitry Andric "Tile Register Pre-configure", false, false) 207*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) 208e8d8bef9SDimitry Andric INITIALIZE_PASS_END(X86PreTileConfig, "tilepreconfig", 209fe6060f1SDimitry Andric "Tile Register Pre-configure", false, false) 210e8d8bef9SDimitry Andric 211fe6060f1SDimitry Andric void X86PreTileConfig::collectShapeInfo(MachineInstr &MI) { 212fe6060f1SDimitry Andric auto RecordShape = [&](MachineInstr *MI, MachineBasicBlock *MBB) { 213fe6060f1SDimitry Andric MIRef MIR(MI, MBB); 214fe6060f1SDimitry Andric auto I = llvm::lower_bound(ShapeBBs[MBB], MIR); 215fe6060f1SDimitry Andric if (I == ShapeBBs[MBB].end() || *I != MIR) 216fe6060f1SDimitry Andric ShapeBBs[MBB].insert(I, MIR); 217fe6060f1SDimitry Andric }; 218fe6060f1SDimitry Andric 219fe6060f1SDimitry Andric SmallVector<Register, 8> WorkList( 220fe6060f1SDimitry Andric {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()}); 221fe6060f1SDimitry Andric while (!WorkList.empty()) { 222fe6060f1SDimitry Andric Register R = WorkList.pop_back_val(); 223fe6060f1SDimitry Andric MachineInstr *DefMI = MRI->getVRegDef(R); 224fe6060f1SDimitry Andric assert(DefMI && "R must has one define instruction"); 225fe6060f1SDimitry Andric MachineBasicBlock *DefMBB = DefMI->getParent(); 226fe6060f1SDimitry Andric if (DefMI->isMoveImmediate() || !DefVisited.insert(DefMI).second) 227fe6060f1SDimitry Andric continue; 228fe6060f1SDimitry Andric if (DefMI->isPHI()) { 229fe6060f1SDimitry Andric for (unsigned I = 1; I < DefMI->getNumOperands(); I += 2) 230fe6060f1SDimitry Andric if (isLoopBackEdge(DefMBB, DefMI->getOperand(I + 1).getMBB())) 231fe6060f1SDimitry Andric RecordShape(DefMI, DefMBB); // In this case, PHI is also a shape def. 232fe6060f1SDimitry Andric else 233fe6060f1SDimitry Andric WorkList.push_back(DefMI->getOperand(I).getReg()); 234fe6060f1SDimitry Andric } else { 235fe6060f1SDimitry Andric RecordShape(DefMI, DefMBB); 236fe6060f1SDimitry Andric } 237fe6060f1SDimitry Andric } 238e8d8bef9SDimitry Andric } 239e8d8bef9SDimitry Andric 240fe6060f1SDimitry Andric bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) { 241*0fca6ea1SDimitry Andric X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 242*0fca6ea1SDimitry Andric // Early exit in the common case of non-AMX code. 243*0fca6ea1SDimitry Andric if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA) 244*0fca6ea1SDimitry Andric return false; 245*0fca6ea1SDimitry Andric 246fe6060f1SDimitry Andric const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); 247fe6060f1SDimitry Andric const TargetInstrInfo *TII = ST.getInstrInfo(); 248fe6060f1SDimitry Andric const TargetRegisterInfo *TRI = ST.getRegisterInfo(); 249fe6060f1SDimitry Andric const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID); 250e8d8bef9SDimitry Andric 251fe6060f1SDimitry Andric BitVector AMXRegs(TRI->getNumRegs()); 252fe6060f1SDimitry Andric for (unsigned I = 0; I < RC->getNumRegs(); I++) 253fe6060f1SDimitry Andric AMXRegs.set(X86::TMM0 + I); 254fe6060f1SDimitry Andric 255fe6060f1SDimitry Andric // Iterate MF to collect information. 256fe6060f1SDimitry Andric MRI = &MF.getRegInfo(); 257*0fca6ea1SDimitry Andric MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); 258fe6060f1SDimitry Andric SmallSet<MIRef, 8> CfgNeedInsert; 259fe6060f1SDimitry Andric SmallVector<MachineBasicBlock *, 8> CfgLiveInBBs; 260fe6060f1SDimitry Andric for (auto &MBB : MF) { 261fe6060f1SDimitry Andric size_t Pos = 0; 262fe6060f1SDimitry Andric for (auto &MI : MBB) { 263fe6060f1SDimitry Andric ++Pos; 264fe6060f1SDimitry Andric if (isAMXInstruction(MI)) { 265fe6060f1SDimitry Andric // If there's call before the AMX, we need to reload tile config. 266fe6060f1SDimitry Andric if (BBVisitedInfo[&MBB].LastCall) 267fe6060f1SDimitry Andric CfgNeedInsert.insert(BBVisitedInfo[&MBB].LastCall); 268fe6060f1SDimitry Andric else // Otherwise, we need tile config to live in this BB. 269fe6060f1SDimitry Andric BBVisitedInfo[&MBB].NeedTileCfgLiveIn = true; 270fe6060f1SDimitry Andric // Always record the first AMX in case there's shape def after it. 271fe6060f1SDimitry Andric if (!BBVisitedInfo[&MBB].FirstAMX) 272fe6060f1SDimitry Andric BBVisitedInfo[&MBB].FirstAMX = MIRef(&MI, &MBB, Pos); 273fe6060f1SDimitry Andric } else if (MI.isCall() && isDestructiveCall(MI, AMXRegs)) { 274fe6060f1SDimitry Andric // Record the call only if the callee clobbers all AMX registers. 275fe6060f1SDimitry Andric BBVisitedInfo[&MBB].LastCall = MIRef(&MI, &MBB, Pos); 276fe6060f1SDimitry Andric } 277fe6060f1SDimitry Andric } 278fe6060f1SDimitry Andric if (BBVisitedInfo[&MBB].NeedTileCfgLiveIn) { 279fe6060f1SDimitry Andric if (&MBB == &MF.front()) 280fe6060f1SDimitry Andric CfgNeedInsert.insert(MIRef(&MBB)); 281fe6060f1SDimitry Andric else 282fe6060f1SDimitry Andric CfgLiveInBBs.push_back(&MBB); 283fe6060f1SDimitry Andric } 284fe6060f1SDimitry Andric if (BBVisitedInfo[&MBB].FirstAMX || BBVisitedInfo[&MBB].HasAMXRegLiveIn) 285fe6060f1SDimitry Andric for (auto *Succ : MBB.successors()) 286fe6060f1SDimitry Andric if (!isLoopBackEdge(Succ, &MBB)) 287fe6060f1SDimitry Andric BBVisitedInfo[Succ].HasAMXRegLiveIn = true; 288fe6060f1SDimitry Andric } 289fe6060f1SDimitry Andric 290fe6060f1SDimitry Andric // Update NeedTileCfgLiveIn for predecessors. 291fe6060f1SDimitry Andric while (!CfgLiveInBBs.empty()) { 292fe6060f1SDimitry Andric MachineBasicBlock *MBB = CfgLiveInBBs.pop_back_val(); 293fe6060f1SDimitry Andric for (auto *Pred : MBB->predecessors()) { 294fe6060f1SDimitry Andric if (BBVisitedInfo[Pred].LastCall) { 295fe6060f1SDimitry Andric CfgNeedInsert.insert(BBVisitedInfo[Pred].LastCall); 296fe6060f1SDimitry Andric } else if (!BBVisitedInfo[Pred].NeedTileCfgLiveIn) { 297fe6060f1SDimitry Andric BBVisitedInfo[Pred].NeedTileCfgLiveIn = true; 298fe6060f1SDimitry Andric if (Pred == &MF.front()) 299fe6060f1SDimitry Andric CfgNeedInsert.insert(MIRef(Pred)); 300fe6060f1SDimitry Andric else 301fe6060f1SDimitry Andric CfgLiveInBBs.push_back(Pred); 302fe6060f1SDimitry Andric } 303fe6060f1SDimitry Andric } 304fe6060f1SDimitry Andric } 305fe6060f1SDimitry Andric 306fe6060f1SDimitry Andric // There's no AMX instruction if we didn't find a tile config live in point. 307fe6060f1SDimitry Andric if (CfgNeedInsert.empty()) 308fe6060f1SDimitry Andric return false; 309fe6060f1SDimitry Andric 310fe6060f1SDimitry Andric // Avoid to insert ldtilecfg before any shape defs. 311fe6060f1SDimitry Andric SmallVector<MachineBasicBlock *, 8> WorkList; 312fe6060f1SDimitry Andric for (auto &I : ShapeBBs) { 313fe6060f1SDimitry Andric // TODO: We can hoist shapes across BBs here. 31481ad6265SDimitry Andric if (BBVisitedInfo[I.first].HasAMXRegLiveIn) { 31581ad6265SDimitry Andric // We are not able to config tile registers since the shape to config 31681ad6265SDimitry Andric // is not defined yet. Emit error message and continue. The function 31781ad6265SDimitry Andric // would not config tile registers. 31881ad6265SDimitry Andric emitErrorMsg(MF); 31981ad6265SDimitry Andric return false; 32081ad6265SDimitry Andric } 321fe6060f1SDimitry Andric if (BBVisitedInfo[I.first].FirstAMX && 322fe6060f1SDimitry Andric BBVisitedInfo[I.first].FirstAMX < I.second.back() && 32381ad6265SDimitry Andric !hoistShapesInBB(I.first, I.second)) { 32481ad6265SDimitry Andric emitErrorMsg(MF); 32581ad6265SDimitry Andric return false; 32681ad6265SDimitry Andric } 327fe6060f1SDimitry Andric WorkList.push_back(I.first); 328fe6060f1SDimitry Andric } 329fe6060f1SDimitry Andric while (!WorkList.empty()) { 330fe6060f1SDimitry Andric MachineBasicBlock *MBB = WorkList.pop_back_val(); 331fe6060f1SDimitry Andric for (auto *Pred : MBB->predecessors()) { 332fe6060f1SDimitry Andric if (!BBVisitedInfo[Pred].TileCfgForbidden && !isLoopBackEdge(MBB, Pred)) { 333fe6060f1SDimitry Andric BBVisitedInfo[Pred].TileCfgForbidden = true; 334fe6060f1SDimitry Andric WorkList.push_back(Pred); 335fe6060f1SDimitry Andric } 336fe6060f1SDimitry Andric } 337fe6060f1SDimitry Andric } 338fe6060f1SDimitry Andric 339fe6060f1SDimitry Andric DebugLoc DL; 340fe6060f1SDimitry Andric SmallSet<MIRef, 8> VisitedOrInserted; 341fe6060f1SDimitry Andric int SS = MF.getFrameInfo().CreateStackObject( 342fe6060f1SDimitry Andric ST.getTileConfigSize(), ST.getTileConfigAlignment(), false); 343fe6060f1SDimitry Andric 344fe6060f1SDimitry Andric // Try to insert for the tile config live in points. 345349cc55cSDimitry Andric for (const auto &I : CfgNeedInsert) { 346fe6060f1SDimitry Andric SmallSet<MIRef, 8> InsertPoints; 347fe6060f1SDimitry Andric SmallVector<MIRef, 8> WorkList({I}); 348fe6060f1SDimitry Andric while (!WorkList.empty()) { 349fe6060f1SDimitry Andric MIRef I = WorkList.pop_back_val(); 350fe6060f1SDimitry Andric if (!VisitedOrInserted.count(I)) { 351fe6060f1SDimitry Andric if (!BBVisitedInfo[I.MBB].TileCfgForbidden) { 352fe6060f1SDimitry Andric // If the BB is all shapes reachable, stop sink and try to insert. 353fe6060f1SDimitry Andric InsertPoints.insert(I); 354fe6060f1SDimitry Andric } else { 355fe6060f1SDimitry Andric // Avoid the BB to be multi visited. 356fe6060f1SDimitry Andric VisitedOrInserted.insert(I); 357fe6060f1SDimitry Andric // Sink the inserting point along the chain with NeedTileCfgLiveIn = 358fe6060f1SDimitry Andric // true when MBB isn't all shapes reachable. 359fe6060f1SDimitry Andric for (auto *Succ : I.MBB->successors()) 360fe6060f1SDimitry Andric if (BBVisitedInfo[Succ].NeedTileCfgLiveIn) 361fe6060f1SDimitry Andric WorkList.push_back(MIRef(Succ)); 362fe6060f1SDimitry Andric } 363fe6060f1SDimitry Andric } 364fe6060f1SDimitry Andric } 365fe6060f1SDimitry Andric 366fe6060f1SDimitry Andric // A given point might be forked due to shape conditions are not met. 367fe6060f1SDimitry Andric for (MIRef I : InsertPoints) { 368fe6060f1SDimitry Andric // Make sure we insert ldtilecfg after the last shape def in MBB. 369fe6060f1SDimitry Andric if (ShapeBBs.count(I.MBB) && I < ShapeBBs[I.MBB].back()) 370fe6060f1SDimitry Andric I = ShapeBBs[I.MBB].back(); 371fe6060f1SDimitry Andric // There're chances the MBB is sunk more than once. Record it to avoid 372fe6060f1SDimitry Andric // multi insert. 373fe6060f1SDimitry Andric if (VisitedOrInserted.insert(I).second) { 374fe6060f1SDimitry Andric auto II = I.MI ? I.MI->getIterator() : I.MBB->instr_begin(); 37581ad6265SDimitry Andric addFrameReference(BuildMI(*I.MBB, ++II, DL, TII->get(X86::PLDTILECFGV)), 376fe6060f1SDimitry Andric SS); 377fe6060f1SDimitry Andric } 378fe6060f1SDimitry Andric } 379fe6060f1SDimitry Andric } 380fe6060f1SDimitry Andric 381e8d8bef9SDimitry Andric // Zero stack slot. 382fe6060f1SDimitry Andric MachineBasicBlock &MBB = MF.front(); 383fe6060f1SDimitry Andric MachineInstr *MI = &*MBB.begin(); 384fe6060f1SDimitry Andric if (ST.hasAVX512()) { 385e8d8bef9SDimitry Andric Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass); 38681ad6265SDimitry Andric BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm); 387fe6060f1SDimitry Andric addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), SS) 388e8d8bef9SDimitry Andric .addReg(Zmm); 389fe6060f1SDimitry Andric } else if (ST.hasAVX2()) { 390fe6060f1SDimitry Andric Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass); 39181ad6265SDimitry Andric BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm); 392fe6060f1SDimitry Andric addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), SS) 393fe6060f1SDimitry Andric .addReg(Ymm); 394fe6060f1SDimitry Andric addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), SS, 32) 395fe6060f1SDimitry Andric .addReg(Ymm); 396fe6060f1SDimitry Andric } else { 397fe6060f1SDimitry Andric assert(ST.hasSSE2() && "AMX should assume SSE2 enabled"); 39881ad6265SDimitry Andric unsigned StoreOpc = ST.hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr; 399fe6060f1SDimitry Andric Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass); 40081ad6265SDimitry Andric BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm); 40181ad6265SDimitry Andric addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS).addReg(Xmm); 40281ad6265SDimitry Andric addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS, 16) 403fe6060f1SDimitry Andric .addReg(Xmm); 40481ad6265SDimitry Andric addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS, 32) 405fe6060f1SDimitry Andric .addReg(Xmm); 40681ad6265SDimitry Andric addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS, 48) 407fe6060f1SDimitry Andric .addReg(Xmm); 408e8d8bef9SDimitry Andric } 409fe6060f1SDimitry Andric // Fill in the palette first. 410fe6060f1SDimitry Andric addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), SS).addImm(1); 411e8d8bef9SDimitry Andric 412e8d8bef9SDimitry Andric return true; 413e8d8bef9SDimitry Andric } 414e8d8bef9SDimitry Andric 415e8d8bef9SDimitry Andric FunctionPass *llvm::createX86PreTileConfigPass() { 416e8d8bef9SDimitry Andric return new X86PreTileConfig(); 417e8d8bef9SDimitry Andric } 418