xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/X86LowerTileCopy.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1fe6060f1SDimitry Andric //===-- X86LowerTileCopy.cpp - Expand Tile Copy Instructions---------------===//
2fe6060f1SDimitry Andric //
3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fe6060f1SDimitry Andric //
7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8fe6060f1SDimitry Andric //
// This file defines the pass that lowers AMX tile copy instructions. Since
// there is no tile copy instruction, we need to store the tile register to the
// stack and load it from the stack into another tile register. We need an
// extra GR to hold the stride, and we need a stack slot to hold the tile data.
// We run this pass after copy propagation, so that we don't miss copy
// optimizations, and before prolog/epilog insertion, so that we can still
// allocate stack slots.
16fe6060f1SDimitry Andric //
17fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
18fe6060f1SDimitry Andric 
19fe6060f1SDimitry Andric #include "X86.h"
20fe6060f1SDimitry Andric #include "X86InstrBuilder.h"
21fe6060f1SDimitry Andric #include "X86InstrInfo.h"
22*0fca6ea1SDimitry Andric #include "X86MachineFunctionInfo.h"
23fe6060f1SDimitry Andric #include "X86Subtarget.h"
24*0fca6ea1SDimitry Andric #include "llvm/CodeGen/LiveRegUnits.h"
25fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
26fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
27fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
28fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
29fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
30fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
31fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
32fe6060f1SDimitry Andric #include "llvm/CodeGen/Passes.h"
33fe6060f1SDimitry Andric #include "llvm/IR/DebugLoc.h"
34fe6060f1SDimitry Andric #include "llvm/InitializePasses.h"
35fe6060f1SDimitry Andric #include "llvm/Support/Debug.h"
36fe6060f1SDimitry Andric 
37fe6060f1SDimitry Andric using namespace llvm;
38fe6060f1SDimitry Andric 
39fe6060f1SDimitry Andric #define DEBUG_TYPE "x86-lower-tile-copy"
40fe6060f1SDimitry Andric 
41fe6060f1SDimitry Andric namespace {
42fe6060f1SDimitry Andric 
43fe6060f1SDimitry Andric class X86LowerTileCopy : public MachineFunctionPass {
44fe6060f1SDimitry Andric public:
45fe6060f1SDimitry Andric   static char ID;
46fe6060f1SDimitry Andric 
47fe6060f1SDimitry Andric   X86LowerTileCopy() : MachineFunctionPass(ID) {}
48fe6060f1SDimitry Andric 
49fe6060f1SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
50fe6060f1SDimitry Andric 
51fe6060f1SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
52fe6060f1SDimitry Andric 
53fe6060f1SDimitry Andric   StringRef getPassName() const override { return "X86 Lower Tile Copy"; }
54fe6060f1SDimitry Andric };
55fe6060f1SDimitry Andric 
56fe6060f1SDimitry Andric } // namespace
57fe6060f1SDimitry Andric 
58fe6060f1SDimitry Andric char X86LowerTileCopy::ID = 0;
59fe6060f1SDimitry Andric 
60fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(X86LowerTileCopy, "lowertilecopy", "Tile Copy Lowering",
61fe6060f1SDimitry Andric                       false, false)
62fe6060f1SDimitry Andric INITIALIZE_PASS_END(X86LowerTileCopy, "lowertilecopy", "Tile Copy Lowering",
63fe6060f1SDimitry Andric                     false, false)
64fe6060f1SDimitry Andric 
65fe6060f1SDimitry Andric void X86LowerTileCopy::getAnalysisUsage(AnalysisUsage &AU) const {
66fe6060f1SDimitry Andric   AU.setPreservesAll();
67fe6060f1SDimitry Andric   MachineFunctionPass::getAnalysisUsage(AU);
68fe6060f1SDimitry Andric }
69fe6060f1SDimitry Andric 
70fe6060f1SDimitry Andric FunctionPass *llvm::createX86LowerTileCopyPass() {
71fe6060f1SDimitry Andric   return new X86LowerTileCopy();
72fe6060f1SDimitry Andric }
73fe6060f1SDimitry Andric 
74fe6060f1SDimitry Andric bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
75*0fca6ea1SDimitry Andric   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
76*0fca6ea1SDimitry Andric   if (FuncInfo->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
77*0fca6ea1SDimitry Andric     return false;
78*0fca6ea1SDimitry Andric 
79fe6060f1SDimitry Andric   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
80fe6060f1SDimitry Andric   const X86InstrInfo *TII = ST.getInstrInfo();
81*0fca6ea1SDimitry Andric   const TargetRegisterInfo *TRI = ST.getRegisterInfo();
82*0fca6ea1SDimitry Andric   BitVector GR64Regs =
83*0fca6ea1SDimitry Andric       TRI->getAllocatableSet(MF, TRI->getRegClass(X86::GR64RegClassID));
84*0fca6ea1SDimitry Andric   BitVector TILERegs =
85*0fca6ea1SDimitry Andric       TRI->getAllocatableSet(MF, TRI->getRegClass(X86::TILERegClassID));
86fe6060f1SDimitry Andric   bool Changed = false;
87fe6060f1SDimitry Andric 
88fe6060f1SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
89*0fca6ea1SDimitry Andric     LiveRegUnits UsedRegs(*TRI);
90*0fca6ea1SDimitry Andric     UsedRegs.addLiveOuts(MBB);
91*0fca6ea1SDimitry Andric     for (MachineInstr &MI : llvm::make_early_inc_range(reverse(MBB))) {
92*0fca6ea1SDimitry Andric       UsedRegs.stepBackward(MI);
93fe6060f1SDimitry Andric       if (!MI.isCopy())
94fe6060f1SDimitry Andric         continue;
95fe6060f1SDimitry Andric       MachineOperand &DstMO = MI.getOperand(0);
96fe6060f1SDimitry Andric       MachineOperand &SrcMO = MI.getOperand(1);
97fe6060f1SDimitry Andric       Register SrcReg = SrcMO.getReg();
98fe6060f1SDimitry Andric       Register DstReg = DstMO.getReg();
99fe6060f1SDimitry Andric       if (!X86::TILERegClass.contains(DstReg, SrcReg))
100fe6060f1SDimitry Andric         continue;
101fe6060f1SDimitry Andric 
102fe6060f1SDimitry Andric       // Allocate stack slot for tile register
103fe6060f1SDimitry Andric       unsigned Size = TRI->getSpillSize(X86::TILERegClass);
104fe6060f1SDimitry Andric       Align Alignment = TRI->getSpillAlign(X86::TILERegClass);
105fe6060f1SDimitry Andric       int TileSS = MF.getFrameInfo().CreateSpillStackObject(Size, Alignment);
106*0fca6ea1SDimitry Andric 
107*0fca6ea1SDimitry Andric       int StrideSS = 0;
108*0fca6ea1SDimitry Andric 
109*0fca6ea1SDimitry Andric       // Pick a killed register to avoid a save/reload.
110*0fca6ea1SDimitry Andric       Register GR64Cand = X86::NoRegister;
111*0fca6ea1SDimitry Andric       for (auto RegT : GR64Regs.set_bits()) {
112*0fca6ea1SDimitry Andric         if (UsedRegs.available(RegT)) {
113*0fca6ea1SDimitry Andric           GR64Cand = RegT;
114*0fca6ea1SDimitry Andric           break;
115*0fca6ea1SDimitry Andric         }
116*0fca6ea1SDimitry Andric       }
117*0fca6ea1SDimitry Andric 
118*0fca6ea1SDimitry Andric       const DebugLoc &DL = MI.getDebugLoc();
119*0fca6ea1SDimitry Andric       if (GR64Cand) {
120*0fca6ea1SDimitry Andric         // mov 64 %reg
121*0fca6ea1SDimitry Andric         BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), GR64Cand).addImm(64);
122*0fca6ea1SDimitry Andric       } else {
123*0fca6ea1SDimitry Andric         // No available register? Save RAX and reload it after use.
124*0fca6ea1SDimitry Andric 
125fe6060f1SDimitry Andric         // Allocate stack slot for stride register
126fe6060f1SDimitry Andric         Size = TRI->getSpillSize(X86::GR64RegClass);
127fe6060f1SDimitry Andric         Alignment = TRI->getSpillAlign(X86::GR64RegClass);
128*0fca6ea1SDimitry Andric         StrideSS = MF.getFrameInfo().CreateSpillStackObject(Size, Alignment);
129fe6060f1SDimitry Andric 
130*0fca6ea1SDimitry Andric         // mov %reg (%sp)
131*0fca6ea1SDimitry Andric         addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV64mr)),
132*0fca6ea1SDimitry Andric                           StrideSS)
133*0fca6ea1SDimitry Andric             .addReg(X86::RAX);
134*0fca6ea1SDimitry Andric         // mov 64 %reg
135*0fca6ea1SDimitry Andric         BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), X86::RAX).addImm(64);
136*0fca6ea1SDimitry Andric       }
137fe6060f1SDimitry Andric       // tilestored %tmm, (%sp, %idx)
1387a6dacacSDimitry Andric #define GET_EGPR_IF_ENABLED(OPC) (ST.hasEGPR() ? OPC##_EVEX : OPC)
1397a6dacacSDimitry Andric       unsigned Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED);
140fe6060f1SDimitry Andric       MachineInstr *NewMI =
141fe6060f1SDimitry Andric           addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc)), TileSS)
142fe6060f1SDimitry Andric               .addReg(SrcReg, getKillRegState(SrcMO.isKill()));
143fe6060f1SDimitry Andric       MachineOperand &MO = NewMI->getOperand(2);
144*0fca6ea1SDimitry Andric       MO.setReg(GR64Cand ? GR64Cand : X86::RAX);
145fe6060f1SDimitry Andric       MO.setIsKill(true);
146fe6060f1SDimitry Andric       // tileloadd (%sp, %idx), %tmm
1477a6dacacSDimitry Andric       Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
1487a6dacacSDimitry Andric #undef GET_EGPR_IF_ENABLED
149fe6060f1SDimitry Andric       NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc), DstReg),
150fe6060f1SDimitry Andric                                 TileSS);
151*0fca6ea1SDimitry Andric       if (!GR64Cand) {
152fe6060f1SDimitry Andric         // restore %rax
153fe6060f1SDimitry Andric         // mov (%sp) %rax
154*0fca6ea1SDimitry Andric         addFrameReference(
155*0fca6ea1SDimitry Andric             BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm), X86::RAX), StrideSS);
156*0fca6ea1SDimitry Andric       }
157fe6060f1SDimitry Andric       MI.eraseFromParent();
158fe6060f1SDimitry Andric       Changed = true;
159fe6060f1SDimitry Andric     }
160fe6060f1SDimitry Andric   }
161fe6060f1SDimitry Andric   return Changed;
162fe6060f1SDimitry Andric }
163