//===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Pass to preconfig the shape of physical tile registers
/// It inserts ldtilecfg ahead of each group of tile registers. The algorithm
/// walks each instruction of a basic block in reverse order. All the tile
/// registers that live out of the basic block are spilled and reloaded
/// before their users. It also checks the dependency of the shape to ensure
/// the shape is defined before ldtilecfg.
//
//===----------------------------------------------------------------------===//
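//
// For illustration only (a simplified MIR sketch, not taken from a specific
// test), the pass turns code like this:
//
//   %r:gr16 = MOV16ri 8
//   %c:gr16 = MOV16ri 16
//   %t:tile = PTILEZEROV %r:gr16, %c:gr16
//
// into this, so that the shape defs precede the inserted config:
//
//   %r:gr16 = MOV16ri 8
//   %c:gr16 = MOV16ri 16
//   PLDTILECFGV %stack.0, 1, $noreg, 0, $noreg
//   %t:tile = PTILEZEROV %r:gr16, %c:gr16
//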

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "fastpretileconfig"

STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads, "Number of loads added");

namespace {

class X86FastPreTileConfig : public MachineFunctionPass {
  MachineFunction *MF = nullptr;
  const X86Subtarget *ST = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  X86MachineFunctionInfo *X86FI = nullptr;
  MachineFrameInfo *MFI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineBasicBlock *MBB = nullptr;
  int CfgSS = -1;
  struct PHIInfo {
    Register Row;
    Register Col;
    Register StackAddr;
  };
  DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;

  /// Maps virtual regs to the frame index where these values are spilled.
  IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;

  /// Has a bit set for each tile virtual register for which it was determined
  /// that it is alive across blocks.
  BitVector MayLiveAcrossBlocks;

  int getStackSpaceFor(Register VirtReg);
  void InitializeTileConfigStackSpace();
  bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
  void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
  void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
              MachineOperand *RowMO, MachineOperand *ColMO);
  void canonicalizePHIs(MachineBasicBlock &MBB);
  void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
  void convertPHIs(MachineBasicBlock &MBB);
  bool configBasicBlock(MachineBasicBlock &MBB);

public:
  X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}

  /// Return the pass name.
  StringRef getPassName() const override {
    return "Fast Tile Register Preconfigure";
  }

  /// Perform tile register configuration.
  bool runOnMachineFunction(MachineFunction &MFunc) override;

  static char ID;
};

} // end anonymous namespace

char X86FastPreTileConfig::ID = 0;

INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
                      "Fast Tile Register Preconfigure", false, false)
INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
                    "Fast Tile Register Preconfigure", false, false)

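/// Returns true if \p A appears no later than \p B in \p MBB (a \p B equal to
/// the block end is treated as dominated by everything). This is a linear
/// scan from the start of the block, so both iterators must point into
/// \p MBB.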
static bool dominates(MachineBasicBlock &MBB,
                      MachineBasicBlock::const_iterator A,
                      MachineBasicBlock::const_iterator B) {
  auto MBBEnd = MBB.end();
  if (B == MBBEnd)
    return true;

  MachineBasicBlock::const_iterator I = MBB.begin();
  for (; &*I != A && &*I != B; ++I)
    ;

  return &*I == A;
}

/// This allocates space for the specified virtual register to be held on the
/// stack.
int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
  // Find the location Reg would belong...
  int SS = StackSlotForVirtReg[VirtReg];
  // Already has space allocated?
  if (SS != -1)
    return SS;

  // Allocate a new stack object for this spill location...
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);
  int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);

  // Assign the slot.
  StackSlotForVirtReg[VirtReg] = FrameIdx;
  return FrameIdx;
}

/// Returns false if \p VirtReg is known to not live out of the current config.
/// If \p VirtReg lives out of the current MBB, it must live out of the current
/// config.
bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
  if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
    return true;

  for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
    if (UseInst.getParent() != MBB) {
      MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
      return true;
    }

    // The use and def are in the same MBB. If the tile register is
    // reconfigured, it is clobbered and we need to spill and reload the
    // tile register.
    if (CfgMI) {
      if (dominates(*MBB, *CfgMI, UseInst)) {
        MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
        return true;
      }
    }
  }

  return false;
}

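// Zero-initialize the tile config stack object and set the palette byte to 1.
// For reference (per the Intel AMX ldtilecfg memory layout; summarized here,
// not derived from this file): byte 0 holds the palette id, byte 1 start_row,
// bytes 16-47 the per-tile colsb values, and bytes 48-63 the per-tile rows.
// The shape bytes are filled in later, after register allocation.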
void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
  MachineBasicBlock &MBB = MF->front();
  MachineInstr *MI = &*MBB.getFirstNonPHI();
  DebugLoc DL;
  if (ST->hasAVX512()) {
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
        .addReg(Zmm);
  } else if (ST->hasAVX2()) {
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
        .addReg(Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
                      32)
        .addReg(Ymm);
  } else {
    assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
    unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
        .addReg(Xmm);
  }
  // Fill in the palette first.
  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
      .addImm(1);
}

/// Insert spill instruction for \p VirtReg before \p Before.
/// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
                                 Register VirtReg, bool Kill) {
  LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
  int FI = getStackSpaceFor(VirtReg);
  LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');

  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  // Don't need shape information for tile store, because it is adjacent to
  // the tile def instruction.
  TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI,
                           Register());
  ++NumStores;

  // TODO: update DBG_VALUEs
}

/// Insert reload instruction for \p OrigReg before \p UseMI.
void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
                                  Register OrigReg, MachineOperand *RowMO,
                                  MachineOperand *ColMO) {
  int FI = getStackSpaceFor(OrigReg);
  const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
  Register TileReg;
  // Fold copy to tileload
  // BB1:
  // spill src to s
  //
  // BB2:
  // t = copy src
  // -->
  // t = tileload (s)
  if (UseMI->isCopy())
    TileReg = UseMI->getOperand(0).getReg();
  else
    TileReg = MRI->createVirtualRegister(&RC);
  // Can't use TII->loadRegFromStackSlot(), because we need the shape
  // information for reload.
  // tileloadd (%sp, %idx), %tmm
  unsigned Opc = X86::PTILELOADDV;
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  // FIXME: MBB is not the parent of UseMI.
  MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
                                TII->get(X86::MOV64ri), StrideReg)
                            .addImm(64);
  NewMI = addFrameReference(
      BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
          .addReg(RowMO->getReg())
          .addReg(ColMO->getReg()),
      FI);
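  // addFrameReference appended the 5 memory operands (base, scale, index,
  // disp, segment) after the row/col operands, so operand 5 is the index
  // slot; store the 64-byte stride there.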
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  RowMO->setIsKill(false);
  ColMO->setIsKill(false);
  // Erase copy instruction after it is folded.
  if (UseMI->isCopy()) {
    UseMI->eraseFromParent();
  } else {
    // Replace the register in the user MI.
    for (auto &MO : UseMI->operands()) {
      if (MO.isReg() && MO.getReg() == OrigReg)
        MO.setReg(TileReg);
    }
  }

  ++NumLoads;
  LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
                    << printReg(TileReg, TRI) << '\n');
}

static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  // The instruction must have 3 operands: tile def, row, col.
  if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
    return false;
  MachineOperand &MO = MI.getOperand(0);

  if (MO.isReg()) {
    Register Reg = MO.getReg();
    // FIXME: it may be used after Greedy RA and the physical
    // register is not rewritten yet.
    if (Reg.isVirtual() &&
        MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
      return true;
    if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
      return true;
  }

  return false;
}

static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
  MachineInstr *MI = MRI->getVRegDef(TileReg);
  if (isTileDef(MRI, *MI)) {
    MachineOperand *RowMO = &MI->getOperand(1);
    MachineOperand *ColMO = &MI->getOperand(2);
    return ShapeT(RowMO, ColMO, MRI);
  } else if (MI->isCopy()) {
    TileReg = MI->getOperand(1).getReg();
    return getShape(MRI, TileReg);
  }

  // The def should not be a PHI node, because we walk the MBB in reverse post
  // order.
  assert(MI->isPHI() && "Unexpected PHI when get shape.");
  llvm_unreachable("Unexpected MI when get shape.");
}

// BB0:
// spill t0 to s0
// BB1:
// spill t1 to s1
//
// BB2:
// t = phi [t0, bb0] [t1, bb1]
// -->
// row = phi [r0, bb0] [r1, bb1]
// col = phi [c0, bb0] [c1, bb1]
//   s = phi [s0, bb0] [s1, bb1]
//   t = tileload row, col, s
// The new instructions are inserted at the end of the phi nodes. The order
// of the original phi nodes is not preserved.
void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
                                      MachineInstr &PHI) {
  // 1. Create instruction to get stack slot address of each incoming block.
  // 2. Create PHI node for the stack address.
  // 3. Create PHI node for shape. If one of the incoming shapes is an
  //    immediate, use the immediate and delete the PHI node.
  // 4. Create tileload instruction from the stack address.
  Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                        TII->get(X86::PHI), StackAddrReg);
  Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), RowReg);
  Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), ColReg);
  // Record the mapping of phi node and its row/column information.
  VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};

  for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
    // Get the incoming tile register and its MBB.
    Register InTileReg = PHI.getOperand(I).getReg();
    // Mark it as liveout, so that it will be spilled when we visit the
    // incoming MBB. Otherwise, since the phi will be deleted, the spill
    // would be missed when visiting the incoming MBB.
    MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));
    MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();

    MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
    MachineBasicBlock::iterator InsertPos;
    if (TileDefMI->isPHI()) {
      InsertPos = TileDefMI->getParent()->getFirstNonPHI();
      if (VisitedPHIs.count(TileDefMI)) { // circular phi reference
        //        def t1
        //       /       \
        //  def t2       t3 = phi(t1, t4) <--
        //       \       /                  |
        //      t4 = phi(t2, t3)-------------
        //
        // For each (row, column and stack address) append phi incoming value.
        // Create r3 = phi(r1, r4)
        // Create r4 = phi(r2, r3)
        Register InRowReg = VisitedPHIs[TileDefMI].Row;
        Register InColReg = VisitedPHIs[TileDefMI].Col;
        Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
        continue;
      } else {
        // Recursively convert PHI to tileload
        convertPHI(TileDefMI->getParent(), *TileDefMI);
        // The PHI node is converted to a tileload instruction. Get the stack
        // address from the tileload operands.
        MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
        assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
        Register InRowReg = TileLoad->getOperand(1).getReg();
        Register InColReg = TileLoad->getOperand(2).getReg();
        Register InStackAddrReg = TileLoad->getOperand(3).getReg();
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
      }
    } else {
      InsertPos = TileDefMI->getIterator();

      // Fill the incoming operand of row/column phi instruction.
      ShapeT Shape = getShape(MRI, InTileReg);
      Shape.getRow()->setIsKill(false);
      Shape.getCol()->setIsKill(false);
      RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
      ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);

      // The incoming tile register lives out of its def BB, so it will be
      // spilled. Create an MI to get the spill stack slot address for the
      // tile register.
      int FI = getStackSpaceFor(InTileReg);
      Register InStackAddrReg =
          MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
      addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
                        TII->get(X86::LEA64r), InStackAddrReg)
                    .addFrameIndex(FI),
                0);
      AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
    }
  }

  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
      .addImm(64);
  Register TileReg = PHI.getOperand(0).getReg();
  MachineInstr *NewMI = addDirectMem(
      BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
          .addReg(RowReg)
          .addReg(ColReg),
      StackAddrReg);
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  PHI.eraseFromParent();
  VisitedPHIs.erase(&PHI);
}

static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  MachineOperand &MO = MI.getOperand(0);
  if (MO.isReg() && MO.getReg().isVirtual() &&
      MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID)
    return true;
  return false;
}

void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;

  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  // Canonicalize the phi nodes first. One tile phi may depend on a previous
  // phi node. For the case below, we need to convert %t4.
  //
  // BB0:
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t3 BB0)
  // -->
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t2 BB0)
  //
  while (!PHIs.empty()) {
    MachineInstr *PHI = PHIs.pop_back_val();

    // Find the operand that is incoming from the same MBB and whose def
    // is also a phi node.
    MachineOperand *InMO = nullptr;
    MachineInstr *DefMI = nullptr;
    for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
      Register InTileReg = PHI->getOperand(I).getReg();
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      DefMI = MRI->getVRegDef(InTileReg);
      if (InMBB != &MBB || !DefMI->isPHI())
        continue;

      InMO = &PHI->getOperand(I);
      break;
    }
    // If we can't find such an operand, do nothing.
    if (!InMO)
      continue;

    // The current phi node depends on a previous phi node. Break the
    // dependency.
    Register DefTileReg;
    for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      if (InMBB != &MBB)
        continue;
      DefTileReg = DefMI->getOperand(I).getReg();
      InMO->setReg(DefTileReg);
      break;
    }
  }
}

void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;
  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  while (!PHIs.empty()) {
    MachineInstr *MI = PHIs.pop_back_val();
    VisitedPHIs.clear();
    convertPHI(&MBB, *MI);
  }
}

// PreTileConfig should configure the tile registers on a per-basic-block
// basis.
bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
  this->MBB = &MBB;
  bool Change = false;
  MachineInstr *LastShapeMI = nullptr;
  MachineInstr *LastTileCfg = nullptr;
  bool HasUnconfigTile = false;

  auto Config = [&](MachineInstr &Before) {
    if (CfgSS == -1)
      CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
                                     ST->getTileConfigAlignment(), false);
    LastTileCfg = addFrameReference(
        BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
    LastShapeMI = nullptr;
    Change = true;
  };
  auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg.isVirtual() &&
          MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
        return true;
    }
    return false;
  };
  for (MachineInstr &MI : reverse(MBB)) {
    // We have transformed phi nodes before configuring the BB.
    if (MI.isPHI())
      break;
    // Don't collect the shape of a used tile; the tile should be defined
    // before the tile use. Spill and reload would happen if there is only a
    // tile use after ldtilecfg, so the shape can be collected from the
    // reload. Take the code below for example. %t would be reloaded before
    // tilestore.
    // call
    // ....
    // tilestore %r, %c, %t
    // -->
    // call
    // ldtilecfg
    // %t = tileload %r, %c
    // tilestore %r, %c, %t
    if (HasTileOperand(MRI, MI))
      HasUnconfigTile = true;
    // According to the AMX ABI, all the tile registers including the config
    // register are volatile. The caller needs to save/restore the config
    // register.
    if (MI.isCall() && HasUnconfigTile) {
      MachineBasicBlock::iterator I;
      if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
        I = ++LastShapeMI->getIterator();
      else
        I = ++MI.getIterator();
      Config(*I);
      HasUnconfigTile = false;
      continue;
    }
    if (!isTileDef(MRI, MI))
      continue;
    //
    //---------------------------------------------------------------------
    // Don't handle COPY instructions. If the src and dst of the COPY can be
    // in the same config, as in the case below, we just check the shape of
    // t0.
    // def row0
    // def col0
    // ldtilecfg
    // t0 = tilezero(row0, col0)
    // t1 = copy t0
    // ...
    // If the src and dst of the COPY can NOT be in the same config, as in
    // the case below, a reload is generated before the copy instruction.
    // def row0
    // def col0
    // t0 = tilezero(row0, col0)
    // spill t0
    // ...
    // def row1
    // def col1
    // ldtilecfg
    // t1 = tilezero(row1, col1)
    // reload t0
    // t1 = copy t0
    //---------------------------------------------------------------------
    //
    // If MI dominates the last shape def instruction, we need to insert
    // ldtilecfg after LastShapeMI now. The config doesn't include
    // current MI.
    //   def row0
    //   def col0
    //   tilezero(row0, col0)  <- MI
    //   def row1
    //   def col1
    //   ldtilecfg             <- insert
    //   tilezero(row1, col1)
    if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
      Config(*(++LastShapeMI->getIterator()));
    MachineOperand *RowMO = &MI.getOperand(1);
    MachineOperand *ColMO = &MI.getOperand(2);
    MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
    MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
    // If the shape is defined in the current MBB, check the domination.
    // FIXME: how about loops?
    if (RowMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = RowMI;
      else if (dominates(MBB, LastShapeMI, RowMI))
        LastShapeMI = RowMI;
    }
    if (ColMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = ColMI;
      else if (dominates(MBB, LastShapeMI, ColMI))
        LastShapeMI = ColMI;
    }
    // If a user of the tile register lives out of the tile config, spill the
    // register and reload it before the user.
    Register TileReg = MI.getOperand(0).getReg();
    if (mayLiveOut(TileReg, LastTileCfg))
      spill(++MI.getIterator(), TileReg, false);
    for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
      if (UseMI.getParent() == &MBB) {
        // The use should not cross ldtilecfg: only reload when the last
        // ldtilecfg dominates the use.
        if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
          continue;
        // Reload before UseMI.
        reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      } else {
        // Don't reload for phi instructions; we handle phi reloads
        // separately.
        // TODO: merge the reloads for the same user MBB.
        if (!UseMI.isPHI())
          reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      }
    }
  }

  // Configure tile registers at the head of the MBB
  if (HasUnconfigTile) {
    MachineInstr *Before;
    if (LastShapeMI == nullptr || LastShapeMI->isPHI())
      Before = &*MBB.getFirstNonPHI();
    else
      Before = &*(++LastShapeMI->getIterator());

    Config(*Before);
  }

  return Change;
}

bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
  MF = &MFunc;
  MRI = &MFunc.getRegInfo();
  ST = &MFunc.getSubtarget<X86Subtarget>();
  TII = ST->getInstrInfo();
  X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
  MFI = &MFunc.getFrameInfo();
  TRI = ST->getRegisterInfo();
  CfgSS = -1;

  unsigned NumVirtRegs = MRI->getNumVirtRegs();
  // Abandon early if there is no tile register to config.
  bool HasVirtTileReg = false;
  for (unsigned I = 0, E = NumVirtRegs; I != E; ++I) {
    Register VirtReg = Register::index2VirtReg(I);
    if (MRI->getRegClass(VirtReg)->getID() == X86::TILERegClassID) {
      HasVirtTileReg = true;
      break;
    }
  }
  if (!HasVirtTileReg)
    return false;

  StackSlotForVirtReg.resize(NumVirtRegs);
  MayLiveAcrossBlocks.clear();
  // We will create registers during configuration. The factor of 3 is to
  // make sure the virtual register numbers don't exceed the size of
  // the bit vector.
  MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
  bool Change = false;
  assert(MRI->isSSA());

  // Canonicalize the phi nodes first.
  for (MachineBasicBlock &MBB : MFunc)
    canonicalizePHIs(MBB);

  // Loop over all of the basic blocks in reverse post order and insert
  // ldtilecfg for tile registers. The reverse post order is to facilitate
  // PHI node conversion.
  ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
  for (MachineBasicBlock *MBB : RPOT) {
    convertPHIs(*MBB);
    Change |= configBasicBlock(*MBB);
  }

  if (Change)
    InitializeTileConfigStackSpace();

  StackSlotForVirtReg.clear();
  return Change;
}

FunctionPass *llvm::createX86FastPreTileConfigPass() {
  return new X86FastPreTileConfig();
}