xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/X86FixupVectorConstants.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1*06c3fb27SDimitry Andric //===-- X86FixupVectorConstants.cpp - optimize constant generation  -------===//
2*06c3fb27SDimitry Andric //
3*06c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*06c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*06c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*06c3fb27SDimitry Andric //
7*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
8*06c3fb27SDimitry Andric //
9*06c3fb27SDimitry Andric // This file examines all full size vector constant pool loads and attempts to
10*06c3fb27SDimitry Andric // replace them with smaller constant pool entries, including:
11*06c3fb27SDimitry Andric // * Converting AVX512 memory-fold instructions to their broadcast-fold form
12*06c3fb27SDimitry Andric // * TODO: Broadcasting of full width loads.
13*06c3fb27SDimitry Andric // * TODO: Sign/Zero extension of full width loads.
14*06c3fb27SDimitry Andric //
15*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
16*06c3fb27SDimitry Andric 
17*06c3fb27SDimitry Andric #include "X86.h"
18*06c3fb27SDimitry Andric #include "X86InstrFoldTables.h"
19*06c3fb27SDimitry Andric #include "X86InstrInfo.h"
20*06c3fb27SDimitry Andric #include "X86Subtarget.h"
21*06c3fb27SDimitry Andric #include "llvm/ADT/Statistic.h"
22*06c3fb27SDimitry Andric #include "llvm/CodeGen/MachineConstantPool.h"
23*06c3fb27SDimitry Andric 
24*06c3fb27SDimitry Andric using namespace llvm;
25*06c3fb27SDimitry Andric 
#define DEBUG_TYPE "x86-fixup-vector-constants"

// Counts constant-pool loads rewritten to a narrower/broadcast form.
STATISTIC(NumInstChanges, "Number of instructions changes");
29*06c3fb27SDimitry Andric 
namespace {
// Late machine pass that shrinks full-width vector constant-pool loads into
// smaller entries (e.g. broadcast-folded forms) where a suitable splat exists.
class X86FixupVectorConstantsPass : public MachineFunctionPass {
public:
  static char ID;

  X86FixupVectorConstantsPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override {
    return "X86 Fixup Vector Constants";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  // Attempt to rewrite a single instruction; returns true if MI was changed.
  bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
                          MachineInstr &MI);

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

private:
  const X86InstrInfo *TII = nullptr; // Set per function in runOnMachineFunction.
  const X86Subtarget *ST = nullptr;  // Feature queries (AVX2/DQI/BWI).
  // NOTE(review): SM is assigned in runOnMachineFunction but never read in
  // this file — presumably reserved for future profitability checks.
  const MCSchedModel *SM = nullptr;
};
} // end anonymous namespace
57*06c3fb27SDimitry Andric 
// Pass identification, replacement for typeid.
char X86FixupVectorConstantsPass::ID = 0;

INITIALIZE_PASS(X86FixupVectorConstantsPass, DEBUG_TYPE, DEBUG_TYPE, false, false)
61*06c3fb27SDimitry Andric 
// Factory hook used by the X86 target to add this pass to the pipeline.
FunctionPass *llvm::createX86FixupVectorConstants() {
  return new X86FixupVectorConstantsPass();
}
65*06c3fb27SDimitry Andric 
66*06c3fb27SDimitry Andric static const Constant *getConstantFromPool(const MachineInstr &MI,
67*06c3fb27SDimitry Andric                                            const MachineOperand &Op) {
68*06c3fb27SDimitry Andric   if (!Op.isCPI() || Op.getOffset() != 0)
69*06c3fb27SDimitry Andric     return nullptr;
70*06c3fb27SDimitry Andric 
71*06c3fb27SDimitry Andric   ArrayRef<MachineConstantPoolEntry> Constants =
72*06c3fb27SDimitry Andric       MI.getParent()->getParent()->getConstantPool()->getConstants();
73*06c3fb27SDimitry Andric   const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];
74*06c3fb27SDimitry Andric 
75*06c3fb27SDimitry Andric   // Bail if this is a machine constant pool entry, we won't be able to dig out
76*06c3fb27SDimitry Andric   // anything useful.
77*06c3fb27SDimitry Andric   if (ConstantEntry.isMachineConstantPoolEntry())
78*06c3fb27SDimitry Andric     return nullptr;
79*06c3fb27SDimitry Andric 
80*06c3fb27SDimitry Andric   return ConstantEntry.Val.ConstVal;
81*06c3fb27SDimitry Andric }
82*06c3fb27SDimitry Andric 
83*06c3fb27SDimitry Andric // Attempt to extract the full width of bits data from the constant.
84*06c3fb27SDimitry Andric static std::optional<APInt> extractConstantBits(const Constant *C) {
85*06c3fb27SDimitry Andric   unsigned NumBits = C->getType()->getPrimitiveSizeInBits();
86*06c3fb27SDimitry Andric 
87*06c3fb27SDimitry Andric   if (auto *CInt = dyn_cast<ConstantInt>(C))
88*06c3fb27SDimitry Andric     return CInt->getValue();
89*06c3fb27SDimitry Andric 
90*06c3fb27SDimitry Andric   if (auto *CFP = dyn_cast<ConstantFP>(C))
91*06c3fb27SDimitry Andric     return CFP->getValue().bitcastToAPInt();
92*06c3fb27SDimitry Andric 
93*06c3fb27SDimitry Andric   if (auto *CV = dyn_cast<ConstantVector>(C)) {
94*06c3fb27SDimitry Andric     if (auto *CVSplat = CV->getSplatValue(/*AllowUndefs*/ true)) {
95*06c3fb27SDimitry Andric       if (std::optional<APInt> Bits = extractConstantBits(CVSplat)) {
96*06c3fb27SDimitry Andric         assert((NumBits % Bits->getBitWidth()) == 0 && "Illegal splat");
97*06c3fb27SDimitry Andric         return APInt::getSplat(NumBits, *Bits);
98*06c3fb27SDimitry Andric       }
99*06c3fb27SDimitry Andric     }
100*06c3fb27SDimitry Andric   }
101*06c3fb27SDimitry Andric 
102*06c3fb27SDimitry Andric   if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
103*06c3fb27SDimitry Andric     bool IsInteger = CDS->getElementType()->isIntegerTy();
104*06c3fb27SDimitry Andric     bool IsFloat = CDS->getElementType()->isHalfTy() ||
105*06c3fb27SDimitry Andric                    CDS->getElementType()->isBFloatTy() ||
106*06c3fb27SDimitry Andric                    CDS->getElementType()->isFloatTy() ||
107*06c3fb27SDimitry Andric                    CDS->getElementType()->isDoubleTy();
108*06c3fb27SDimitry Andric     if (IsInteger || IsFloat) {
109*06c3fb27SDimitry Andric       APInt Bits = APInt::getZero(NumBits);
110*06c3fb27SDimitry Andric       unsigned EltBits = CDS->getElementType()->getPrimitiveSizeInBits();
111*06c3fb27SDimitry Andric       for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
112*06c3fb27SDimitry Andric         if (IsInteger)
113*06c3fb27SDimitry Andric           Bits.insertBits(CDS->getElementAsAPInt(I), I * EltBits);
114*06c3fb27SDimitry Andric         else
115*06c3fb27SDimitry Andric           Bits.insertBits(CDS->getElementAsAPFloat(I).bitcastToAPInt(),
116*06c3fb27SDimitry Andric                           I * EltBits);
117*06c3fb27SDimitry Andric       }
118*06c3fb27SDimitry Andric       return Bits;
119*06c3fb27SDimitry Andric     }
120*06c3fb27SDimitry Andric   }
121*06c3fb27SDimitry Andric 
122*06c3fb27SDimitry Andric   return std::nullopt;
123*06c3fb27SDimitry Andric }
124*06c3fb27SDimitry Andric 
// Attempt to compute the splat width of bits data by normalizing the splat to
// remove undefs.
static std::optional<APInt> getSplatableConstant(const Constant *C,
                                                 unsigned SplatBitWidth) {
  const Type *Ty = C->getType();
  assert((Ty->getPrimitiveSizeInBits() % SplatBitWidth) == 0 &&
         "Illegal splat width");

  // Fast path: if the raw constant bits already repeat at SplatBitWidth,
  // return a single repetition.
  if (std::optional<APInt> Bits = extractConstantBits(C))
    if (Bits->isSplat(SplatBitWidth))
      return Bits->trunc(SplatBitWidth);

  // Detect general splats with undefs.
  // TODO: Do we need to handle NumEltsBits > SplatBitWidth splitting?
  if (auto *CV = dyn_cast<ConstantVector>(C)) {
    unsigned NumOps = CV->getNumOperands();
    unsigned NumEltsBits = Ty->getScalarSizeInBits();
    // Number of vector elements that make up one splat repetition.
    unsigned NumScaleOps = SplatBitWidth / NumEltsBits;
    if ((SplatBitWidth % NumEltsBits) == 0) {
      // Collect the elements and ensure that within the repeated splat sequence
      // they either match or are undef.
      SmallVector<Constant *, 16> Sequence(NumScaleOps, nullptr);
      for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
        if (Constant *Elt = CV->getAggregateElement(Idx)) {
          if (isa<UndefValue>(Elt))
            continue;
          unsigned SplatIdx = Idx % NumScaleOps;
          if (!Sequence[SplatIdx] || Sequence[SplatIdx] == Elt) {
            Sequence[SplatIdx] = Elt;
            continue;
          }
        }
        // Missing aggregate element or a conflicting non-undef value: bail.
        return std::nullopt;
      }
      // Extract the constant bits forming the splat and insert into the bits
      // data, leave undef as zero.
      APInt SplatBits = APInt::getZero(SplatBitWidth);
      for (unsigned I = 0; I != NumScaleOps; ++I) {
        if (!Sequence[I])
          continue;
        if (std::optional<APInt> Bits = extractConstantBits(Sequence[I])) {
          SplatBits.insertBits(*Bits, I * Bits->getBitWidth());
          continue;
        }
        return std::nullopt;
      }
      return SplatBits;
    }
  }

  return std::nullopt;
}
177*06c3fb27SDimitry Andric 
178*06c3fb27SDimitry Andric // Attempt to rebuild a normalized splat vector constant of the requested splat
179*06c3fb27SDimitry Andric // width, built up of potentially smaller scalar values.
180*06c3fb27SDimitry Andric // NOTE: We don't always bother converting to scalars if the vector length is 1.
181*06c3fb27SDimitry Andric static Constant *rebuildSplatableConstant(const Constant *C,
182*06c3fb27SDimitry Andric                                           unsigned SplatBitWidth) {
183*06c3fb27SDimitry Andric   std::optional<APInt> Splat = getSplatableConstant(C, SplatBitWidth);
184*06c3fb27SDimitry Andric   if (!Splat)
185*06c3fb27SDimitry Andric     return nullptr;
186*06c3fb27SDimitry Andric 
187*06c3fb27SDimitry Andric   // Determine scalar size to use for the constant splat vector, clamping as we
188*06c3fb27SDimitry Andric   // might have found a splat smaller than the original constant data.
189*06c3fb27SDimitry Andric   const Type *OriginalType = C->getType();
190*06c3fb27SDimitry Andric   Type *SclTy = OriginalType->getScalarType();
191*06c3fb27SDimitry Andric   unsigned NumSclBits = SclTy->getPrimitiveSizeInBits();
192*06c3fb27SDimitry Andric   NumSclBits = std::min<unsigned>(NumSclBits, SplatBitWidth);
193*06c3fb27SDimitry Andric 
194*06c3fb27SDimitry Andric   if (NumSclBits == 8) {
195*06c3fb27SDimitry Andric     SmallVector<uint8_t> RawBits;
196*06c3fb27SDimitry Andric     for (unsigned I = 0; I != SplatBitWidth; I += 8)
197*06c3fb27SDimitry Andric       RawBits.push_back(Splat->extractBits(8, I).getZExtValue());
198*06c3fb27SDimitry Andric     return ConstantDataVector::get(OriginalType->getContext(), RawBits);
199*06c3fb27SDimitry Andric   }
200*06c3fb27SDimitry Andric 
201*06c3fb27SDimitry Andric   if (NumSclBits == 16) {
202*06c3fb27SDimitry Andric     SmallVector<uint16_t> RawBits;
203*06c3fb27SDimitry Andric     for (unsigned I = 0; I != SplatBitWidth; I += 16)
204*06c3fb27SDimitry Andric       RawBits.push_back(Splat->extractBits(16, I).getZExtValue());
205*06c3fb27SDimitry Andric     if (SclTy->is16bitFPTy())
206*06c3fb27SDimitry Andric       return ConstantDataVector::getFP(SclTy, RawBits);
207*06c3fb27SDimitry Andric     return ConstantDataVector::get(OriginalType->getContext(), RawBits);
208*06c3fb27SDimitry Andric   }
209*06c3fb27SDimitry Andric 
210*06c3fb27SDimitry Andric   if (NumSclBits == 32) {
211*06c3fb27SDimitry Andric     SmallVector<uint32_t> RawBits;
212*06c3fb27SDimitry Andric     for (unsigned I = 0; I != SplatBitWidth; I += 32)
213*06c3fb27SDimitry Andric       RawBits.push_back(Splat->extractBits(32, I).getZExtValue());
214*06c3fb27SDimitry Andric     if (SclTy->isFloatTy())
215*06c3fb27SDimitry Andric       return ConstantDataVector::getFP(SclTy, RawBits);
216*06c3fb27SDimitry Andric     return ConstantDataVector::get(OriginalType->getContext(), RawBits);
217*06c3fb27SDimitry Andric   }
218*06c3fb27SDimitry Andric 
219*06c3fb27SDimitry Andric   // Fallback to i64 / double.
220*06c3fb27SDimitry Andric   SmallVector<uint64_t> RawBits;
221*06c3fb27SDimitry Andric   for (unsigned I = 0; I != SplatBitWidth; I += 64)
222*06c3fb27SDimitry Andric     RawBits.push_back(Splat->extractBits(64, I).getZExtValue());
223*06c3fb27SDimitry Andric   if (SclTy->isDoubleTy())
224*06c3fb27SDimitry Andric     return ConstantDataVector::getFP(SclTy, RawBits);
225*06c3fb27SDimitry Andric   return ConstantDataVector::get(OriginalType->getContext(), RawBits);
226*06c3fb27SDimitry Andric }
227*06c3fb27SDimitry Andric 
// Examine a single full-width vector constant-pool load (or an EVEX
// memory-fold instruction) and, when its constant is a detectable splat,
// rewrite it to a broadcast opcode reading a smaller pool entry.
bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                                                     MachineBasicBlock &MBB,
                                                     MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  MachineConstantPool *CP = MI.getParent()->getParent()->getConstantPool();
  bool HasDQI = ST->hasDQI();
  bool HasBWI = ST->hasBWI();

  // Given candidate broadcast opcodes per splat width (0 == no candidate for
  // that width), try the narrowest usable splat first; on success, retarget
  // MI's constant-pool operand at a new (smaller) entry and swap in the
  // broadcast opcode. OperandNo is the index where the memory operand group
  // starts within MI.
  auto ConvertToBroadcast = [&](unsigned OpBcst256, unsigned OpBcst128,
                                unsigned OpBcst64, unsigned OpBcst32,
                                unsigned OpBcst16, unsigned OpBcst8,
                                unsigned OperandNo) {
    assert(MI.getNumOperands() >= (OperandNo + X86::AddrNumOperands) &&
           "Unexpected number of operands!");

    MachineOperand &CstOp = MI.getOperand(OperandNo + X86::AddrDisp);
    if (auto *C = getConstantFromPool(MI, CstOp)) {
      // Attempt to detect a suitable splat from increasing splat widths.
      std::pair<unsigned, unsigned> Broadcasts[] = {
          {8, OpBcst8},   {16, OpBcst16},   {32, OpBcst32},
          {64, OpBcst64}, {128, OpBcst128}, {256, OpBcst256},
      };
      for (auto [BitWidth, OpBcst] : Broadcasts) {
        if (OpBcst) {
          // Construct a suitable splat constant and adjust the MI to
          // use the new constant pool entry.
          if (Constant *NewCst = rebuildSplatableConstant(C, BitWidth)) {
            unsigned NewCPI =
                CP->getConstantPoolIndex(NewCst, Align(BitWidth / 8));
            MI.setDesc(TII->get(OpBcst));
            CstOp.setIndex(NewCPI);
            return true;
          }
        }
      }
    }
    return false;
  };

  // Attempt to convert full width vector loads into broadcast loads.
  // Candidate opcodes are chosen per source opcode and subtarget features.
  switch (Opc) {
  /* FP Loads */
  case X86::MOVAPDrm:
  case X86::MOVAPSrm:
  case X86::MOVUPDrm:
  case X86::MOVUPSrm:
    // TODO: SSE3 MOVDDUP Handling
    return false;
  case X86::VMOVAPDrm:
  case X86::VMOVAPSrm:
  case X86::VMOVUPDrm:
  case X86::VMOVUPSrm:
    return ConvertToBroadcast(0, 0, X86::VMOVDDUPrm, X86::VBROADCASTSSrm, 0, 0,
                              1);
  case X86::VMOVAPDYrm:
  case X86::VMOVAPSYrm:
  case X86::VMOVUPDYrm:
  case X86::VMOVUPSYrm:
    return ConvertToBroadcast(0, X86::VBROADCASTF128, X86::VBROADCASTSDYrm,
                              X86::VBROADCASTSSYrm, 0, 0, 1);
  case X86::VMOVAPDZ128rm:
  case X86::VMOVAPSZ128rm:
  case X86::VMOVUPDZ128rm:
  case X86::VMOVUPSZ128rm:
    return ConvertToBroadcast(0, 0, X86::VMOVDDUPZ128rm,
                              X86::VBROADCASTSSZ128rm, 0, 0, 1);
  case X86::VMOVAPDZ256rm:
  case X86::VMOVAPSZ256rm:
  case X86::VMOVUPDZ256rm:
  case X86::VMOVUPSZ256rm:
    return ConvertToBroadcast(
        0, HasDQI ? X86::VBROADCASTF64X2Z128rm : X86::VBROADCASTF32X4Z256rm,
        X86::VBROADCASTSDZ256rm, X86::VBROADCASTSSZ256rm, 0, 0, 1);
  case X86::VMOVAPDZrm:
  case X86::VMOVAPSZrm:
  case X86::VMOVUPDZrm:
  case X86::VMOVUPSZrm:
    return ConvertToBroadcast(
        HasDQI ? X86::VBROADCASTF32X8rm : X86::VBROADCASTF64X4rm,
        HasDQI ? X86::VBROADCASTF64X2rm : X86::VBROADCASTF32X4rm,
        X86::VBROADCASTSDZrm, X86::VBROADCASTSSZrm, 0, 0, 1);
    /* Integer Loads */
  case X86::VMOVDQArm:
  case X86::VMOVDQUrm:
    // Without AVX2 there are no integer broadcasts; fall back to FP forms.
    if (ST->hasAVX2())
      return ConvertToBroadcast(0, 0, X86::VPBROADCASTQrm, X86::VPBROADCASTDrm,
                                X86::VPBROADCASTWrm, X86::VPBROADCASTBrm, 1);
    return ConvertToBroadcast(0, 0, X86::VMOVDDUPrm, X86::VBROADCASTSSrm, 0, 0,
                              1);
  case X86::VMOVDQAYrm:
  case X86::VMOVDQUYrm:
    if (ST->hasAVX2())
      return ConvertToBroadcast(0, X86::VBROADCASTI128, X86::VPBROADCASTQYrm,
                                X86::VPBROADCASTDYrm, X86::VPBROADCASTWYrm,
                                X86::VPBROADCASTBYrm, 1);
    return ConvertToBroadcast(0, X86::VBROADCASTF128, X86::VBROADCASTSDYrm,
                              X86::VBROADCASTSSYrm, 0, 0, 1);
  case X86::VMOVDQA32Z128rm:
  case X86::VMOVDQA64Z128rm:
  case X86::VMOVDQU32Z128rm:
  case X86::VMOVDQU64Z128rm:
    // 16/8-bit broadcasts require BWI.
    return ConvertToBroadcast(0, 0, X86::VPBROADCASTQZ128rm,
                              X86::VPBROADCASTDZ128rm,
                              HasBWI ? X86::VPBROADCASTWZ128rm : 0,
                              HasBWI ? X86::VPBROADCASTBZ128rm : 0, 1);
  case X86::VMOVDQA32Z256rm:
  case X86::VMOVDQA64Z256rm:
  case X86::VMOVDQU32Z256rm:
  case X86::VMOVDQU64Z256rm:
    return ConvertToBroadcast(
        0, HasDQI ? X86::VBROADCASTI64X2Z128rm : X86::VBROADCASTI32X4Z256rm,
        X86::VPBROADCASTQZ256rm, X86::VPBROADCASTDZ256rm,
        HasBWI ? X86::VPBROADCASTWZ256rm : 0,
        HasBWI ? X86::VPBROADCASTBZ256rm : 0, 1);
  case X86::VMOVDQA32Zrm:
  case X86::VMOVDQA64Zrm:
  case X86::VMOVDQU32Zrm:
  case X86::VMOVDQU64Zrm:
    return ConvertToBroadcast(
        HasDQI ? X86::VBROADCASTI32X8rm : X86::VBROADCASTI64X4rm,
        HasDQI ? X86::VBROADCASTI64X2rm : X86::VBROADCASTI32X4rm,
        X86::VPBROADCASTQZrm, X86::VPBROADCASTDZrm,
        HasBWI ? X86::VPBROADCASTWZrm : 0, HasBWI ? X86::VPBROADCASTBZrm : 0,
        1);
  }

  // Attempt to find a AVX512 mapping from a full width memory-fold instruction
  // to a broadcast-fold instruction variant.
  if ((MI.getDesc().TSFlags & X86II::EncodingMask) == X86II::EVEX) {
    unsigned OpBcst32 = 0, OpBcst64 = 0;
    unsigned OpNoBcst32 = 0, OpNoBcst64 = 0;
    if (const X86MemoryFoldTableEntry *Mem2Bcst =
            llvm::lookupBroadcastFoldTable(Opc, 32)) {
      OpBcst32 = Mem2Bcst->DstOp;
      OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
    }
    if (const X86MemoryFoldTableEntry *Mem2Bcst =
            llvm::lookupBroadcastFoldTable(Opc, 64)) {
      OpBcst64 = Mem2Bcst->DstOp;
      OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
    }
    // Both table entries (when present) must agree on the memory operand index.
    assert(((OpBcst32 == 0) || (OpBcst64 == 0) || (OpNoBcst32 == OpNoBcst64)) &&
           "OperandNo mismatch");

    if (OpBcst32 || OpBcst64) {
      unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
      return ConvertToBroadcast(0, 0, OpBcst64, OpBcst32, 0, 0, OpNo);
    }
  }

  return false;
}
380*06c3fb27SDimitry Andric 
381*06c3fb27SDimitry Andric bool X86FixupVectorConstantsPass::runOnMachineFunction(MachineFunction &MF) {
382*06c3fb27SDimitry Andric   LLVM_DEBUG(dbgs() << "Start X86FixupVectorConstants\n";);
383*06c3fb27SDimitry Andric   bool Changed = false;
384*06c3fb27SDimitry Andric   ST = &MF.getSubtarget<X86Subtarget>();
385*06c3fb27SDimitry Andric   TII = ST->getInstrInfo();
386*06c3fb27SDimitry Andric   SM = &ST->getSchedModel();
387*06c3fb27SDimitry Andric 
388*06c3fb27SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
389*06c3fb27SDimitry Andric     for (MachineInstr &MI : MBB) {
390*06c3fb27SDimitry Andric       if (processInstruction(MF, MBB, MI)) {
391*06c3fb27SDimitry Andric         ++NumInstChanges;
392*06c3fb27SDimitry Andric         Changed = true;
393*06c3fb27SDimitry Andric       }
394*06c3fb27SDimitry Andric     }
395*06c3fb27SDimitry Andric   }
396*06c3fb27SDimitry Andric   LLVM_DEBUG(dbgs() << "End X86FixupVectorConstants\n";);
397*06c3fb27SDimitry Andric   return Changed;
398*06c3fb27SDimitry Andric }
399