xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/X86FixupVectorConstants.cpp (revision 7a6dacaca14b62ca4b74406814becb87a3fefac0)
//===-- X86FixupVectorConstants.cpp - optimize constant generation  -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file examines all full size vector constant pool loads and attempts to
// replace them with smaller constant pool entries, including:
// * Converting AVX512 memory-fold instructions to their broadcast-fold form
// * Broadcasting of full width loads.
// * TODO: Sign/Zero extension of full width loads.
//
//===----------------------------------------------------------------------===//
1606c3fb27SDimitry Andric 
1706c3fb27SDimitry Andric #include "X86.h"
1806c3fb27SDimitry Andric #include "X86InstrFoldTables.h"
1906c3fb27SDimitry Andric #include "X86InstrInfo.h"
2006c3fb27SDimitry Andric #include "X86Subtarget.h"
2106c3fb27SDimitry Andric #include "llvm/ADT/Statistic.h"
2206c3fb27SDimitry Andric #include "llvm/CodeGen/MachineConstantPool.h"
2306c3fb27SDimitry Andric 
2406c3fb27SDimitry Andric using namespace llvm;
2506c3fb27SDimitry Andric 
2606c3fb27SDimitry Andric #define DEBUG_TYPE "x86-fixup-vector-constants"
2706c3fb27SDimitry Andric 
2806c3fb27SDimitry Andric STATISTIC(NumInstChanges, "Number of instructions changes");
2906c3fb27SDimitry Andric 
3006c3fb27SDimitry Andric namespace {
3106c3fb27SDimitry Andric class X86FixupVectorConstantsPass : public MachineFunctionPass {
3206c3fb27SDimitry Andric public:
3306c3fb27SDimitry Andric   static char ID;
3406c3fb27SDimitry Andric 
3506c3fb27SDimitry Andric   X86FixupVectorConstantsPass() : MachineFunctionPass(ID) {}
3606c3fb27SDimitry Andric 
3706c3fb27SDimitry Andric   StringRef getPassName() const override {
3806c3fb27SDimitry Andric     return "X86 Fixup Vector Constants";
3906c3fb27SDimitry Andric   }
4006c3fb27SDimitry Andric 
4106c3fb27SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
4206c3fb27SDimitry Andric   bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
4306c3fb27SDimitry Andric                           MachineInstr &MI);
4406c3fb27SDimitry Andric 
4506c3fb27SDimitry Andric   // This pass runs after regalloc and doesn't support VReg operands.
4606c3fb27SDimitry Andric   MachineFunctionProperties getRequiredProperties() const override {
4706c3fb27SDimitry Andric     return MachineFunctionProperties().set(
4806c3fb27SDimitry Andric         MachineFunctionProperties::Property::NoVRegs);
4906c3fb27SDimitry Andric   }
5006c3fb27SDimitry Andric 
5106c3fb27SDimitry Andric private:
5206c3fb27SDimitry Andric   const X86InstrInfo *TII = nullptr;
5306c3fb27SDimitry Andric   const X86Subtarget *ST = nullptr;
5406c3fb27SDimitry Andric   const MCSchedModel *SM = nullptr;
5506c3fb27SDimitry Andric };
5606c3fb27SDimitry Andric } // end anonymous namespace
5706c3fb27SDimitry Andric 
5806c3fb27SDimitry Andric char X86FixupVectorConstantsPass::ID = 0;
5906c3fb27SDimitry Andric 
6006c3fb27SDimitry Andric INITIALIZE_PASS(X86FixupVectorConstantsPass, DEBUG_TYPE, DEBUG_TYPE, false, false)
6106c3fb27SDimitry Andric 
6206c3fb27SDimitry Andric FunctionPass *llvm::createX86FixupVectorConstants() {
6306c3fb27SDimitry Andric   return new X86FixupVectorConstantsPass();
6406c3fb27SDimitry Andric }
6506c3fb27SDimitry Andric 
6606c3fb27SDimitry Andric // Attempt to extract the full width of bits data from the constant.
6706c3fb27SDimitry Andric static std::optional<APInt> extractConstantBits(const Constant *C) {
6806c3fb27SDimitry Andric   unsigned NumBits = C->getType()->getPrimitiveSizeInBits();
6906c3fb27SDimitry Andric 
7006c3fb27SDimitry Andric   if (auto *CInt = dyn_cast<ConstantInt>(C))
7106c3fb27SDimitry Andric     return CInt->getValue();
7206c3fb27SDimitry Andric 
7306c3fb27SDimitry Andric   if (auto *CFP = dyn_cast<ConstantFP>(C))
7406c3fb27SDimitry Andric     return CFP->getValue().bitcastToAPInt();
7506c3fb27SDimitry Andric 
7606c3fb27SDimitry Andric   if (auto *CV = dyn_cast<ConstantVector>(C)) {
7706c3fb27SDimitry Andric     if (auto *CVSplat = CV->getSplatValue(/*AllowUndefs*/ true)) {
7806c3fb27SDimitry Andric       if (std::optional<APInt> Bits = extractConstantBits(CVSplat)) {
7906c3fb27SDimitry Andric         assert((NumBits % Bits->getBitWidth()) == 0 && "Illegal splat");
8006c3fb27SDimitry Andric         return APInt::getSplat(NumBits, *Bits);
8106c3fb27SDimitry Andric       }
8206c3fb27SDimitry Andric     }
8306c3fb27SDimitry Andric   }
8406c3fb27SDimitry Andric 
8506c3fb27SDimitry Andric   if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
8606c3fb27SDimitry Andric     bool IsInteger = CDS->getElementType()->isIntegerTy();
8706c3fb27SDimitry Andric     bool IsFloat = CDS->getElementType()->isHalfTy() ||
8806c3fb27SDimitry Andric                    CDS->getElementType()->isBFloatTy() ||
8906c3fb27SDimitry Andric                    CDS->getElementType()->isFloatTy() ||
9006c3fb27SDimitry Andric                    CDS->getElementType()->isDoubleTy();
9106c3fb27SDimitry Andric     if (IsInteger || IsFloat) {
9206c3fb27SDimitry Andric       APInt Bits = APInt::getZero(NumBits);
9306c3fb27SDimitry Andric       unsigned EltBits = CDS->getElementType()->getPrimitiveSizeInBits();
9406c3fb27SDimitry Andric       for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
9506c3fb27SDimitry Andric         if (IsInteger)
9606c3fb27SDimitry Andric           Bits.insertBits(CDS->getElementAsAPInt(I), I * EltBits);
9706c3fb27SDimitry Andric         else
9806c3fb27SDimitry Andric           Bits.insertBits(CDS->getElementAsAPFloat(I).bitcastToAPInt(),
9906c3fb27SDimitry Andric                           I * EltBits);
10006c3fb27SDimitry Andric       }
10106c3fb27SDimitry Andric       return Bits;
10206c3fb27SDimitry Andric     }
10306c3fb27SDimitry Andric   }
10406c3fb27SDimitry Andric 
10506c3fb27SDimitry Andric   return std::nullopt;
10606c3fb27SDimitry Andric }
10706c3fb27SDimitry Andric 
10806c3fb27SDimitry Andric // Attempt to compute the splat width of bits data by normalizing the splat to
10906c3fb27SDimitry Andric // remove undefs.
11006c3fb27SDimitry Andric static std::optional<APInt> getSplatableConstant(const Constant *C,
11106c3fb27SDimitry Andric                                                  unsigned SplatBitWidth) {
11206c3fb27SDimitry Andric   const Type *Ty = C->getType();
11306c3fb27SDimitry Andric   assert((Ty->getPrimitiveSizeInBits() % SplatBitWidth) == 0 &&
11406c3fb27SDimitry Andric          "Illegal splat width");
11506c3fb27SDimitry Andric 
11606c3fb27SDimitry Andric   if (std::optional<APInt> Bits = extractConstantBits(C))
11706c3fb27SDimitry Andric     if (Bits->isSplat(SplatBitWidth))
11806c3fb27SDimitry Andric       return Bits->trunc(SplatBitWidth);
11906c3fb27SDimitry Andric 
12006c3fb27SDimitry Andric   // Detect general splats with undefs.
12106c3fb27SDimitry Andric   // TODO: Do we need to handle NumEltsBits > SplatBitWidth splitting?
12206c3fb27SDimitry Andric   if (auto *CV = dyn_cast<ConstantVector>(C)) {
12306c3fb27SDimitry Andric     unsigned NumOps = CV->getNumOperands();
12406c3fb27SDimitry Andric     unsigned NumEltsBits = Ty->getScalarSizeInBits();
12506c3fb27SDimitry Andric     unsigned NumScaleOps = SplatBitWidth / NumEltsBits;
12606c3fb27SDimitry Andric     if ((SplatBitWidth % NumEltsBits) == 0) {
12706c3fb27SDimitry Andric       // Collect the elements and ensure that within the repeated splat sequence
12806c3fb27SDimitry Andric       // they either match or are undef.
12906c3fb27SDimitry Andric       SmallVector<Constant *, 16> Sequence(NumScaleOps, nullptr);
13006c3fb27SDimitry Andric       for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
13106c3fb27SDimitry Andric         if (Constant *Elt = CV->getAggregateElement(Idx)) {
13206c3fb27SDimitry Andric           if (isa<UndefValue>(Elt))
13306c3fb27SDimitry Andric             continue;
13406c3fb27SDimitry Andric           unsigned SplatIdx = Idx % NumScaleOps;
13506c3fb27SDimitry Andric           if (!Sequence[SplatIdx] || Sequence[SplatIdx] == Elt) {
13606c3fb27SDimitry Andric             Sequence[SplatIdx] = Elt;
13706c3fb27SDimitry Andric             continue;
13806c3fb27SDimitry Andric           }
13906c3fb27SDimitry Andric         }
14006c3fb27SDimitry Andric         return std::nullopt;
14106c3fb27SDimitry Andric       }
14206c3fb27SDimitry Andric       // Extract the constant bits forming the splat and insert into the bits
14306c3fb27SDimitry Andric       // data, leave undef as zero.
14406c3fb27SDimitry Andric       APInt SplatBits = APInt::getZero(SplatBitWidth);
14506c3fb27SDimitry Andric       for (unsigned I = 0; I != NumScaleOps; ++I) {
14606c3fb27SDimitry Andric         if (!Sequence[I])
14706c3fb27SDimitry Andric           continue;
14806c3fb27SDimitry Andric         if (std::optional<APInt> Bits = extractConstantBits(Sequence[I])) {
14906c3fb27SDimitry Andric           SplatBits.insertBits(*Bits, I * Bits->getBitWidth());
15006c3fb27SDimitry Andric           continue;
15106c3fb27SDimitry Andric         }
15206c3fb27SDimitry Andric         return std::nullopt;
15306c3fb27SDimitry Andric       }
15406c3fb27SDimitry Andric       return SplatBits;
15506c3fb27SDimitry Andric     }
15606c3fb27SDimitry Andric   }
15706c3fb27SDimitry Andric 
15806c3fb27SDimitry Andric   return std::nullopt;
15906c3fb27SDimitry Andric }
16006c3fb27SDimitry Andric 
161*7a6dacacSDimitry Andric // Split raw bits into a constant vector of elements of a specific bit width.
162*7a6dacacSDimitry Andric // NOTE: We don't always bother converting to scalars if the vector length is 1.
163*7a6dacacSDimitry Andric static Constant *rebuildConstant(LLVMContext &Ctx, Type *SclTy,
164*7a6dacacSDimitry Andric                                  const APInt &Bits, unsigned NumSclBits) {
165*7a6dacacSDimitry Andric   unsigned BitWidth = Bits.getBitWidth();
166*7a6dacacSDimitry Andric 
167*7a6dacacSDimitry Andric   if (NumSclBits == 8) {
168*7a6dacacSDimitry Andric     SmallVector<uint8_t> RawBits;
169*7a6dacacSDimitry Andric     for (unsigned I = 0; I != BitWidth; I += 8)
170*7a6dacacSDimitry Andric       RawBits.push_back(Bits.extractBits(8, I).getZExtValue());
171*7a6dacacSDimitry Andric     return ConstantDataVector::get(Ctx, RawBits);
172*7a6dacacSDimitry Andric   }
173*7a6dacacSDimitry Andric 
174*7a6dacacSDimitry Andric   if (NumSclBits == 16) {
175*7a6dacacSDimitry Andric     SmallVector<uint16_t> RawBits;
176*7a6dacacSDimitry Andric     for (unsigned I = 0; I != BitWidth; I += 16)
177*7a6dacacSDimitry Andric       RawBits.push_back(Bits.extractBits(16, I).getZExtValue());
178*7a6dacacSDimitry Andric     if (SclTy->is16bitFPTy())
179*7a6dacacSDimitry Andric       return ConstantDataVector::getFP(SclTy, RawBits);
180*7a6dacacSDimitry Andric     return ConstantDataVector::get(Ctx, RawBits);
181*7a6dacacSDimitry Andric   }
182*7a6dacacSDimitry Andric 
183*7a6dacacSDimitry Andric   if (NumSclBits == 32) {
184*7a6dacacSDimitry Andric     SmallVector<uint32_t> RawBits;
185*7a6dacacSDimitry Andric     for (unsigned I = 0; I != BitWidth; I += 32)
186*7a6dacacSDimitry Andric       RawBits.push_back(Bits.extractBits(32, I).getZExtValue());
187*7a6dacacSDimitry Andric     if (SclTy->isFloatTy())
188*7a6dacacSDimitry Andric       return ConstantDataVector::getFP(SclTy, RawBits);
189*7a6dacacSDimitry Andric     return ConstantDataVector::get(Ctx, RawBits);
190*7a6dacacSDimitry Andric   }
191*7a6dacacSDimitry Andric 
192*7a6dacacSDimitry Andric   assert(NumSclBits == 64 && "Unhandled vector element width");
193*7a6dacacSDimitry Andric 
194*7a6dacacSDimitry Andric   SmallVector<uint64_t> RawBits;
195*7a6dacacSDimitry Andric   for (unsigned I = 0; I != BitWidth; I += 64)
196*7a6dacacSDimitry Andric     RawBits.push_back(Bits.extractBits(64, I).getZExtValue());
197*7a6dacacSDimitry Andric   if (SclTy->isDoubleTy())
198*7a6dacacSDimitry Andric     return ConstantDataVector::getFP(SclTy, RawBits);
199*7a6dacacSDimitry Andric   return ConstantDataVector::get(Ctx, RawBits);
200*7a6dacacSDimitry Andric }
201*7a6dacacSDimitry Andric 
20206c3fb27SDimitry Andric // Attempt to rebuild a normalized splat vector constant of the requested splat
20306c3fb27SDimitry Andric // width, built up of potentially smaller scalar values.
20406c3fb27SDimitry Andric static Constant *rebuildSplatableConstant(const Constant *C,
20506c3fb27SDimitry Andric                                           unsigned SplatBitWidth) {
20606c3fb27SDimitry Andric   std::optional<APInt> Splat = getSplatableConstant(C, SplatBitWidth);
20706c3fb27SDimitry Andric   if (!Splat)
20806c3fb27SDimitry Andric     return nullptr;
20906c3fb27SDimitry Andric 
21006c3fb27SDimitry Andric   // Determine scalar size to use for the constant splat vector, clamping as we
21106c3fb27SDimitry Andric   // might have found a splat smaller than the original constant data.
21206c3fb27SDimitry Andric   const Type *OriginalType = C->getType();
21306c3fb27SDimitry Andric   Type *SclTy = OriginalType->getScalarType();
21406c3fb27SDimitry Andric   unsigned NumSclBits = SclTy->getPrimitiveSizeInBits();
21506c3fb27SDimitry Andric   NumSclBits = std::min<unsigned>(NumSclBits, SplatBitWidth);
21606c3fb27SDimitry Andric 
21706c3fb27SDimitry Andric   // Fallback to i64 / double.
218*7a6dacacSDimitry Andric   NumSclBits = (NumSclBits == 8 || NumSclBits == 16 || NumSclBits == 32)
219*7a6dacacSDimitry Andric                    ? NumSclBits
220*7a6dacacSDimitry Andric                    : 64;
221*7a6dacacSDimitry Andric 
222*7a6dacacSDimitry Andric   // Extract per-element bits.
223*7a6dacacSDimitry Andric   return rebuildConstant(OriginalType->getContext(), SclTy, *Splat, NumSclBits);
22406c3fb27SDimitry Andric }
22506c3fb27SDimitry Andric 
22606c3fb27SDimitry Andric bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
22706c3fb27SDimitry Andric                                                      MachineBasicBlock &MBB,
22806c3fb27SDimitry Andric                                                      MachineInstr &MI) {
22906c3fb27SDimitry Andric   unsigned Opc = MI.getOpcode();
23006c3fb27SDimitry Andric   MachineConstantPool *CP = MI.getParent()->getParent()->getConstantPool();
2315f757f3fSDimitry Andric   bool HasAVX2 = ST->hasAVX2();
23206c3fb27SDimitry Andric   bool HasDQI = ST->hasDQI();
23306c3fb27SDimitry Andric   bool HasBWI = ST->hasBWI();
2345f757f3fSDimitry Andric   bool HasVLX = ST->hasVLX();
23506c3fb27SDimitry Andric 
23606c3fb27SDimitry Andric   auto ConvertToBroadcast = [&](unsigned OpBcst256, unsigned OpBcst128,
23706c3fb27SDimitry Andric                                 unsigned OpBcst64, unsigned OpBcst32,
23806c3fb27SDimitry Andric                                 unsigned OpBcst16, unsigned OpBcst8,
23906c3fb27SDimitry Andric                                 unsigned OperandNo) {
24006c3fb27SDimitry Andric     assert(MI.getNumOperands() >= (OperandNo + X86::AddrNumOperands) &&
24106c3fb27SDimitry Andric            "Unexpected number of operands!");
24206c3fb27SDimitry Andric 
243*7a6dacacSDimitry Andric     if (auto *C = X86::getConstantFromPool(MI, OperandNo)) {
24406c3fb27SDimitry Andric       // Attempt to detect a suitable splat from increasing splat widths.
24506c3fb27SDimitry Andric       std::pair<unsigned, unsigned> Broadcasts[] = {
24606c3fb27SDimitry Andric           {8, OpBcst8},   {16, OpBcst16},   {32, OpBcst32},
24706c3fb27SDimitry Andric           {64, OpBcst64}, {128, OpBcst128}, {256, OpBcst256},
24806c3fb27SDimitry Andric       };
24906c3fb27SDimitry Andric       for (auto [BitWidth, OpBcst] : Broadcasts) {
25006c3fb27SDimitry Andric         if (OpBcst) {
25106c3fb27SDimitry Andric           // Construct a suitable splat constant and adjust the MI to
25206c3fb27SDimitry Andric           // use the new constant pool entry.
25306c3fb27SDimitry Andric           if (Constant *NewCst = rebuildSplatableConstant(C, BitWidth)) {
25406c3fb27SDimitry Andric             unsigned NewCPI =
25506c3fb27SDimitry Andric                 CP->getConstantPoolIndex(NewCst, Align(BitWidth / 8));
25606c3fb27SDimitry Andric             MI.setDesc(TII->get(OpBcst));
257*7a6dacacSDimitry Andric             MI.getOperand(OperandNo + X86::AddrDisp).setIndex(NewCPI);
25806c3fb27SDimitry Andric             return true;
25906c3fb27SDimitry Andric           }
26006c3fb27SDimitry Andric         }
26106c3fb27SDimitry Andric       }
26206c3fb27SDimitry Andric     }
26306c3fb27SDimitry Andric     return false;
26406c3fb27SDimitry Andric   };
26506c3fb27SDimitry Andric 
26606c3fb27SDimitry Andric   // Attempt to convert full width vector loads into broadcast loads.
26706c3fb27SDimitry Andric   switch (Opc) {
26806c3fb27SDimitry Andric   /* FP Loads */
26906c3fb27SDimitry Andric   case X86::MOVAPDrm:
27006c3fb27SDimitry Andric   case X86::MOVAPSrm:
27106c3fb27SDimitry Andric   case X86::MOVUPDrm:
27206c3fb27SDimitry Andric   case X86::MOVUPSrm:
27306c3fb27SDimitry Andric     // TODO: SSE3 MOVDDUP Handling
27406c3fb27SDimitry Andric     return false;
27506c3fb27SDimitry Andric   case X86::VMOVAPDrm:
27606c3fb27SDimitry Andric   case X86::VMOVAPSrm:
27706c3fb27SDimitry Andric   case X86::VMOVUPDrm:
27806c3fb27SDimitry Andric   case X86::VMOVUPSrm:
27906c3fb27SDimitry Andric     return ConvertToBroadcast(0, 0, X86::VMOVDDUPrm, X86::VBROADCASTSSrm, 0, 0,
28006c3fb27SDimitry Andric                               1);
28106c3fb27SDimitry Andric   case X86::VMOVAPDYrm:
28206c3fb27SDimitry Andric   case X86::VMOVAPSYrm:
28306c3fb27SDimitry Andric   case X86::VMOVUPDYrm:
28406c3fb27SDimitry Andric   case X86::VMOVUPSYrm:
2855f757f3fSDimitry Andric     return ConvertToBroadcast(0, X86::VBROADCASTF128rm, X86::VBROADCASTSDYrm,
28606c3fb27SDimitry Andric                               X86::VBROADCASTSSYrm, 0, 0, 1);
28706c3fb27SDimitry Andric   case X86::VMOVAPDZ128rm:
28806c3fb27SDimitry Andric   case X86::VMOVAPSZ128rm:
28906c3fb27SDimitry Andric   case X86::VMOVUPDZ128rm:
29006c3fb27SDimitry Andric   case X86::VMOVUPSZ128rm:
29106c3fb27SDimitry Andric     return ConvertToBroadcast(0, 0, X86::VMOVDDUPZ128rm,
29206c3fb27SDimitry Andric                               X86::VBROADCASTSSZ128rm, 0, 0, 1);
29306c3fb27SDimitry Andric   case X86::VMOVAPDZ256rm:
29406c3fb27SDimitry Andric   case X86::VMOVAPSZ256rm:
29506c3fb27SDimitry Andric   case X86::VMOVUPDZ256rm:
29606c3fb27SDimitry Andric   case X86::VMOVUPSZ256rm:
2975f757f3fSDimitry Andric     return ConvertToBroadcast(0, X86::VBROADCASTF32X4Z256rm,
2985f757f3fSDimitry Andric                               X86::VBROADCASTSDZ256rm, X86::VBROADCASTSSZ256rm,
2995f757f3fSDimitry Andric                               0, 0, 1);
30006c3fb27SDimitry Andric   case X86::VMOVAPDZrm:
30106c3fb27SDimitry Andric   case X86::VMOVAPSZrm:
30206c3fb27SDimitry Andric   case X86::VMOVUPDZrm:
30306c3fb27SDimitry Andric   case X86::VMOVUPSZrm:
3045f757f3fSDimitry Andric     return ConvertToBroadcast(X86::VBROADCASTF64X4rm, X86::VBROADCASTF32X4rm,
3055f757f3fSDimitry Andric                               X86::VBROADCASTSDZrm, X86::VBROADCASTSSZrm, 0, 0,
3065f757f3fSDimitry Andric                               1);
30706c3fb27SDimitry Andric     /* Integer Loads */
30806c3fb27SDimitry Andric   case X86::VMOVDQArm:
30906c3fb27SDimitry Andric   case X86::VMOVDQUrm:
3105f757f3fSDimitry Andric     return ConvertToBroadcast(
3115f757f3fSDimitry Andric         0, 0, HasAVX2 ? X86::VPBROADCASTQrm : X86::VMOVDDUPrm,
3125f757f3fSDimitry Andric         HasAVX2 ? X86::VPBROADCASTDrm : X86::VBROADCASTSSrm,
3135f757f3fSDimitry Andric         HasAVX2 ? X86::VPBROADCASTWrm : 0, HasAVX2 ? X86::VPBROADCASTBrm : 0,
31406c3fb27SDimitry Andric         1);
31506c3fb27SDimitry Andric   case X86::VMOVDQAYrm:
31606c3fb27SDimitry Andric   case X86::VMOVDQUYrm:
3175f757f3fSDimitry Andric     return ConvertToBroadcast(
3185f757f3fSDimitry Andric         0, HasAVX2 ? X86::VBROADCASTI128rm : X86::VBROADCASTF128rm,
3195f757f3fSDimitry Andric         HasAVX2 ? X86::VPBROADCASTQYrm : X86::VBROADCASTSDYrm,
3205f757f3fSDimitry Andric         HasAVX2 ? X86::VPBROADCASTDYrm : X86::VBROADCASTSSYrm,
3215f757f3fSDimitry Andric         HasAVX2 ? X86::VPBROADCASTWYrm : 0, HasAVX2 ? X86::VPBROADCASTBYrm : 0,
3225f757f3fSDimitry Andric         1);
32306c3fb27SDimitry Andric   case X86::VMOVDQA32Z128rm:
32406c3fb27SDimitry Andric   case X86::VMOVDQA64Z128rm:
32506c3fb27SDimitry Andric   case X86::VMOVDQU32Z128rm:
32606c3fb27SDimitry Andric   case X86::VMOVDQU64Z128rm:
32706c3fb27SDimitry Andric     return ConvertToBroadcast(0, 0, X86::VPBROADCASTQZ128rm,
32806c3fb27SDimitry Andric                               X86::VPBROADCASTDZ128rm,
32906c3fb27SDimitry Andric                               HasBWI ? X86::VPBROADCASTWZ128rm : 0,
33006c3fb27SDimitry Andric                               HasBWI ? X86::VPBROADCASTBZ128rm : 0, 1);
33106c3fb27SDimitry Andric   case X86::VMOVDQA32Z256rm:
33206c3fb27SDimitry Andric   case X86::VMOVDQA64Z256rm:
33306c3fb27SDimitry Andric   case X86::VMOVDQU32Z256rm:
33406c3fb27SDimitry Andric   case X86::VMOVDQU64Z256rm:
3355f757f3fSDimitry Andric     return ConvertToBroadcast(0, X86::VBROADCASTI32X4Z256rm,
33606c3fb27SDimitry Andric                               X86::VPBROADCASTQZ256rm, X86::VPBROADCASTDZ256rm,
33706c3fb27SDimitry Andric                               HasBWI ? X86::VPBROADCASTWZ256rm : 0,
33806c3fb27SDimitry Andric                               HasBWI ? X86::VPBROADCASTBZ256rm : 0, 1);
33906c3fb27SDimitry Andric   case X86::VMOVDQA32Zrm:
34006c3fb27SDimitry Andric   case X86::VMOVDQA64Zrm:
34106c3fb27SDimitry Andric   case X86::VMOVDQU32Zrm:
34206c3fb27SDimitry Andric   case X86::VMOVDQU64Zrm:
3435f757f3fSDimitry Andric     return ConvertToBroadcast(X86::VBROADCASTI64X4rm, X86::VBROADCASTI32X4rm,
34406c3fb27SDimitry Andric                               X86::VPBROADCASTQZrm, X86::VPBROADCASTDZrm,
3455f757f3fSDimitry Andric                               HasBWI ? X86::VPBROADCASTWZrm : 0,
3465f757f3fSDimitry Andric                               HasBWI ? X86::VPBROADCASTBZrm : 0, 1);
34706c3fb27SDimitry Andric   }
34806c3fb27SDimitry Andric 
3495f757f3fSDimitry Andric   auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) {
35006c3fb27SDimitry Andric     unsigned OpBcst32 = 0, OpBcst64 = 0;
35106c3fb27SDimitry Andric     unsigned OpNoBcst32 = 0, OpNoBcst64 = 0;
3525f757f3fSDimitry Andric     if (OpSrc32) {
3535f757f3fSDimitry Andric       if (const X86FoldTableEntry *Mem2Bcst =
3545f757f3fSDimitry Andric               llvm::lookupBroadcastFoldTable(OpSrc32, 32)) {
35506c3fb27SDimitry Andric         OpBcst32 = Mem2Bcst->DstOp;
35606c3fb27SDimitry Andric         OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
35706c3fb27SDimitry Andric       }
3585f757f3fSDimitry Andric     }
3595f757f3fSDimitry Andric     if (OpSrc64) {
3605f757f3fSDimitry Andric       if (const X86FoldTableEntry *Mem2Bcst =
3615f757f3fSDimitry Andric               llvm::lookupBroadcastFoldTable(OpSrc64, 64)) {
36206c3fb27SDimitry Andric         OpBcst64 = Mem2Bcst->DstOp;
36306c3fb27SDimitry Andric         OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
36406c3fb27SDimitry Andric       }
3655f757f3fSDimitry Andric     }
36606c3fb27SDimitry Andric     assert(((OpBcst32 == 0) || (OpBcst64 == 0) || (OpNoBcst32 == OpNoBcst64)) &&
36706c3fb27SDimitry Andric            "OperandNo mismatch");
36806c3fb27SDimitry Andric 
36906c3fb27SDimitry Andric     if (OpBcst32 || OpBcst64) {
37006c3fb27SDimitry Andric       unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
37106c3fb27SDimitry Andric       return ConvertToBroadcast(0, 0, OpBcst64, OpBcst32, 0, 0, OpNo);
37206c3fb27SDimitry Andric     }
3735f757f3fSDimitry Andric     return false;
3745f757f3fSDimitry Andric   };
3755f757f3fSDimitry Andric 
3765f757f3fSDimitry Andric   // Attempt to find a AVX512 mapping from a full width memory-fold instruction
3775f757f3fSDimitry Andric   // to a broadcast-fold instruction variant.
3785f757f3fSDimitry Andric   if ((MI.getDesc().TSFlags & X86II::EncodingMask) == X86II::EVEX)
3795f757f3fSDimitry Andric     return ConvertToBroadcastAVX512(Opc, Opc);
3805f757f3fSDimitry Andric 
3815f757f3fSDimitry Andric   // Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic
3825f757f3fSDimitry Andric   // conversion to see if we can convert to a broadcasted (integer) logic op.
3835f757f3fSDimitry Andric   if (HasVLX && !HasDQI) {
3845f757f3fSDimitry Andric     unsigned OpSrc32 = 0, OpSrc64 = 0;
3855f757f3fSDimitry Andric     switch (Opc) {
3865f757f3fSDimitry Andric     case X86::VANDPDrm:
3875f757f3fSDimitry Andric     case X86::VANDPSrm:
3885f757f3fSDimitry Andric     case X86::VPANDrm:
3895f757f3fSDimitry Andric       OpSrc32 = X86 ::VPANDDZ128rm;
3905f757f3fSDimitry Andric       OpSrc64 = X86 ::VPANDQZ128rm;
3915f757f3fSDimitry Andric       break;
3925f757f3fSDimitry Andric     case X86::VANDPDYrm:
3935f757f3fSDimitry Andric     case X86::VANDPSYrm:
3945f757f3fSDimitry Andric     case X86::VPANDYrm:
3955f757f3fSDimitry Andric       OpSrc32 = X86 ::VPANDDZ256rm;
3965f757f3fSDimitry Andric       OpSrc64 = X86 ::VPANDQZ256rm;
3975f757f3fSDimitry Andric       break;
3985f757f3fSDimitry Andric     case X86::VANDNPDrm:
3995f757f3fSDimitry Andric     case X86::VANDNPSrm:
4005f757f3fSDimitry Andric     case X86::VPANDNrm:
4015f757f3fSDimitry Andric       OpSrc32 = X86 ::VPANDNDZ128rm;
4025f757f3fSDimitry Andric       OpSrc64 = X86 ::VPANDNQZ128rm;
4035f757f3fSDimitry Andric       break;
4045f757f3fSDimitry Andric     case X86::VANDNPDYrm:
4055f757f3fSDimitry Andric     case X86::VANDNPSYrm:
4065f757f3fSDimitry Andric     case X86::VPANDNYrm:
4075f757f3fSDimitry Andric       OpSrc32 = X86 ::VPANDNDZ256rm;
4085f757f3fSDimitry Andric       OpSrc64 = X86 ::VPANDNQZ256rm;
4095f757f3fSDimitry Andric       break;
4105f757f3fSDimitry Andric     case X86::VORPDrm:
4115f757f3fSDimitry Andric     case X86::VORPSrm:
4125f757f3fSDimitry Andric     case X86::VPORrm:
4135f757f3fSDimitry Andric       OpSrc32 = X86 ::VPORDZ128rm;
4145f757f3fSDimitry Andric       OpSrc64 = X86 ::VPORQZ128rm;
4155f757f3fSDimitry Andric       break;
4165f757f3fSDimitry Andric     case X86::VORPDYrm:
4175f757f3fSDimitry Andric     case X86::VORPSYrm:
4185f757f3fSDimitry Andric     case X86::VPORYrm:
4195f757f3fSDimitry Andric       OpSrc32 = X86 ::VPORDZ256rm;
4205f757f3fSDimitry Andric       OpSrc64 = X86 ::VPORQZ256rm;
4215f757f3fSDimitry Andric       break;
4225f757f3fSDimitry Andric     case X86::VXORPDrm:
4235f757f3fSDimitry Andric     case X86::VXORPSrm:
4245f757f3fSDimitry Andric     case X86::VPXORrm:
4255f757f3fSDimitry Andric       OpSrc32 = X86 ::VPXORDZ128rm;
4265f757f3fSDimitry Andric       OpSrc64 = X86 ::VPXORQZ128rm;
4275f757f3fSDimitry Andric       break;
4285f757f3fSDimitry Andric     case X86::VXORPDYrm:
4295f757f3fSDimitry Andric     case X86::VXORPSYrm:
4305f757f3fSDimitry Andric     case X86::VPXORYrm:
4315f757f3fSDimitry Andric       OpSrc32 = X86 ::VPXORDZ256rm;
4325f757f3fSDimitry Andric       OpSrc64 = X86 ::VPXORQZ256rm;
4335f757f3fSDimitry Andric       break;
4345f757f3fSDimitry Andric     }
4355f757f3fSDimitry Andric     if (OpSrc32 || OpSrc64)
4365f757f3fSDimitry Andric       return ConvertToBroadcastAVX512(OpSrc32, OpSrc64);
43706c3fb27SDimitry Andric   }
43806c3fb27SDimitry Andric 
43906c3fb27SDimitry Andric   return false;
44006c3fb27SDimitry Andric }
44106c3fb27SDimitry Andric 
44206c3fb27SDimitry Andric bool X86FixupVectorConstantsPass::runOnMachineFunction(MachineFunction &MF) {
44306c3fb27SDimitry Andric   LLVM_DEBUG(dbgs() << "Start X86FixupVectorConstants\n";);
44406c3fb27SDimitry Andric   bool Changed = false;
44506c3fb27SDimitry Andric   ST = &MF.getSubtarget<X86Subtarget>();
44606c3fb27SDimitry Andric   TII = ST->getInstrInfo();
44706c3fb27SDimitry Andric   SM = &ST->getSchedModel();
44806c3fb27SDimitry Andric 
44906c3fb27SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
45006c3fb27SDimitry Andric     for (MachineInstr &MI : MBB) {
45106c3fb27SDimitry Andric       if (processInstruction(MF, MBB, MI)) {
45206c3fb27SDimitry Andric         ++NumInstChanges;
45306c3fb27SDimitry Andric         Changed = true;
45406c3fb27SDimitry Andric       }
45506c3fb27SDimitry Andric     }
45606c3fb27SDimitry Andric   }
45706c3fb27SDimitry Andric   LLVM_DEBUG(dbgs() << "End X86FixupVectorConstants\n";);
45806c3fb27SDimitry Andric   return Changed;
45906c3fb27SDimitry Andric }
460