//===-- X86FixupVectorConstants.cpp - optimize constant generation  -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file examines all full size vector constant pool loads and attempts to
// replace them with smaller constant pool entries, including:
// * Converting AVX512 memory-fold instructions to their broadcast-fold form
// * Broadcasting of full width loads.
// * TODO: Sign/Zero extension of full width loads.
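//
// For example (illustrative): a 16-byte constant pool load of the splat
// <4 x float> <1.0, 1.0, 1.0, 1.0> via VMOVAPSrm can instead be emitted as a
// VBROADCASTSSrm of a single 4-byte constant pool entry.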
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrFoldTables.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineConstantPool.h"

using namespace llvm;

#define DEBUG_TYPE "x86-fixup-vector-constants"

STATISTIC(NumInstChanges, "Number of instruction changes");

namespace {
class X86FixupVectorConstantsPass : public MachineFunctionPass {
public:
  static char ID;

  X86FixupVectorConstantsPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override {
    return "X86 Fixup Vector Constants";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
                          MachineInstr &MI);

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

private:
  const X86InstrInfo *TII = nullptr;
  const X86Subtarget *ST = nullptr;
  const MCSchedModel *SM = nullptr;
};
} // end anonymous namespace

char X86FixupVectorConstantsPass::ID = 0;

INITIALIZE_PASS(X86FixupVectorConstantsPass, DEBUG_TYPE, DEBUG_TYPE, false, false)

FunctionPass *llvm::createX86FixupVectorConstants() {
  return new X86FixupVectorConstantsPass();
}

static const Constant *getConstantFromPool(const MachineInstr &MI,
                                           const MachineOperand &Op) {
  if (!Op.isCPI() || Op.getOffset() != 0)
    return nullptr;

  ArrayRef<MachineConstantPoolEntry> Constants =
      MI.getParent()->getParent()->getConstantPool()->getConstants();
  const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];

  // Bail if this is a machine constant pool entry; we won't be able to dig out
  // anything useful.
  if (ConstantEntry.isMachineConstantPoolEntry())
    return nullptr;

  return ConstantEntry.Val.ConstVal;
}

// Attempt to extract the full-width bit data from the constant.
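// For example (illustrative), <4 x i32> <i32 1, i32 1, i32 1, i32 1> yields a
// 128-bit APInt whose four 32-bit lanes each contain the value 1.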
static std::optional<APInt> extractConstantBits(const Constant *C) {
  unsigned NumBits = C->getType()->getPrimitiveSizeInBits();

  if (auto *CInt = dyn_cast<ConstantInt>(C))
    return CInt->getValue();

  if (auto *CFP = dyn_cast<ConstantFP>(C))
    return CFP->getValue().bitcastToAPInt();

  if (auto *CV = dyn_cast<ConstantVector>(C)) {
    if (auto *CVSplat = CV->getSplatValue(/*AllowUndefs*/ true)) {
      if (std::optional<APInt> Bits = extractConstantBits(CVSplat)) {
        assert((NumBits % Bits->getBitWidth()) == 0 && "Illegal splat");
        return APInt::getSplat(NumBits, *Bits);
      }
    }
  }

  if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
    bool IsInteger = CDS->getElementType()->isIntegerTy();
    bool IsFloat = CDS->getElementType()->isHalfTy() ||
                   CDS->getElementType()->isBFloatTy() ||
                   CDS->getElementType()->isFloatTy() ||
                   CDS->getElementType()->isDoubleTy();
    if (IsInteger || IsFloat) {
      APInt Bits = APInt::getZero(NumBits);
      unsigned EltBits = CDS->getElementType()->getPrimitiveSizeInBits();
      for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
        if (IsInteger)
          Bits.insertBits(CDS->getElementAsAPInt(I), I * EltBits);
        else
          Bits.insertBits(CDS->getElementAsAPFloat(I).bitcastToAPInt(),
                          I * EltBits);
      }
      return Bits;
    }
  }

  return std::nullopt;
}

// Attempt to compute the splat-width bit data of the constant by normalizing
// the splat to remove undefs.
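// For example (illustrative), <4 x i32> <i32 5, i32 undef, i32 5, i32 undef>
// with SplatBitWidth == 32 yields the 32-bit value 5, treating the undef
// lanes as "don't care".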
static std::optional<APInt> getSplatableConstant(const Constant *C,
                                                 unsigned SplatBitWidth) {
  const Type *Ty = C->getType();
  assert((Ty->getPrimitiveSizeInBits() % SplatBitWidth) == 0 &&
         "Illegal splat width");

  if (std::optional<APInt> Bits = extractConstantBits(C))
    if (Bits->isSplat(SplatBitWidth))
      return Bits->trunc(SplatBitWidth);

  // Detect general splats with undefs.
  // TODO: Do we need to handle NumEltsBits > SplatBitWidth splitting?
  if (auto *CV = dyn_cast<ConstantVector>(C)) {
    unsigned NumOps = CV->getNumOperands();
    unsigned NumEltsBits = Ty->getScalarSizeInBits();
    unsigned NumScaleOps = SplatBitWidth / NumEltsBits;
    if ((SplatBitWidth % NumEltsBits) == 0) {
      // Collect the elements and ensure that within the repeated splat sequence
      // they either match or are undef.
      SmallVector<Constant *, 16> Sequence(NumScaleOps, nullptr);
      for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
        if (Constant *Elt = CV->getAggregateElement(Idx)) {
          if (isa<UndefValue>(Elt))
            continue;
          unsigned SplatIdx = Idx % NumScaleOps;
          if (!Sequence[SplatIdx] || Sequence[SplatIdx] == Elt) {
            Sequence[SplatIdx] = Elt;
            continue;
          }
        }
        return std::nullopt;
      }
      // Extract the constant bits forming the splat and insert them into the
      // bits data, leaving undefs as zero.
      APInt SplatBits = APInt::getZero(SplatBitWidth);
      for (unsigned I = 0; I != NumScaleOps; ++I) {
        if (!Sequence[I])
          continue;
        if (std::optional<APInt> Bits = extractConstantBits(Sequence[I])) {
          SplatBits.insertBits(*Bits, I * Bits->getBitWidth());
          continue;
        }
        return std::nullopt;
      }
      return SplatBits;
    }
  }

  return std::nullopt;
}

// Attempt to rebuild a normalized splat vector constant of the requested splat
// width, built up of potentially smaller scalar values.
// NOTE: We don't always bother converting to scalars if the vector length is 1.
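// For example (illustrative), a 256-bit <4 x double> splat of 1.0 requested at
// SplatBitWidth == 64 is rebuilt as a <1 x double> ConstantDataVector holding
// just the 8-byte value 1.0.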
static Constant *rebuildSplatableConstant(const Constant *C,
                                          unsigned SplatBitWidth) {
  std::optional<APInt> Splat = getSplatableConstant(C, SplatBitWidth);
  if (!Splat)
    return nullptr;

  // Determine scalar size to use for the constant splat vector, clamping as we
  // might have found a splat smaller than the original constant data.
  const Type *OriginalType = C->getType();
  Type *SclTy = OriginalType->getScalarType();
  unsigned NumSclBits = SclTy->getPrimitiveSizeInBits();
  NumSclBits = std::min<unsigned>(NumSclBits, SplatBitWidth);
  LLVMContext &Ctx = OriginalType->getContext();

  if (NumSclBits == 8) {
    SmallVector<uint8_t> RawBits;
    for (unsigned I = 0; I != SplatBitWidth; I += 8)
      RawBits.push_back(Splat->extractBits(8, I).getZExtValue());
    return ConstantDataVector::get(Ctx, RawBits);
  }

  if (NumSclBits == 16) {
    SmallVector<uint16_t> RawBits;
    for (unsigned I = 0; I != SplatBitWidth; I += 16)
      RawBits.push_back(Splat->extractBits(16, I).getZExtValue());
    if (SclTy->is16bitFPTy())
      return ConstantDataVector::getFP(SclTy, RawBits);
    return ConstantDataVector::get(Ctx, RawBits);
  }

  if (NumSclBits == 32) {
    SmallVector<uint32_t> RawBits;
    for (unsigned I = 0; I != SplatBitWidth; I += 32)
      RawBits.push_back(Splat->extractBits(32, I).getZExtValue());
    if (SclTy->isFloatTy())
      return ConstantDataVector::getFP(SclTy, RawBits);
    return ConstantDataVector::get(Ctx, RawBits);
  }

  // Fallback to i64 / double.
  SmallVector<uint64_t> RawBits;
  for (unsigned I = 0; I != SplatBitWidth; I += 64)
    RawBits.push_back(Splat->extractBits(64, I).getZExtValue());
  if (SclTy->isDoubleTy())
    return ConstantDataVector::getFP(SclTy, RawBits);
  return ConstantDataVector::get(Ctx, RawBits);
}

bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                                                     MachineBasicBlock &MBB,
                                                     MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  MachineConstantPool *CP = MI.getParent()->getParent()->getConstantPool();
  bool HasAVX2 = ST->hasAVX2();
  bool HasDQI = ST->hasDQI();
  bool HasBWI = ST->hasBWI();
  bool HasVLX = ST->hasVLX();

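  // Attempt to rewrite the constant pool load whose address operands begin at
  // index OperandNo as a broadcast. Each OpBcstN argument is the opcode to
  // switch to when the constant is representable as an N-bit splat (0 if no
  // such broadcast form exists); the smallest viable splat width is tried
  // first.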
  auto ConvertToBroadcast = [&](unsigned OpBcst256, unsigned OpBcst128,
                                unsigned OpBcst64, unsigned OpBcst32,
                                unsigned OpBcst16, unsigned OpBcst8,
                                unsigned OperandNo) {
    assert(MI.getNumOperands() >= (OperandNo + X86::AddrNumOperands) &&
           "Unexpected number of operands!");

    MachineOperand &CstOp = MI.getOperand(OperandNo + X86::AddrDisp);
    if (auto *C = getConstantFromPool(MI, CstOp)) {
      // Attempt to detect a suitable splat from increasing splat widths.
      std::pair<unsigned, unsigned> Broadcasts[] = {
          {8, OpBcst8},   {16, OpBcst16},   {32, OpBcst32},
          {64, OpBcst64}, {128, OpBcst128}, {256, OpBcst256},
      };
      for (auto [BitWidth, OpBcst] : Broadcasts) {
        if (OpBcst) {
          // Construct a suitable splat constant and adjust the MI to
          // use the new constant pool entry.
          if (Constant *NewCst = rebuildSplatableConstant(C, BitWidth)) {
            unsigned NewCPI =
                CP->getConstantPoolIndex(NewCst, Align(BitWidth / 8));
            MI.setDesc(TII->get(OpBcst));
            CstOp.setIndex(NewCPI);
            return true;
          }
        }
      }
    }
    return false;
  };

  // Attempt to convert full width vector loads into broadcast loads.
  switch (Opc) {
  /* FP Loads */
  case X86::MOVAPDrm:
  case X86::MOVAPSrm:
  case X86::MOVUPDrm:
  case X86::MOVUPSrm:
    // TODO: SSE3 MOVDDUP Handling
    return false;
  case X86::VMOVAPDrm:
  case X86::VMOVAPSrm:
  case X86::VMOVUPDrm:
  case X86::VMOVUPSrm:
    return ConvertToBroadcast(0, 0, X86::VMOVDDUPrm, X86::VBROADCASTSSrm, 0, 0,
                              1);
  case X86::VMOVAPDYrm:
  case X86::VMOVAPSYrm:
  case X86::VMOVUPDYrm:
  case X86::VMOVUPSYrm:
    return ConvertToBroadcast(0, X86::VBROADCASTF128rm, X86::VBROADCASTSDYrm,
                              X86::VBROADCASTSSYrm, 0, 0, 1);
  case X86::VMOVAPDZ128rm:
  case X86::VMOVAPSZ128rm:
  case X86::VMOVUPDZ128rm:
  case X86::VMOVUPSZ128rm:
    return ConvertToBroadcast(0, 0, X86::VMOVDDUPZ128rm,
                              X86::VBROADCASTSSZ128rm, 0, 0, 1);
  case X86::VMOVAPDZ256rm:
  case X86::VMOVAPSZ256rm:
  case X86::VMOVUPDZ256rm:
  case X86::VMOVUPSZ256rm:
    return ConvertToBroadcast(0, X86::VBROADCASTF32X4Z256rm,
                              X86::VBROADCASTSDZ256rm, X86::VBROADCASTSSZ256rm,
                              0, 0, 1);
  case X86::VMOVAPDZrm:
  case X86::VMOVAPSZrm:
  case X86::VMOVUPDZrm:
  case X86::VMOVUPSZrm:
    return ConvertToBroadcast(X86::VBROADCASTF64X4rm, X86::VBROADCASTF32X4rm,
                              X86::VBROADCASTSDZrm, X86::VBROADCASTSSZrm, 0, 0,
                              1);
  /* Integer Loads */
  case X86::VMOVDQArm:
  case X86::VMOVDQUrm:
    return ConvertToBroadcast(
        0, 0, HasAVX2 ? X86::VPBROADCASTQrm : X86::VMOVDDUPrm,
        HasAVX2 ? X86::VPBROADCASTDrm : X86::VBROADCASTSSrm,
        HasAVX2 ? X86::VPBROADCASTWrm : 0, HasAVX2 ? X86::VPBROADCASTBrm : 0,
        1);
  case X86::VMOVDQAYrm:
  case X86::VMOVDQUYrm:
    return ConvertToBroadcast(
        0, HasAVX2 ? X86::VBROADCASTI128rm : X86::VBROADCASTF128rm,
        HasAVX2 ? X86::VPBROADCASTQYrm : X86::VBROADCASTSDYrm,
        HasAVX2 ? X86::VPBROADCASTDYrm : X86::VBROADCASTSSYrm,
        HasAVX2 ? X86::VPBROADCASTWYrm : 0, HasAVX2 ? X86::VPBROADCASTBYrm : 0,
        1);
  case X86::VMOVDQA32Z128rm:
  case X86::VMOVDQA64Z128rm:
  case X86::VMOVDQU32Z128rm:
  case X86::VMOVDQU64Z128rm:
    return ConvertToBroadcast(0, 0, X86::VPBROADCASTQZ128rm,
                              X86::VPBROADCASTDZ128rm,
                              HasBWI ? X86::VPBROADCASTWZ128rm : 0,
                              HasBWI ? X86::VPBROADCASTBZ128rm : 0, 1);
  case X86::VMOVDQA32Z256rm:
  case X86::VMOVDQA64Z256rm:
  case X86::VMOVDQU32Z256rm:
  case X86::VMOVDQU64Z256rm:
    return ConvertToBroadcast(0, X86::VBROADCASTI32X4Z256rm,
                              X86::VPBROADCASTQZ256rm, X86::VPBROADCASTDZ256rm,
                              HasBWI ? X86::VPBROADCASTWZ256rm : 0,
                              HasBWI ? X86::VPBROADCASTBZ256rm : 0, 1);
  case X86::VMOVDQA32Zrm:
  case X86::VMOVDQA64Zrm:
  case X86::VMOVDQU32Zrm:
  case X86::VMOVDQU64Zrm:
    return ConvertToBroadcast(X86::VBROADCASTI64X4rm, X86::VBROADCASTI32X4rm,
                              X86::VPBROADCASTQZrm, X86::VPBROADCASTDZrm,
                              HasBWI ? X86::VPBROADCASTWZrm : 0,
                              HasBWI ? X86::VPBROADCASTBZrm : 0, 1);
  }

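  // Given the 32-bit and 64-bit element variants of a memory-folded opcode,
  // look up their EVEX embedded-broadcast forms in the broadcast fold tables
  // and, if one exists, attempt the constant pool splat conversion above.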
  auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) {
    unsigned OpBcst32 = 0, OpBcst64 = 0;
    unsigned OpNoBcst32 = 0, OpNoBcst64 = 0;
    if (OpSrc32) {
      if (const X86FoldTableEntry *Mem2Bcst =
              llvm::lookupBroadcastFoldTable(OpSrc32, 32)) {
        OpBcst32 = Mem2Bcst->DstOp;
        OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
      }
    }
    if (OpSrc64) {
      if (const X86FoldTableEntry *Mem2Bcst =
              llvm::lookupBroadcastFoldTable(OpSrc64, 64)) {
        OpBcst64 = Mem2Bcst->DstOp;
        OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
      }
    }
    assert(((OpBcst32 == 0) || (OpBcst64 == 0) || (OpNoBcst32 == OpNoBcst64)) &&
           "OperandNo mismatch");

    if (OpBcst32 || OpBcst64) {
      unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
      return ConvertToBroadcast(0, 0, OpBcst64, OpBcst32, 0, 0, OpNo);
    }
    return false;
  };

  // Attempt to find an AVX512 mapping from a full width memory-fold
  // instruction to a broadcast-fold instruction variant.
  if ((MI.getDesc().TSFlags & X86II::EncodingMask) == X86II::EVEX)
    return ConvertToBroadcastAVX512(Opc, Opc);

  // Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic
  // conversion to see if we can convert to a broadcasted (integer) logic op.
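  // For example (illustrative), a VEX-encoded VPANDrm whose memory operand is
  // a 32-bit or 64-bit splat constant can be re-encoded as VPANDDZ128rm /
  // VPANDQZ128rm, whose embedded-broadcast forms are then found via the
  // broadcast fold tables above.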
  if (HasVLX && !HasDQI) {
    unsigned OpSrc32 = 0, OpSrc64 = 0;
    switch (Opc) {
    case X86::VANDPDrm:
    case X86::VANDPSrm:
    case X86::VPANDrm:
      OpSrc32 = X86::VPANDDZ128rm;
      OpSrc64 = X86::VPANDQZ128rm;
      break;
    case X86::VANDPDYrm:
    case X86::VANDPSYrm:
    case X86::VPANDYrm:
      OpSrc32 = X86::VPANDDZ256rm;
      OpSrc64 = X86::VPANDQZ256rm;
      break;
    case X86::VANDNPDrm:
    case X86::VANDNPSrm:
    case X86::VPANDNrm:
      OpSrc32 = X86::VPANDNDZ128rm;
      OpSrc64 = X86::VPANDNQZ128rm;
      break;
    case X86::VANDNPDYrm:
    case X86::VANDNPSYrm:
    case X86::VPANDNYrm:
      OpSrc32 = X86::VPANDNDZ256rm;
      OpSrc64 = X86::VPANDNQZ256rm;
      break;
    case X86::VORPDrm:
    case X86::VORPSrm:
    case X86::VPORrm:
      OpSrc32 = X86::VPORDZ128rm;
      OpSrc64 = X86::VPORQZ128rm;
      break;
    case X86::VORPDYrm:
    case X86::VORPSYrm:
    case X86::VPORYrm:
      OpSrc32 = X86::VPORDZ256rm;
      OpSrc64 = X86::VPORQZ256rm;
      break;
    case X86::VXORPDrm:
    case X86::VXORPSrm:
    case X86::VPXORrm:
      OpSrc32 = X86::VPXORDZ128rm;
      OpSrc64 = X86::VPXORQZ128rm;
      break;
    case X86::VXORPDYrm:
    case X86::VXORPSYrm:
    case X86::VPXORYrm:
      OpSrc32 = X86::VPXORDZ256rm;
      OpSrc64 = X86::VPXORQZ256rm;
      break;
    }
    if (OpSrc32 || OpSrc64)
      return ConvertToBroadcastAVX512(OpSrc32, OpSrc64);
  }

  return false;
}

bool X86FixupVectorConstantsPass::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "Start X86FixupVectorConstants\n";);
  bool Changed = false;
  ST = &MF.getSubtarget<X86Subtarget>();
  TII = ST->getInstrInfo();
  SM = &ST->getSchedModel();

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (processInstruction(MF, MBB, MI)) {
        ++NumInstChanges;
        Changed = true;
      }
    }
  }
  LLVM_DEBUG(dbgs() << "End X86FixupVectorConstants\n";);
  return Changed;
}