//===-- X86FixupVectorConstants.cpp - optimize constant generation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file examines all full size vector constant pool loads and attempts to
// replace them with smaller constant pool entries, including:
// * Converting AVX512 memory-fold instructions to their broadcast-fold form
// * Broadcasting of full width loads.
// * TODO: Sign/Zero extension of full width loads.
1406c3fb27SDimitry Andric // 1506c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 1606c3fb27SDimitry Andric 1706c3fb27SDimitry Andric #include "X86.h" 1806c3fb27SDimitry Andric #include "X86InstrFoldTables.h" 1906c3fb27SDimitry Andric #include "X86InstrInfo.h" 2006c3fb27SDimitry Andric #include "X86Subtarget.h" 2106c3fb27SDimitry Andric #include "llvm/ADT/Statistic.h" 2206c3fb27SDimitry Andric #include "llvm/CodeGen/MachineConstantPool.h" 2306c3fb27SDimitry Andric 2406c3fb27SDimitry Andric using namespace llvm; 2506c3fb27SDimitry Andric 2606c3fb27SDimitry Andric #define DEBUG_TYPE "x86-fixup-vector-constants" 2706c3fb27SDimitry Andric 2806c3fb27SDimitry Andric STATISTIC(NumInstChanges, "Number of instructions changes"); 2906c3fb27SDimitry Andric 3006c3fb27SDimitry Andric namespace { 3106c3fb27SDimitry Andric class X86FixupVectorConstantsPass : public MachineFunctionPass { 3206c3fb27SDimitry Andric public: 3306c3fb27SDimitry Andric static char ID; 3406c3fb27SDimitry Andric 3506c3fb27SDimitry Andric X86FixupVectorConstantsPass() : MachineFunctionPass(ID) {} 3606c3fb27SDimitry Andric 3706c3fb27SDimitry Andric StringRef getPassName() const override { 3806c3fb27SDimitry Andric return "X86 Fixup Vector Constants"; 3906c3fb27SDimitry Andric } 4006c3fb27SDimitry Andric 4106c3fb27SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 4206c3fb27SDimitry Andric bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB, 4306c3fb27SDimitry Andric MachineInstr &MI); 4406c3fb27SDimitry Andric 4506c3fb27SDimitry Andric // This pass runs after regalloc and doesn't support VReg operands. 
4606c3fb27SDimitry Andric MachineFunctionProperties getRequiredProperties() const override { 4706c3fb27SDimitry Andric return MachineFunctionProperties().set( 4806c3fb27SDimitry Andric MachineFunctionProperties::Property::NoVRegs); 4906c3fb27SDimitry Andric } 5006c3fb27SDimitry Andric 5106c3fb27SDimitry Andric private: 5206c3fb27SDimitry Andric const X86InstrInfo *TII = nullptr; 5306c3fb27SDimitry Andric const X86Subtarget *ST = nullptr; 5406c3fb27SDimitry Andric const MCSchedModel *SM = nullptr; 5506c3fb27SDimitry Andric }; 5606c3fb27SDimitry Andric } // end anonymous namespace 5706c3fb27SDimitry Andric 5806c3fb27SDimitry Andric char X86FixupVectorConstantsPass::ID = 0; 5906c3fb27SDimitry Andric 6006c3fb27SDimitry Andric INITIALIZE_PASS(X86FixupVectorConstantsPass, DEBUG_TYPE, DEBUG_TYPE, false, false) 6106c3fb27SDimitry Andric 6206c3fb27SDimitry Andric FunctionPass *llvm::createX86FixupVectorConstants() { 6306c3fb27SDimitry Andric return new X86FixupVectorConstantsPass(); 6406c3fb27SDimitry Andric } 6506c3fb27SDimitry Andric 6606c3fb27SDimitry Andric // Attempt to extract the full width of bits data from the constant. 
6706c3fb27SDimitry Andric static std::optional<APInt> extractConstantBits(const Constant *C) { 6806c3fb27SDimitry Andric unsigned NumBits = C->getType()->getPrimitiveSizeInBits(); 6906c3fb27SDimitry Andric 7006c3fb27SDimitry Andric if (auto *CInt = dyn_cast<ConstantInt>(C)) 7106c3fb27SDimitry Andric return CInt->getValue(); 7206c3fb27SDimitry Andric 7306c3fb27SDimitry Andric if (auto *CFP = dyn_cast<ConstantFP>(C)) 7406c3fb27SDimitry Andric return CFP->getValue().bitcastToAPInt(); 7506c3fb27SDimitry Andric 7606c3fb27SDimitry Andric if (auto *CV = dyn_cast<ConstantVector>(C)) { 7706c3fb27SDimitry Andric if (auto *CVSplat = CV->getSplatValue(/*AllowUndefs*/ true)) { 7806c3fb27SDimitry Andric if (std::optional<APInt> Bits = extractConstantBits(CVSplat)) { 7906c3fb27SDimitry Andric assert((NumBits % Bits->getBitWidth()) == 0 && "Illegal splat"); 8006c3fb27SDimitry Andric return APInt::getSplat(NumBits, *Bits); 8106c3fb27SDimitry Andric } 8206c3fb27SDimitry Andric } 8306c3fb27SDimitry Andric } 8406c3fb27SDimitry Andric 8506c3fb27SDimitry Andric if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) { 8606c3fb27SDimitry Andric bool IsInteger = CDS->getElementType()->isIntegerTy(); 8706c3fb27SDimitry Andric bool IsFloat = CDS->getElementType()->isHalfTy() || 8806c3fb27SDimitry Andric CDS->getElementType()->isBFloatTy() || 8906c3fb27SDimitry Andric CDS->getElementType()->isFloatTy() || 9006c3fb27SDimitry Andric CDS->getElementType()->isDoubleTy(); 9106c3fb27SDimitry Andric if (IsInteger || IsFloat) { 9206c3fb27SDimitry Andric APInt Bits = APInt::getZero(NumBits); 9306c3fb27SDimitry Andric unsigned EltBits = CDS->getElementType()->getPrimitiveSizeInBits(); 9406c3fb27SDimitry Andric for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) { 9506c3fb27SDimitry Andric if (IsInteger) 9606c3fb27SDimitry Andric Bits.insertBits(CDS->getElementAsAPInt(I), I * EltBits); 9706c3fb27SDimitry Andric else 9806c3fb27SDimitry Andric 
Bits.insertBits(CDS->getElementAsAPFloat(I).bitcastToAPInt(), 9906c3fb27SDimitry Andric I * EltBits); 10006c3fb27SDimitry Andric } 10106c3fb27SDimitry Andric return Bits; 10206c3fb27SDimitry Andric } 10306c3fb27SDimitry Andric } 10406c3fb27SDimitry Andric 10506c3fb27SDimitry Andric return std::nullopt; 10606c3fb27SDimitry Andric } 10706c3fb27SDimitry Andric 10806c3fb27SDimitry Andric // Attempt to compute the splat width of bits data by normalizing the splat to 10906c3fb27SDimitry Andric // remove undefs. 11006c3fb27SDimitry Andric static std::optional<APInt> getSplatableConstant(const Constant *C, 11106c3fb27SDimitry Andric unsigned SplatBitWidth) { 11206c3fb27SDimitry Andric const Type *Ty = C->getType(); 11306c3fb27SDimitry Andric assert((Ty->getPrimitiveSizeInBits() % SplatBitWidth) == 0 && 11406c3fb27SDimitry Andric "Illegal splat width"); 11506c3fb27SDimitry Andric 11606c3fb27SDimitry Andric if (std::optional<APInt> Bits = extractConstantBits(C)) 11706c3fb27SDimitry Andric if (Bits->isSplat(SplatBitWidth)) 11806c3fb27SDimitry Andric return Bits->trunc(SplatBitWidth); 11906c3fb27SDimitry Andric 12006c3fb27SDimitry Andric // Detect general splats with undefs. 12106c3fb27SDimitry Andric // TODO: Do we need to handle NumEltsBits > SplatBitWidth splitting? 12206c3fb27SDimitry Andric if (auto *CV = dyn_cast<ConstantVector>(C)) { 12306c3fb27SDimitry Andric unsigned NumOps = CV->getNumOperands(); 12406c3fb27SDimitry Andric unsigned NumEltsBits = Ty->getScalarSizeInBits(); 12506c3fb27SDimitry Andric unsigned NumScaleOps = SplatBitWidth / NumEltsBits; 12606c3fb27SDimitry Andric if ((SplatBitWidth % NumEltsBits) == 0) { 12706c3fb27SDimitry Andric // Collect the elements and ensure that within the repeated splat sequence 12806c3fb27SDimitry Andric // they either match or are undef. 
12906c3fb27SDimitry Andric SmallVector<Constant *, 16> Sequence(NumScaleOps, nullptr); 13006c3fb27SDimitry Andric for (unsigned Idx = 0; Idx != NumOps; ++Idx) { 13106c3fb27SDimitry Andric if (Constant *Elt = CV->getAggregateElement(Idx)) { 13206c3fb27SDimitry Andric if (isa<UndefValue>(Elt)) 13306c3fb27SDimitry Andric continue; 13406c3fb27SDimitry Andric unsigned SplatIdx = Idx % NumScaleOps; 13506c3fb27SDimitry Andric if (!Sequence[SplatIdx] || Sequence[SplatIdx] == Elt) { 13606c3fb27SDimitry Andric Sequence[SplatIdx] = Elt; 13706c3fb27SDimitry Andric continue; 13806c3fb27SDimitry Andric } 13906c3fb27SDimitry Andric } 14006c3fb27SDimitry Andric return std::nullopt; 14106c3fb27SDimitry Andric } 14206c3fb27SDimitry Andric // Extract the constant bits forming the splat and insert into the bits 14306c3fb27SDimitry Andric // data, leave undef as zero. 14406c3fb27SDimitry Andric APInt SplatBits = APInt::getZero(SplatBitWidth); 14506c3fb27SDimitry Andric for (unsigned I = 0; I != NumScaleOps; ++I) { 14606c3fb27SDimitry Andric if (!Sequence[I]) 14706c3fb27SDimitry Andric continue; 14806c3fb27SDimitry Andric if (std::optional<APInt> Bits = extractConstantBits(Sequence[I])) { 14906c3fb27SDimitry Andric SplatBits.insertBits(*Bits, I * Bits->getBitWidth()); 15006c3fb27SDimitry Andric continue; 15106c3fb27SDimitry Andric } 15206c3fb27SDimitry Andric return std::nullopt; 15306c3fb27SDimitry Andric } 15406c3fb27SDimitry Andric return SplatBits; 15506c3fb27SDimitry Andric } 15606c3fb27SDimitry Andric } 15706c3fb27SDimitry Andric 15806c3fb27SDimitry Andric return std::nullopt; 15906c3fb27SDimitry Andric } 16006c3fb27SDimitry Andric 161*7a6dacacSDimitry Andric // Split raw bits into a constant vector of elements of a specific bit width. 162*7a6dacacSDimitry Andric // NOTE: We don't always bother converting to scalars if the vector length is 1. 
163*7a6dacacSDimitry Andric static Constant *rebuildConstant(LLVMContext &Ctx, Type *SclTy, 164*7a6dacacSDimitry Andric const APInt &Bits, unsigned NumSclBits) { 165*7a6dacacSDimitry Andric unsigned BitWidth = Bits.getBitWidth(); 166*7a6dacacSDimitry Andric 167*7a6dacacSDimitry Andric if (NumSclBits == 8) { 168*7a6dacacSDimitry Andric SmallVector<uint8_t> RawBits; 169*7a6dacacSDimitry Andric for (unsigned I = 0; I != BitWidth; I += 8) 170*7a6dacacSDimitry Andric RawBits.push_back(Bits.extractBits(8, I).getZExtValue()); 171*7a6dacacSDimitry Andric return ConstantDataVector::get(Ctx, RawBits); 172*7a6dacacSDimitry Andric } 173*7a6dacacSDimitry Andric 174*7a6dacacSDimitry Andric if (NumSclBits == 16) { 175*7a6dacacSDimitry Andric SmallVector<uint16_t> RawBits; 176*7a6dacacSDimitry Andric for (unsigned I = 0; I != BitWidth; I += 16) 177*7a6dacacSDimitry Andric RawBits.push_back(Bits.extractBits(16, I).getZExtValue()); 178*7a6dacacSDimitry Andric if (SclTy->is16bitFPTy()) 179*7a6dacacSDimitry Andric return ConstantDataVector::getFP(SclTy, RawBits); 180*7a6dacacSDimitry Andric return ConstantDataVector::get(Ctx, RawBits); 181*7a6dacacSDimitry Andric } 182*7a6dacacSDimitry Andric 183*7a6dacacSDimitry Andric if (NumSclBits == 32) { 184*7a6dacacSDimitry Andric SmallVector<uint32_t> RawBits; 185*7a6dacacSDimitry Andric for (unsigned I = 0; I != BitWidth; I += 32) 186*7a6dacacSDimitry Andric RawBits.push_back(Bits.extractBits(32, I).getZExtValue()); 187*7a6dacacSDimitry Andric if (SclTy->isFloatTy()) 188*7a6dacacSDimitry Andric return ConstantDataVector::getFP(SclTy, RawBits); 189*7a6dacacSDimitry Andric return ConstantDataVector::get(Ctx, RawBits); 190*7a6dacacSDimitry Andric } 191*7a6dacacSDimitry Andric 192*7a6dacacSDimitry Andric assert(NumSclBits == 64 && "Unhandled vector element width"); 193*7a6dacacSDimitry Andric 194*7a6dacacSDimitry Andric SmallVector<uint64_t> RawBits; 195*7a6dacacSDimitry Andric for (unsigned I = 0; I != BitWidth; I += 64) 196*7a6dacacSDimitry 
Andric RawBits.push_back(Bits.extractBits(64, I).getZExtValue()); 197*7a6dacacSDimitry Andric if (SclTy->isDoubleTy()) 198*7a6dacacSDimitry Andric return ConstantDataVector::getFP(SclTy, RawBits); 199*7a6dacacSDimitry Andric return ConstantDataVector::get(Ctx, RawBits); 200*7a6dacacSDimitry Andric } 201*7a6dacacSDimitry Andric 20206c3fb27SDimitry Andric // Attempt to rebuild a normalized splat vector constant of the requested splat 20306c3fb27SDimitry Andric // width, built up of potentially smaller scalar values. 20406c3fb27SDimitry Andric static Constant *rebuildSplatableConstant(const Constant *C, 20506c3fb27SDimitry Andric unsigned SplatBitWidth) { 20606c3fb27SDimitry Andric std::optional<APInt> Splat = getSplatableConstant(C, SplatBitWidth); 20706c3fb27SDimitry Andric if (!Splat) 20806c3fb27SDimitry Andric return nullptr; 20906c3fb27SDimitry Andric 21006c3fb27SDimitry Andric // Determine scalar size to use for the constant splat vector, clamping as we 21106c3fb27SDimitry Andric // might have found a splat smaller than the original constant data. 21206c3fb27SDimitry Andric const Type *OriginalType = C->getType(); 21306c3fb27SDimitry Andric Type *SclTy = OriginalType->getScalarType(); 21406c3fb27SDimitry Andric unsigned NumSclBits = SclTy->getPrimitiveSizeInBits(); 21506c3fb27SDimitry Andric NumSclBits = std::min<unsigned>(NumSclBits, SplatBitWidth); 21606c3fb27SDimitry Andric 21706c3fb27SDimitry Andric // Fallback to i64 / double. 218*7a6dacacSDimitry Andric NumSclBits = (NumSclBits == 8 || NumSclBits == 16 || NumSclBits == 32) 219*7a6dacacSDimitry Andric ? NumSclBits 220*7a6dacacSDimitry Andric : 64; 221*7a6dacacSDimitry Andric 222*7a6dacacSDimitry Andric // Extract per-element bits. 
223*7a6dacacSDimitry Andric return rebuildConstant(OriginalType->getContext(), SclTy, *Splat, NumSclBits); 22406c3fb27SDimitry Andric } 22506c3fb27SDimitry Andric 22606c3fb27SDimitry Andric bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF, 22706c3fb27SDimitry Andric MachineBasicBlock &MBB, 22806c3fb27SDimitry Andric MachineInstr &MI) { 22906c3fb27SDimitry Andric unsigned Opc = MI.getOpcode(); 23006c3fb27SDimitry Andric MachineConstantPool *CP = MI.getParent()->getParent()->getConstantPool(); 2315f757f3fSDimitry Andric bool HasAVX2 = ST->hasAVX2(); 23206c3fb27SDimitry Andric bool HasDQI = ST->hasDQI(); 23306c3fb27SDimitry Andric bool HasBWI = ST->hasBWI(); 2345f757f3fSDimitry Andric bool HasVLX = ST->hasVLX(); 23506c3fb27SDimitry Andric 23606c3fb27SDimitry Andric auto ConvertToBroadcast = [&](unsigned OpBcst256, unsigned OpBcst128, 23706c3fb27SDimitry Andric unsigned OpBcst64, unsigned OpBcst32, 23806c3fb27SDimitry Andric unsigned OpBcst16, unsigned OpBcst8, 23906c3fb27SDimitry Andric unsigned OperandNo) { 24006c3fb27SDimitry Andric assert(MI.getNumOperands() >= (OperandNo + X86::AddrNumOperands) && 24106c3fb27SDimitry Andric "Unexpected number of operands!"); 24206c3fb27SDimitry Andric 243*7a6dacacSDimitry Andric if (auto *C = X86::getConstantFromPool(MI, OperandNo)) { 24406c3fb27SDimitry Andric // Attempt to detect a suitable splat from increasing splat widths. 24506c3fb27SDimitry Andric std::pair<unsigned, unsigned> Broadcasts[] = { 24606c3fb27SDimitry Andric {8, OpBcst8}, {16, OpBcst16}, {32, OpBcst32}, 24706c3fb27SDimitry Andric {64, OpBcst64}, {128, OpBcst128}, {256, OpBcst256}, 24806c3fb27SDimitry Andric }; 24906c3fb27SDimitry Andric for (auto [BitWidth, OpBcst] : Broadcasts) { 25006c3fb27SDimitry Andric if (OpBcst) { 25106c3fb27SDimitry Andric // Construct a suitable splat constant and adjust the MI to 25206c3fb27SDimitry Andric // use the new constant pool entry. 
25306c3fb27SDimitry Andric if (Constant *NewCst = rebuildSplatableConstant(C, BitWidth)) { 25406c3fb27SDimitry Andric unsigned NewCPI = 25506c3fb27SDimitry Andric CP->getConstantPoolIndex(NewCst, Align(BitWidth / 8)); 25606c3fb27SDimitry Andric MI.setDesc(TII->get(OpBcst)); 257*7a6dacacSDimitry Andric MI.getOperand(OperandNo + X86::AddrDisp).setIndex(NewCPI); 25806c3fb27SDimitry Andric return true; 25906c3fb27SDimitry Andric } 26006c3fb27SDimitry Andric } 26106c3fb27SDimitry Andric } 26206c3fb27SDimitry Andric } 26306c3fb27SDimitry Andric return false; 26406c3fb27SDimitry Andric }; 26506c3fb27SDimitry Andric 26606c3fb27SDimitry Andric // Attempt to convert full width vector loads into broadcast loads. 26706c3fb27SDimitry Andric switch (Opc) { 26806c3fb27SDimitry Andric /* FP Loads */ 26906c3fb27SDimitry Andric case X86::MOVAPDrm: 27006c3fb27SDimitry Andric case X86::MOVAPSrm: 27106c3fb27SDimitry Andric case X86::MOVUPDrm: 27206c3fb27SDimitry Andric case X86::MOVUPSrm: 27306c3fb27SDimitry Andric // TODO: SSE3 MOVDDUP Handling 27406c3fb27SDimitry Andric return false; 27506c3fb27SDimitry Andric case X86::VMOVAPDrm: 27606c3fb27SDimitry Andric case X86::VMOVAPSrm: 27706c3fb27SDimitry Andric case X86::VMOVUPDrm: 27806c3fb27SDimitry Andric case X86::VMOVUPSrm: 27906c3fb27SDimitry Andric return ConvertToBroadcast(0, 0, X86::VMOVDDUPrm, X86::VBROADCASTSSrm, 0, 0, 28006c3fb27SDimitry Andric 1); 28106c3fb27SDimitry Andric case X86::VMOVAPDYrm: 28206c3fb27SDimitry Andric case X86::VMOVAPSYrm: 28306c3fb27SDimitry Andric case X86::VMOVUPDYrm: 28406c3fb27SDimitry Andric case X86::VMOVUPSYrm: 2855f757f3fSDimitry Andric return ConvertToBroadcast(0, X86::VBROADCASTF128rm, X86::VBROADCASTSDYrm, 28606c3fb27SDimitry Andric X86::VBROADCASTSSYrm, 0, 0, 1); 28706c3fb27SDimitry Andric case X86::VMOVAPDZ128rm: 28806c3fb27SDimitry Andric case X86::VMOVAPSZ128rm: 28906c3fb27SDimitry Andric case X86::VMOVUPDZ128rm: 29006c3fb27SDimitry Andric case X86::VMOVUPSZ128rm: 29106c3fb27SDimitry Andric 
return ConvertToBroadcast(0, 0, X86::VMOVDDUPZ128rm, 29206c3fb27SDimitry Andric X86::VBROADCASTSSZ128rm, 0, 0, 1); 29306c3fb27SDimitry Andric case X86::VMOVAPDZ256rm: 29406c3fb27SDimitry Andric case X86::VMOVAPSZ256rm: 29506c3fb27SDimitry Andric case X86::VMOVUPDZ256rm: 29606c3fb27SDimitry Andric case X86::VMOVUPSZ256rm: 2975f757f3fSDimitry Andric return ConvertToBroadcast(0, X86::VBROADCASTF32X4Z256rm, 2985f757f3fSDimitry Andric X86::VBROADCASTSDZ256rm, X86::VBROADCASTSSZ256rm, 2995f757f3fSDimitry Andric 0, 0, 1); 30006c3fb27SDimitry Andric case X86::VMOVAPDZrm: 30106c3fb27SDimitry Andric case X86::VMOVAPSZrm: 30206c3fb27SDimitry Andric case X86::VMOVUPDZrm: 30306c3fb27SDimitry Andric case X86::VMOVUPSZrm: 3045f757f3fSDimitry Andric return ConvertToBroadcast(X86::VBROADCASTF64X4rm, X86::VBROADCASTF32X4rm, 3055f757f3fSDimitry Andric X86::VBROADCASTSDZrm, X86::VBROADCASTSSZrm, 0, 0, 3065f757f3fSDimitry Andric 1); 30706c3fb27SDimitry Andric /* Integer Loads */ 30806c3fb27SDimitry Andric case X86::VMOVDQArm: 30906c3fb27SDimitry Andric case X86::VMOVDQUrm: 3105f757f3fSDimitry Andric return ConvertToBroadcast( 3115f757f3fSDimitry Andric 0, 0, HasAVX2 ? X86::VPBROADCASTQrm : X86::VMOVDDUPrm, 3125f757f3fSDimitry Andric HasAVX2 ? X86::VPBROADCASTDrm : X86::VBROADCASTSSrm, 3135f757f3fSDimitry Andric HasAVX2 ? X86::VPBROADCASTWrm : 0, HasAVX2 ? X86::VPBROADCASTBrm : 0, 31406c3fb27SDimitry Andric 1); 31506c3fb27SDimitry Andric case X86::VMOVDQAYrm: 31606c3fb27SDimitry Andric case X86::VMOVDQUYrm: 3175f757f3fSDimitry Andric return ConvertToBroadcast( 3185f757f3fSDimitry Andric 0, HasAVX2 ? X86::VBROADCASTI128rm : X86::VBROADCASTF128rm, 3195f757f3fSDimitry Andric HasAVX2 ? X86::VPBROADCASTQYrm : X86::VBROADCASTSDYrm, 3205f757f3fSDimitry Andric HasAVX2 ? X86::VPBROADCASTDYrm : X86::VBROADCASTSSYrm, 3215f757f3fSDimitry Andric HasAVX2 ? X86::VPBROADCASTWYrm : 0, HasAVX2 ? 
X86::VPBROADCASTBYrm : 0, 3225f757f3fSDimitry Andric 1); 32306c3fb27SDimitry Andric case X86::VMOVDQA32Z128rm: 32406c3fb27SDimitry Andric case X86::VMOVDQA64Z128rm: 32506c3fb27SDimitry Andric case X86::VMOVDQU32Z128rm: 32606c3fb27SDimitry Andric case X86::VMOVDQU64Z128rm: 32706c3fb27SDimitry Andric return ConvertToBroadcast(0, 0, X86::VPBROADCASTQZ128rm, 32806c3fb27SDimitry Andric X86::VPBROADCASTDZ128rm, 32906c3fb27SDimitry Andric HasBWI ? X86::VPBROADCASTWZ128rm : 0, 33006c3fb27SDimitry Andric HasBWI ? X86::VPBROADCASTBZ128rm : 0, 1); 33106c3fb27SDimitry Andric case X86::VMOVDQA32Z256rm: 33206c3fb27SDimitry Andric case X86::VMOVDQA64Z256rm: 33306c3fb27SDimitry Andric case X86::VMOVDQU32Z256rm: 33406c3fb27SDimitry Andric case X86::VMOVDQU64Z256rm: 3355f757f3fSDimitry Andric return ConvertToBroadcast(0, X86::VBROADCASTI32X4Z256rm, 33606c3fb27SDimitry Andric X86::VPBROADCASTQZ256rm, X86::VPBROADCASTDZ256rm, 33706c3fb27SDimitry Andric HasBWI ? X86::VPBROADCASTWZ256rm : 0, 33806c3fb27SDimitry Andric HasBWI ? X86::VPBROADCASTBZ256rm : 0, 1); 33906c3fb27SDimitry Andric case X86::VMOVDQA32Zrm: 34006c3fb27SDimitry Andric case X86::VMOVDQA64Zrm: 34106c3fb27SDimitry Andric case X86::VMOVDQU32Zrm: 34206c3fb27SDimitry Andric case X86::VMOVDQU64Zrm: 3435f757f3fSDimitry Andric return ConvertToBroadcast(X86::VBROADCASTI64X4rm, X86::VBROADCASTI32X4rm, 34406c3fb27SDimitry Andric X86::VPBROADCASTQZrm, X86::VPBROADCASTDZrm, 3455f757f3fSDimitry Andric HasBWI ? X86::VPBROADCASTWZrm : 0, 3465f757f3fSDimitry Andric HasBWI ? 
X86::VPBROADCASTBZrm : 0, 1); 34706c3fb27SDimitry Andric } 34806c3fb27SDimitry Andric 3495f757f3fSDimitry Andric auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) { 35006c3fb27SDimitry Andric unsigned OpBcst32 = 0, OpBcst64 = 0; 35106c3fb27SDimitry Andric unsigned OpNoBcst32 = 0, OpNoBcst64 = 0; 3525f757f3fSDimitry Andric if (OpSrc32) { 3535f757f3fSDimitry Andric if (const X86FoldTableEntry *Mem2Bcst = 3545f757f3fSDimitry Andric llvm::lookupBroadcastFoldTable(OpSrc32, 32)) { 35506c3fb27SDimitry Andric OpBcst32 = Mem2Bcst->DstOp; 35606c3fb27SDimitry Andric OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK; 35706c3fb27SDimitry Andric } 3585f757f3fSDimitry Andric } 3595f757f3fSDimitry Andric if (OpSrc64) { 3605f757f3fSDimitry Andric if (const X86FoldTableEntry *Mem2Bcst = 3615f757f3fSDimitry Andric llvm::lookupBroadcastFoldTable(OpSrc64, 64)) { 36206c3fb27SDimitry Andric OpBcst64 = Mem2Bcst->DstOp; 36306c3fb27SDimitry Andric OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK; 36406c3fb27SDimitry Andric } 3655f757f3fSDimitry Andric } 36606c3fb27SDimitry Andric assert(((OpBcst32 == 0) || (OpBcst64 == 0) || (OpNoBcst32 == OpNoBcst64)) && 36706c3fb27SDimitry Andric "OperandNo mismatch"); 36806c3fb27SDimitry Andric 36906c3fb27SDimitry Andric if (OpBcst32 || OpBcst64) { 37006c3fb27SDimitry Andric unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32; 37106c3fb27SDimitry Andric return ConvertToBroadcast(0, 0, OpBcst64, OpBcst32, 0, 0, OpNo); 37206c3fb27SDimitry Andric } 3735f757f3fSDimitry Andric return false; 3745f757f3fSDimitry Andric }; 3755f757f3fSDimitry Andric 3765f757f3fSDimitry Andric // Attempt to find a AVX512 mapping from a full width memory-fold instruction 3775f757f3fSDimitry Andric // to a broadcast-fold instruction variant. 
3785f757f3fSDimitry Andric if ((MI.getDesc().TSFlags & X86II::EncodingMask) == X86II::EVEX) 3795f757f3fSDimitry Andric return ConvertToBroadcastAVX512(Opc, Opc); 3805f757f3fSDimitry Andric 3815f757f3fSDimitry Andric // Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic 3825f757f3fSDimitry Andric // conversion to see if we can convert to a broadcasted (integer) logic op. 3835f757f3fSDimitry Andric if (HasVLX && !HasDQI) { 3845f757f3fSDimitry Andric unsigned OpSrc32 = 0, OpSrc64 = 0; 3855f757f3fSDimitry Andric switch (Opc) { 3865f757f3fSDimitry Andric case X86::VANDPDrm: 3875f757f3fSDimitry Andric case X86::VANDPSrm: 3885f757f3fSDimitry Andric case X86::VPANDrm: 3895f757f3fSDimitry Andric OpSrc32 = X86 ::VPANDDZ128rm; 3905f757f3fSDimitry Andric OpSrc64 = X86 ::VPANDQZ128rm; 3915f757f3fSDimitry Andric break; 3925f757f3fSDimitry Andric case X86::VANDPDYrm: 3935f757f3fSDimitry Andric case X86::VANDPSYrm: 3945f757f3fSDimitry Andric case X86::VPANDYrm: 3955f757f3fSDimitry Andric OpSrc32 = X86 ::VPANDDZ256rm; 3965f757f3fSDimitry Andric OpSrc64 = X86 ::VPANDQZ256rm; 3975f757f3fSDimitry Andric break; 3985f757f3fSDimitry Andric case X86::VANDNPDrm: 3995f757f3fSDimitry Andric case X86::VANDNPSrm: 4005f757f3fSDimitry Andric case X86::VPANDNrm: 4015f757f3fSDimitry Andric OpSrc32 = X86 ::VPANDNDZ128rm; 4025f757f3fSDimitry Andric OpSrc64 = X86 ::VPANDNQZ128rm; 4035f757f3fSDimitry Andric break; 4045f757f3fSDimitry Andric case X86::VANDNPDYrm: 4055f757f3fSDimitry Andric case X86::VANDNPSYrm: 4065f757f3fSDimitry Andric case X86::VPANDNYrm: 4075f757f3fSDimitry Andric OpSrc32 = X86 ::VPANDNDZ256rm; 4085f757f3fSDimitry Andric OpSrc64 = X86 ::VPANDNQZ256rm; 4095f757f3fSDimitry Andric break; 4105f757f3fSDimitry Andric case X86::VORPDrm: 4115f757f3fSDimitry Andric case X86::VORPSrm: 4125f757f3fSDimitry Andric case X86::VPORrm: 4135f757f3fSDimitry Andric OpSrc32 = X86 ::VPORDZ128rm; 4145f757f3fSDimitry Andric OpSrc64 = X86 ::VPORQZ128rm; 4155f757f3fSDimitry Andric break; 
4165f757f3fSDimitry Andric case X86::VORPDYrm: 4175f757f3fSDimitry Andric case X86::VORPSYrm: 4185f757f3fSDimitry Andric case X86::VPORYrm: 4195f757f3fSDimitry Andric OpSrc32 = X86 ::VPORDZ256rm; 4205f757f3fSDimitry Andric OpSrc64 = X86 ::VPORQZ256rm; 4215f757f3fSDimitry Andric break; 4225f757f3fSDimitry Andric case X86::VXORPDrm: 4235f757f3fSDimitry Andric case X86::VXORPSrm: 4245f757f3fSDimitry Andric case X86::VPXORrm: 4255f757f3fSDimitry Andric OpSrc32 = X86 ::VPXORDZ128rm; 4265f757f3fSDimitry Andric OpSrc64 = X86 ::VPXORQZ128rm; 4275f757f3fSDimitry Andric break; 4285f757f3fSDimitry Andric case X86::VXORPDYrm: 4295f757f3fSDimitry Andric case X86::VXORPSYrm: 4305f757f3fSDimitry Andric case X86::VPXORYrm: 4315f757f3fSDimitry Andric OpSrc32 = X86 ::VPXORDZ256rm; 4325f757f3fSDimitry Andric OpSrc64 = X86 ::VPXORQZ256rm; 4335f757f3fSDimitry Andric break; 4345f757f3fSDimitry Andric } 4355f757f3fSDimitry Andric if (OpSrc32 || OpSrc64) 4365f757f3fSDimitry Andric return ConvertToBroadcastAVX512(OpSrc32, OpSrc64); 43706c3fb27SDimitry Andric } 43806c3fb27SDimitry Andric 43906c3fb27SDimitry Andric return false; 44006c3fb27SDimitry Andric } 44106c3fb27SDimitry Andric 44206c3fb27SDimitry Andric bool X86FixupVectorConstantsPass::runOnMachineFunction(MachineFunction &MF) { 44306c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "Start X86FixupVectorConstants\n";); 44406c3fb27SDimitry Andric bool Changed = false; 44506c3fb27SDimitry Andric ST = &MF.getSubtarget<X86Subtarget>(); 44606c3fb27SDimitry Andric TII = ST->getInstrInfo(); 44706c3fb27SDimitry Andric SM = &ST->getSchedModel(); 44806c3fb27SDimitry Andric 44906c3fb27SDimitry Andric for (MachineBasicBlock &MBB : MF) { 45006c3fb27SDimitry Andric for (MachineInstr &MI : MBB) { 45106c3fb27SDimitry Andric if (processInstruction(MF, MBB, MI)) { 45206c3fb27SDimitry Andric ++NumInstChanges; 45306c3fb27SDimitry Andric Changed = true; 45406c3fb27SDimitry Andric } 45506c3fb27SDimitry Andric } 45606c3fb27SDimitry Andric } 45706c3fb27SDimitry 
Andric LLVM_DEBUG(dbgs() << "End X86FixupVectorConstants\n";); 45806c3fb27SDimitry Andric return Changed; 45906c3fb27SDimitry Andric } 460