//===-- X86FixupVectorConstants.cpp - optimize constant generation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file examines all full size vector constant pool loads and attempts to
// replace them with smaller constant pool entries, including:
// * Converting AVX512 memory-fold instructions to their broadcast-fold form
// * Broadcasting of full width loads.
// * TODO: Sign/Zero extension of full width loads.
1406c3fb27SDimitry Andric // 1506c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 1606c3fb27SDimitry Andric 1706c3fb27SDimitry Andric #include "X86.h" 1806c3fb27SDimitry Andric #include "X86InstrFoldTables.h" 1906c3fb27SDimitry Andric #include "X86InstrInfo.h" 2006c3fb27SDimitry Andric #include "X86Subtarget.h" 2106c3fb27SDimitry Andric #include "llvm/ADT/Statistic.h" 2206c3fb27SDimitry Andric #include "llvm/CodeGen/MachineConstantPool.h" 2306c3fb27SDimitry Andric 2406c3fb27SDimitry Andric using namespace llvm; 2506c3fb27SDimitry Andric 2606c3fb27SDimitry Andric #define DEBUG_TYPE "x86-fixup-vector-constants" 2706c3fb27SDimitry Andric 2806c3fb27SDimitry Andric STATISTIC(NumInstChanges, "Number of instructions changes"); 2906c3fb27SDimitry Andric 3006c3fb27SDimitry Andric namespace { 3106c3fb27SDimitry Andric class X86FixupVectorConstantsPass : public MachineFunctionPass { 3206c3fb27SDimitry Andric public: 3306c3fb27SDimitry Andric static char ID; 3406c3fb27SDimitry Andric 3506c3fb27SDimitry Andric X86FixupVectorConstantsPass() : MachineFunctionPass(ID) {} 3606c3fb27SDimitry Andric 3706c3fb27SDimitry Andric StringRef getPassName() const override { 3806c3fb27SDimitry Andric return "X86 Fixup Vector Constants"; 3906c3fb27SDimitry Andric } 4006c3fb27SDimitry Andric 4106c3fb27SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 4206c3fb27SDimitry Andric bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB, 4306c3fb27SDimitry Andric MachineInstr &MI); 4406c3fb27SDimitry Andric 4506c3fb27SDimitry Andric // This pass runs after regalloc and doesn't support VReg operands. 
4606c3fb27SDimitry Andric MachineFunctionProperties getRequiredProperties() const override { 4706c3fb27SDimitry Andric return MachineFunctionProperties().set( 4806c3fb27SDimitry Andric MachineFunctionProperties::Property::NoVRegs); 4906c3fb27SDimitry Andric } 5006c3fb27SDimitry Andric 5106c3fb27SDimitry Andric private: 5206c3fb27SDimitry Andric const X86InstrInfo *TII = nullptr; 5306c3fb27SDimitry Andric const X86Subtarget *ST = nullptr; 5406c3fb27SDimitry Andric const MCSchedModel *SM = nullptr; 5506c3fb27SDimitry Andric }; 5606c3fb27SDimitry Andric } // end anonymous namespace 5706c3fb27SDimitry Andric 5806c3fb27SDimitry Andric char X86FixupVectorConstantsPass::ID = 0; 5906c3fb27SDimitry Andric 6006c3fb27SDimitry Andric INITIALIZE_PASS(X86FixupVectorConstantsPass, DEBUG_TYPE, DEBUG_TYPE, false, false) 6106c3fb27SDimitry Andric 6206c3fb27SDimitry Andric FunctionPass *llvm::createX86FixupVectorConstants() { 6306c3fb27SDimitry Andric return new X86FixupVectorConstantsPass(); 6406c3fb27SDimitry Andric } 6506c3fb27SDimitry Andric 6606c3fb27SDimitry Andric static const Constant *getConstantFromPool(const MachineInstr &MI, 6706c3fb27SDimitry Andric const MachineOperand &Op) { 6806c3fb27SDimitry Andric if (!Op.isCPI() || Op.getOffset() != 0) 6906c3fb27SDimitry Andric return nullptr; 7006c3fb27SDimitry Andric 7106c3fb27SDimitry Andric ArrayRef<MachineConstantPoolEntry> Constants = 7206c3fb27SDimitry Andric MI.getParent()->getParent()->getConstantPool()->getConstants(); 7306c3fb27SDimitry Andric const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()]; 7406c3fb27SDimitry Andric 7506c3fb27SDimitry Andric // Bail if this is a machine constant pool entry, we won't be able to dig out 7606c3fb27SDimitry Andric // anything useful. 
7706c3fb27SDimitry Andric if (ConstantEntry.isMachineConstantPoolEntry()) 7806c3fb27SDimitry Andric return nullptr; 7906c3fb27SDimitry Andric 8006c3fb27SDimitry Andric return ConstantEntry.Val.ConstVal; 8106c3fb27SDimitry Andric } 8206c3fb27SDimitry Andric 8306c3fb27SDimitry Andric // Attempt to extract the full width of bits data from the constant. 8406c3fb27SDimitry Andric static std::optional<APInt> extractConstantBits(const Constant *C) { 8506c3fb27SDimitry Andric unsigned NumBits = C->getType()->getPrimitiveSizeInBits(); 8606c3fb27SDimitry Andric 8706c3fb27SDimitry Andric if (auto *CInt = dyn_cast<ConstantInt>(C)) 8806c3fb27SDimitry Andric return CInt->getValue(); 8906c3fb27SDimitry Andric 9006c3fb27SDimitry Andric if (auto *CFP = dyn_cast<ConstantFP>(C)) 9106c3fb27SDimitry Andric return CFP->getValue().bitcastToAPInt(); 9206c3fb27SDimitry Andric 9306c3fb27SDimitry Andric if (auto *CV = dyn_cast<ConstantVector>(C)) { 9406c3fb27SDimitry Andric if (auto *CVSplat = CV->getSplatValue(/*AllowUndefs*/ true)) { 9506c3fb27SDimitry Andric if (std::optional<APInt> Bits = extractConstantBits(CVSplat)) { 9606c3fb27SDimitry Andric assert((NumBits % Bits->getBitWidth()) == 0 && "Illegal splat"); 9706c3fb27SDimitry Andric return APInt::getSplat(NumBits, *Bits); 9806c3fb27SDimitry Andric } 9906c3fb27SDimitry Andric } 10006c3fb27SDimitry Andric } 10106c3fb27SDimitry Andric 10206c3fb27SDimitry Andric if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) { 10306c3fb27SDimitry Andric bool IsInteger = CDS->getElementType()->isIntegerTy(); 10406c3fb27SDimitry Andric bool IsFloat = CDS->getElementType()->isHalfTy() || 10506c3fb27SDimitry Andric CDS->getElementType()->isBFloatTy() || 10606c3fb27SDimitry Andric CDS->getElementType()->isFloatTy() || 10706c3fb27SDimitry Andric CDS->getElementType()->isDoubleTy(); 10806c3fb27SDimitry Andric if (IsInteger || IsFloat) { 10906c3fb27SDimitry Andric APInt Bits = APInt::getZero(NumBits); 11006c3fb27SDimitry Andric unsigned EltBits = 
CDS->getElementType()->getPrimitiveSizeInBits(); 11106c3fb27SDimitry Andric for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) { 11206c3fb27SDimitry Andric if (IsInteger) 11306c3fb27SDimitry Andric Bits.insertBits(CDS->getElementAsAPInt(I), I * EltBits); 11406c3fb27SDimitry Andric else 11506c3fb27SDimitry Andric Bits.insertBits(CDS->getElementAsAPFloat(I).bitcastToAPInt(), 11606c3fb27SDimitry Andric I * EltBits); 11706c3fb27SDimitry Andric } 11806c3fb27SDimitry Andric return Bits; 11906c3fb27SDimitry Andric } 12006c3fb27SDimitry Andric } 12106c3fb27SDimitry Andric 12206c3fb27SDimitry Andric return std::nullopt; 12306c3fb27SDimitry Andric } 12406c3fb27SDimitry Andric 12506c3fb27SDimitry Andric // Attempt to compute the splat width of bits data by normalizing the splat to 12606c3fb27SDimitry Andric // remove undefs. 12706c3fb27SDimitry Andric static std::optional<APInt> getSplatableConstant(const Constant *C, 12806c3fb27SDimitry Andric unsigned SplatBitWidth) { 12906c3fb27SDimitry Andric const Type *Ty = C->getType(); 13006c3fb27SDimitry Andric assert((Ty->getPrimitiveSizeInBits() % SplatBitWidth) == 0 && 13106c3fb27SDimitry Andric "Illegal splat width"); 13206c3fb27SDimitry Andric 13306c3fb27SDimitry Andric if (std::optional<APInt> Bits = extractConstantBits(C)) 13406c3fb27SDimitry Andric if (Bits->isSplat(SplatBitWidth)) 13506c3fb27SDimitry Andric return Bits->trunc(SplatBitWidth); 13606c3fb27SDimitry Andric 13706c3fb27SDimitry Andric // Detect general splats with undefs. 13806c3fb27SDimitry Andric // TODO: Do we need to handle NumEltsBits > SplatBitWidth splitting? 
13906c3fb27SDimitry Andric if (auto *CV = dyn_cast<ConstantVector>(C)) { 14006c3fb27SDimitry Andric unsigned NumOps = CV->getNumOperands(); 14106c3fb27SDimitry Andric unsigned NumEltsBits = Ty->getScalarSizeInBits(); 14206c3fb27SDimitry Andric unsigned NumScaleOps = SplatBitWidth / NumEltsBits; 14306c3fb27SDimitry Andric if ((SplatBitWidth % NumEltsBits) == 0) { 14406c3fb27SDimitry Andric // Collect the elements and ensure that within the repeated splat sequence 14506c3fb27SDimitry Andric // they either match or are undef. 14606c3fb27SDimitry Andric SmallVector<Constant *, 16> Sequence(NumScaleOps, nullptr); 14706c3fb27SDimitry Andric for (unsigned Idx = 0; Idx != NumOps; ++Idx) { 14806c3fb27SDimitry Andric if (Constant *Elt = CV->getAggregateElement(Idx)) { 14906c3fb27SDimitry Andric if (isa<UndefValue>(Elt)) 15006c3fb27SDimitry Andric continue; 15106c3fb27SDimitry Andric unsigned SplatIdx = Idx % NumScaleOps; 15206c3fb27SDimitry Andric if (!Sequence[SplatIdx] || Sequence[SplatIdx] == Elt) { 15306c3fb27SDimitry Andric Sequence[SplatIdx] = Elt; 15406c3fb27SDimitry Andric continue; 15506c3fb27SDimitry Andric } 15606c3fb27SDimitry Andric } 15706c3fb27SDimitry Andric return std::nullopt; 15806c3fb27SDimitry Andric } 15906c3fb27SDimitry Andric // Extract the constant bits forming the splat and insert into the bits 16006c3fb27SDimitry Andric // data, leave undef as zero. 
16106c3fb27SDimitry Andric APInt SplatBits = APInt::getZero(SplatBitWidth); 16206c3fb27SDimitry Andric for (unsigned I = 0; I != NumScaleOps; ++I) { 16306c3fb27SDimitry Andric if (!Sequence[I]) 16406c3fb27SDimitry Andric continue; 16506c3fb27SDimitry Andric if (std::optional<APInt> Bits = extractConstantBits(Sequence[I])) { 16606c3fb27SDimitry Andric SplatBits.insertBits(*Bits, I * Bits->getBitWidth()); 16706c3fb27SDimitry Andric continue; 16806c3fb27SDimitry Andric } 16906c3fb27SDimitry Andric return std::nullopt; 17006c3fb27SDimitry Andric } 17106c3fb27SDimitry Andric return SplatBits; 17206c3fb27SDimitry Andric } 17306c3fb27SDimitry Andric } 17406c3fb27SDimitry Andric 17506c3fb27SDimitry Andric return std::nullopt; 17606c3fb27SDimitry Andric } 17706c3fb27SDimitry Andric 17806c3fb27SDimitry Andric // Attempt to rebuild a normalized splat vector constant of the requested splat 17906c3fb27SDimitry Andric // width, built up of potentially smaller scalar values. 18006c3fb27SDimitry Andric // NOTE: We don't always bother converting to scalars if the vector length is 1. 18106c3fb27SDimitry Andric static Constant *rebuildSplatableConstant(const Constant *C, 18206c3fb27SDimitry Andric unsigned SplatBitWidth) { 18306c3fb27SDimitry Andric std::optional<APInt> Splat = getSplatableConstant(C, SplatBitWidth); 18406c3fb27SDimitry Andric if (!Splat) 18506c3fb27SDimitry Andric return nullptr; 18606c3fb27SDimitry Andric 18706c3fb27SDimitry Andric // Determine scalar size to use for the constant splat vector, clamping as we 18806c3fb27SDimitry Andric // might have found a splat smaller than the original constant data. 
18906c3fb27SDimitry Andric const Type *OriginalType = C->getType(); 19006c3fb27SDimitry Andric Type *SclTy = OriginalType->getScalarType(); 19106c3fb27SDimitry Andric unsigned NumSclBits = SclTy->getPrimitiveSizeInBits(); 19206c3fb27SDimitry Andric NumSclBits = std::min<unsigned>(NumSclBits, SplatBitWidth); 193*5f757f3fSDimitry Andric LLVMContext &Ctx = OriginalType->getContext(); 19406c3fb27SDimitry Andric 19506c3fb27SDimitry Andric if (NumSclBits == 8) { 19606c3fb27SDimitry Andric SmallVector<uint8_t> RawBits; 19706c3fb27SDimitry Andric for (unsigned I = 0; I != SplatBitWidth; I += 8) 19806c3fb27SDimitry Andric RawBits.push_back(Splat->extractBits(8, I).getZExtValue()); 199*5f757f3fSDimitry Andric return ConstantDataVector::get(Ctx, RawBits); 20006c3fb27SDimitry Andric } 20106c3fb27SDimitry Andric 20206c3fb27SDimitry Andric if (NumSclBits == 16) { 20306c3fb27SDimitry Andric SmallVector<uint16_t> RawBits; 20406c3fb27SDimitry Andric for (unsigned I = 0; I != SplatBitWidth; I += 16) 20506c3fb27SDimitry Andric RawBits.push_back(Splat->extractBits(16, I).getZExtValue()); 20606c3fb27SDimitry Andric if (SclTy->is16bitFPTy()) 20706c3fb27SDimitry Andric return ConstantDataVector::getFP(SclTy, RawBits); 208*5f757f3fSDimitry Andric return ConstantDataVector::get(Ctx, RawBits); 20906c3fb27SDimitry Andric } 21006c3fb27SDimitry Andric 21106c3fb27SDimitry Andric if (NumSclBits == 32) { 21206c3fb27SDimitry Andric SmallVector<uint32_t> RawBits; 21306c3fb27SDimitry Andric for (unsigned I = 0; I != SplatBitWidth; I += 32) 21406c3fb27SDimitry Andric RawBits.push_back(Splat->extractBits(32, I).getZExtValue()); 21506c3fb27SDimitry Andric if (SclTy->isFloatTy()) 21606c3fb27SDimitry Andric return ConstantDataVector::getFP(SclTy, RawBits); 217*5f757f3fSDimitry Andric return ConstantDataVector::get(Ctx, RawBits); 21806c3fb27SDimitry Andric } 21906c3fb27SDimitry Andric 22006c3fb27SDimitry Andric // Fallback to i64 / double. 
22106c3fb27SDimitry Andric SmallVector<uint64_t> RawBits; 22206c3fb27SDimitry Andric for (unsigned I = 0; I != SplatBitWidth; I += 64) 22306c3fb27SDimitry Andric RawBits.push_back(Splat->extractBits(64, I).getZExtValue()); 22406c3fb27SDimitry Andric if (SclTy->isDoubleTy()) 22506c3fb27SDimitry Andric return ConstantDataVector::getFP(SclTy, RawBits); 226*5f757f3fSDimitry Andric return ConstantDataVector::get(Ctx, RawBits); 22706c3fb27SDimitry Andric } 22806c3fb27SDimitry Andric 22906c3fb27SDimitry Andric bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF, 23006c3fb27SDimitry Andric MachineBasicBlock &MBB, 23106c3fb27SDimitry Andric MachineInstr &MI) { 23206c3fb27SDimitry Andric unsigned Opc = MI.getOpcode(); 23306c3fb27SDimitry Andric MachineConstantPool *CP = MI.getParent()->getParent()->getConstantPool(); 234*5f757f3fSDimitry Andric bool HasAVX2 = ST->hasAVX2(); 23506c3fb27SDimitry Andric bool HasDQI = ST->hasDQI(); 23606c3fb27SDimitry Andric bool HasBWI = ST->hasBWI(); 237*5f757f3fSDimitry Andric bool HasVLX = ST->hasVLX(); 23806c3fb27SDimitry Andric 23906c3fb27SDimitry Andric auto ConvertToBroadcast = [&](unsigned OpBcst256, unsigned OpBcst128, 24006c3fb27SDimitry Andric unsigned OpBcst64, unsigned OpBcst32, 24106c3fb27SDimitry Andric unsigned OpBcst16, unsigned OpBcst8, 24206c3fb27SDimitry Andric unsigned OperandNo) { 24306c3fb27SDimitry Andric assert(MI.getNumOperands() >= (OperandNo + X86::AddrNumOperands) && 24406c3fb27SDimitry Andric "Unexpected number of operands!"); 24506c3fb27SDimitry Andric 24606c3fb27SDimitry Andric MachineOperand &CstOp = MI.getOperand(OperandNo + X86::AddrDisp); 24706c3fb27SDimitry Andric if (auto *C = getConstantFromPool(MI, CstOp)) { 24806c3fb27SDimitry Andric // Attempt to detect a suitable splat from increasing splat widths. 
24906c3fb27SDimitry Andric std::pair<unsigned, unsigned> Broadcasts[] = { 25006c3fb27SDimitry Andric {8, OpBcst8}, {16, OpBcst16}, {32, OpBcst32}, 25106c3fb27SDimitry Andric {64, OpBcst64}, {128, OpBcst128}, {256, OpBcst256}, 25206c3fb27SDimitry Andric }; 25306c3fb27SDimitry Andric for (auto [BitWidth, OpBcst] : Broadcasts) { 25406c3fb27SDimitry Andric if (OpBcst) { 25506c3fb27SDimitry Andric // Construct a suitable splat constant and adjust the MI to 25606c3fb27SDimitry Andric // use the new constant pool entry. 25706c3fb27SDimitry Andric if (Constant *NewCst = rebuildSplatableConstant(C, BitWidth)) { 25806c3fb27SDimitry Andric unsigned NewCPI = 25906c3fb27SDimitry Andric CP->getConstantPoolIndex(NewCst, Align(BitWidth / 8)); 26006c3fb27SDimitry Andric MI.setDesc(TII->get(OpBcst)); 26106c3fb27SDimitry Andric CstOp.setIndex(NewCPI); 26206c3fb27SDimitry Andric return true; 26306c3fb27SDimitry Andric } 26406c3fb27SDimitry Andric } 26506c3fb27SDimitry Andric } 26606c3fb27SDimitry Andric } 26706c3fb27SDimitry Andric return false; 26806c3fb27SDimitry Andric }; 26906c3fb27SDimitry Andric 27006c3fb27SDimitry Andric // Attempt to convert full width vector loads into broadcast loads. 
27106c3fb27SDimitry Andric switch (Opc) { 27206c3fb27SDimitry Andric /* FP Loads */ 27306c3fb27SDimitry Andric case X86::MOVAPDrm: 27406c3fb27SDimitry Andric case X86::MOVAPSrm: 27506c3fb27SDimitry Andric case X86::MOVUPDrm: 27606c3fb27SDimitry Andric case X86::MOVUPSrm: 27706c3fb27SDimitry Andric // TODO: SSE3 MOVDDUP Handling 27806c3fb27SDimitry Andric return false; 27906c3fb27SDimitry Andric case X86::VMOVAPDrm: 28006c3fb27SDimitry Andric case X86::VMOVAPSrm: 28106c3fb27SDimitry Andric case X86::VMOVUPDrm: 28206c3fb27SDimitry Andric case X86::VMOVUPSrm: 28306c3fb27SDimitry Andric return ConvertToBroadcast(0, 0, X86::VMOVDDUPrm, X86::VBROADCASTSSrm, 0, 0, 28406c3fb27SDimitry Andric 1); 28506c3fb27SDimitry Andric case X86::VMOVAPDYrm: 28606c3fb27SDimitry Andric case X86::VMOVAPSYrm: 28706c3fb27SDimitry Andric case X86::VMOVUPDYrm: 28806c3fb27SDimitry Andric case X86::VMOVUPSYrm: 289*5f757f3fSDimitry Andric return ConvertToBroadcast(0, X86::VBROADCASTF128rm, X86::VBROADCASTSDYrm, 29006c3fb27SDimitry Andric X86::VBROADCASTSSYrm, 0, 0, 1); 29106c3fb27SDimitry Andric case X86::VMOVAPDZ128rm: 29206c3fb27SDimitry Andric case X86::VMOVAPSZ128rm: 29306c3fb27SDimitry Andric case X86::VMOVUPDZ128rm: 29406c3fb27SDimitry Andric case X86::VMOVUPSZ128rm: 29506c3fb27SDimitry Andric return ConvertToBroadcast(0, 0, X86::VMOVDDUPZ128rm, 29606c3fb27SDimitry Andric X86::VBROADCASTSSZ128rm, 0, 0, 1); 29706c3fb27SDimitry Andric case X86::VMOVAPDZ256rm: 29806c3fb27SDimitry Andric case X86::VMOVAPSZ256rm: 29906c3fb27SDimitry Andric case X86::VMOVUPDZ256rm: 30006c3fb27SDimitry Andric case X86::VMOVUPSZ256rm: 301*5f757f3fSDimitry Andric return ConvertToBroadcast(0, X86::VBROADCASTF32X4Z256rm, 302*5f757f3fSDimitry Andric X86::VBROADCASTSDZ256rm, X86::VBROADCASTSSZ256rm, 303*5f757f3fSDimitry Andric 0, 0, 1); 30406c3fb27SDimitry Andric case X86::VMOVAPDZrm: 30506c3fb27SDimitry Andric case X86::VMOVAPSZrm: 30606c3fb27SDimitry Andric case X86::VMOVUPDZrm: 30706c3fb27SDimitry Andric case 
X86::VMOVUPSZrm: 308*5f757f3fSDimitry Andric return ConvertToBroadcast(X86::VBROADCASTF64X4rm, X86::VBROADCASTF32X4rm, 309*5f757f3fSDimitry Andric X86::VBROADCASTSDZrm, X86::VBROADCASTSSZrm, 0, 0, 310*5f757f3fSDimitry Andric 1); 31106c3fb27SDimitry Andric /* Integer Loads */ 31206c3fb27SDimitry Andric case X86::VMOVDQArm: 31306c3fb27SDimitry Andric case X86::VMOVDQUrm: 314*5f757f3fSDimitry Andric return ConvertToBroadcast( 315*5f757f3fSDimitry Andric 0, 0, HasAVX2 ? X86::VPBROADCASTQrm : X86::VMOVDDUPrm, 316*5f757f3fSDimitry Andric HasAVX2 ? X86::VPBROADCASTDrm : X86::VBROADCASTSSrm, 317*5f757f3fSDimitry Andric HasAVX2 ? X86::VPBROADCASTWrm : 0, HasAVX2 ? X86::VPBROADCASTBrm : 0, 31806c3fb27SDimitry Andric 1); 31906c3fb27SDimitry Andric case X86::VMOVDQAYrm: 32006c3fb27SDimitry Andric case X86::VMOVDQUYrm: 321*5f757f3fSDimitry Andric return ConvertToBroadcast( 322*5f757f3fSDimitry Andric 0, HasAVX2 ? X86::VBROADCASTI128rm : X86::VBROADCASTF128rm, 323*5f757f3fSDimitry Andric HasAVX2 ? X86::VPBROADCASTQYrm : X86::VBROADCASTSDYrm, 324*5f757f3fSDimitry Andric HasAVX2 ? X86::VPBROADCASTDYrm : X86::VBROADCASTSSYrm, 325*5f757f3fSDimitry Andric HasAVX2 ? X86::VPBROADCASTWYrm : 0, HasAVX2 ? X86::VPBROADCASTBYrm : 0, 326*5f757f3fSDimitry Andric 1); 32706c3fb27SDimitry Andric case X86::VMOVDQA32Z128rm: 32806c3fb27SDimitry Andric case X86::VMOVDQA64Z128rm: 32906c3fb27SDimitry Andric case X86::VMOVDQU32Z128rm: 33006c3fb27SDimitry Andric case X86::VMOVDQU64Z128rm: 33106c3fb27SDimitry Andric return ConvertToBroadcast(0, 0, X86::VPBROADCASTQZ128rm, 33206c3fb27SDimitry Andric X86::VPBROADCASTDZ128rm, 33306c3fb27SDimitry Andric HasBWI ? X86::VPBROADCASTWZ128rm : 0, 33406c3fb27SDimitry Andric HasBWI ? 
X86::VPBROADCASTBZ128rm : 0, 1); 33506c3fb27SDimitry Andric case X86::VMOVDQA32Z256rm: 33606c3fb27SDimitry Andric case X86::VMOVDQA64Z256rm: 33706c3fb27SDimitry Andric case X86::VMOVDQU32Z256rm: 33806c3fb27SDimitry Andric case X86::VMOVDQU64Z256rm: 339*5f757f3fSDimitry Andric return ConvertToBroadcast(0, X86::VBROADCASTI32X4Z256rm, 34006c3fb27SDimitry Andric X86::VPBROADCASTQZ256rm, X86::VPBROADCASTDZ256rm, 34106c3fb27SDimitry Andric HasBWI ? X86::VPBROADCASTWZ256rm : 0, 34206c3fb27SDimitry Andric HasBWI ? X86::VPBROADCASTBZ256rm : 0, 1); 34306c3fb27SDimitry Andric case X86::VMOVDQA32Zrm: 34406c3fb27SDimitry Andric case X86::VMOVDQA64Zrm: 34506c3fb27SDimitry Andric case X86::VMOVDQU32Zrm: 34606c3fb27SDimitry Andric case X86::VMOVDQU64Zrm: 347*5f757f3fSDimitry Andric return ConvertToBroadcast(X86::VBROADCASTI64X4rm, X86::VBROADCASTI32X4rm, 34806c3fb27SDimitry Andric X86::VPBROADCASTQZrm, X86::VPBROADCASTDZrm, 349*5f757f3fSDimitry Andric HasBWI ? X86::VPBROADCASTWZrm : 0, 350*5f757f3fSDimitry Andric HasBWI ? 
X86::VPBROADCASTBZrm : 0, 1); 35106c3fb27SDimitry Andric } 35206c3fb27SDimitry Andric 353*5f757f3fSDimitry Andric auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) { 35406c3fb27SDimitry Andric unsigned OpBcst32 = 0, OpBcst64 = 0; 35506c3fb27SDimitry Andric unsigned OpNoBcst32 = 0, OpNoBcst64 = 0; 356*5f757f3fSDimitry Andric if (OpSrc32) { 357*5f757f3fSDimitry Andric if (const X86FoldTableEntry *Mem2Bcst = 358*5f757f3fSDimitry Andric llvm::lookupBroadcastFoldTable(OpSrc32, 32)) { 35906c3fb27SDimitry Andric OpBcst32 = Mem2Bcst->DstOp; 36006c3fb27SDimitry Andric OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK; 36106c3fb27SDimitry Andric } 362*5f757f3fSDimitry Andric } 363*5f757f3fSDimitry Andric if (OpSrc64) { 364*5f757f3fSDimitry Andric if (const X86FoldTableEntry *Mem2Bcst = 365*5f757f3fSDimitry Andric llvm::lookupBroadcastFoldTable(OpSrc64, 64)) { 36606c3fb27SDimitry Andric OpBcst64 = Mem2Bcst->DstOp; 36706c3fb27SDimitry Andric OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK; 36806c3fb27SDimitry Andric } 369*5f757f3fSDimitry Andric } 37006c3fb27SDimitry Andric assert(((OpBcst32 == 0) || (OpBcst64 == 0) || (OpNoBcst32 == OpNoBcst64)) && 37106c3fb27SDimitry Andric "OperandNo mismatch"); 37206c3fb27SDimitry Andric 37306c3fb27SDimitry Andric if (OpBcst32 || OpBcst64) { 37406c3fb27SDimitry Andric unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32; 37506c3fb27SDimitry Andric return ConvertToBroadcast(0, 0, OpBcst64, OpBcst32, 0, 0, OpNo); 37606c3fb27SDimitry Andric } 377*5f757f3fSDimitry Andric return false; 378*5f757f3fSDimitry Andric }; 379*5f757f3fSDimitry Andric 380*5f757f3fSDimitry Andric // Attempt to find a AVX512 mapping from a full width memory-fold instruction 381*5f757f3fSDimitry Andric // to a broadcast-fold instruction variant. 
382*5f757f3fSDimitry Andric if ((MI.getDesc().TSFlags & X86II::EncodingMask) == X86II::EVEX) 383*5f757f3fSDimitry Andric return ConvertToBroadcastAVX512(Opc, Opc); 384*5f757f3fSDimitry Andric 385*5f757f3fSDimitry Andric // Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic 386*5f757f3fSDimitry Andric // conversion to see if we can convert to a broadcasted (integer) logic op. 387*5f757f3fSDimitry Andric if (HasVLX && !HasDQI) { 388*5f757f3fSDimitry Andric unsigned OpSrc32 = 0, OpSrc64 = 0; 389*5f757f3fSDimitry Andric switch (Opc) { 390*5f757f3fSDimitry Andric case X86::VANDPDrm: 391*5f757f3fSDimitry Andric case X86::VANDPSrm: 392*5f757f3fSDimitry Andric case X86::VPANDrm: 393*5f757f3fSDimitry Andric OpSrc32 = X86 ::VPANDDZ128rm; 394*5f757f3fSDimitry Andric OpSrc64 = X86 ::VPANDQZ128rm; 395*5f757f3fSDimitry Andric break; 396*5f757f3fSDimitry Andric case X86::VANDPDYrm: 397*5f757f3fSDimitry Andric case X86::VANDPSYrm: 398*5f757f3fSDimitry Andric case X86::VPANDYrm: 399*5f757f3fSDimitry Andric OpSrc32 = X86 ::VPANDDZ256rm; 400*5f757f3fSDimitry Andric OpSrc64 = X86 ::VPANDQZ256rm; 401*5f757f3fSDimitry Andric break; 402*5f757f3fSDimitry Andric case X86::VANDNPDrm: 403*5f757f3fSDimitry Andric case X86::VANDNPSrm: 404*5f757f3fSDimitry Andric case X86::VPANDNrm: 405*5f757f3fSDimitry Andric OpSrc32 = X86 ::VPANDNDZ128rm; 406*5f757f3fSDimitry Andric OpSrc64 = X86 ::VPANDNQZ128rm; 407*5f757f3fSDimitry Andric break; 408*5f757f3fSDimitry Andric case X86::VANDNPDYrm: 409*5f757f3fSDimitry Andric case X86::VANDNPSYrm: 410*5f757f3fSDimitry Andric case X86::VPANDNYrm: 411*5f757f3fSDimitry Andric OpSrc32 = X86 ::VPANDNDZ256rm; 412*5f757f3fSDimitry Andric OpSrc64 = X86 ::VPANDNQZ256rm; 413*5f757f3fSDimitry Andric break; 414*5f757f3fSDimitry Andric case X86::VORPDrm: 415*5f757f3fSDimitry Andric case X86::VORPSrm: 416*5f757f3fSDimitry Andric case X86::VPORrm: 417*5f757f3fSDimitry Andric OpSrc32 = X86 ::VPORDZ128rm; 418*5f757f3fSDimitry Andric OpSrc64 = X86 ::VPORQZ128rm; 
419*5f757f3fSDimitry Andric break; 420*5f757f3fSDimitry Andric case X86::VORPDYrm: 421*5f757f3fSDimitry Andric case X86::VORPSYrm: 422*5f757f3fSDimitry Andric case X86::VPORYrm: 423*5f757f3fSDimitry Andric OpSrc32 = X86 ::VPORDZ256rm; 424*5f757f3fSDimitry Andric OpSrc64 = X86 ::VPORQZ256rm; 425*5f757f3fSDimitry Andric break; 426*5f757f3fSDimitry Andric case X86::VXORPDrm: 427*5f757f3fSDimitry Andric case X86::VXORPSrm: 428*5f757f3fSDimitry Andric case X86::VPXORrm: 429*5f757f3fSDimitry Andric OpSrc32 = X86 ::VPXORDZ128rm; 430*5f757f3fSDimitry Andric OpSrc64 = X86 ::VPXORQZ128rm; 431*5f757f3fSDimitry Andric break; 432*5f757f3fSDimitry Andric case X86::VXORPDYrm: 433*5f757f3fSDimitry Andric case X86::VXORPSYrm: 434*5f757f3fSDimitry Andric case X86::VPXORYrm: 435*5f757f3fSDimitry Andric OpSrc32 = X86 ::VPXORDZ256rm; 436*5f757f3fSDimitry Andric OpSrc64 = X86 ::VPXORQZ256rm; 437*5f757f3fSDimitry Andric break; 438*5f757f3fSDimitry Andric } 439*5f757f3fSDimitry Andric if (OpSrc32 || OpSrc64) 440*5f757f3fSDimitry Andric return ConvertToBroadcastAVX512(OpSrc32, OpSrc64); 44106c3fb27SDimitry Andric } 44206c3fb27SDimitry Andric 44306c3fb27SDimitry Andric return false; 44406c3fb27SDimitry Andric } 44506c3fb27SDimitry Andric 44606c3fb27SDimitry Andric bool X86FixupVectorConstantsPass::runOnMachineFunction(MachineFunction &MF) { 44706c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "Start X86FixupVectorConstants\n";); 44806c3fb27SDimitry Andric bool Changed = false; 44906c3fb27SDimitry Andric ST = &MF.getSubtarget<X86Subtarget>(); 45006c3fb27SDimitry Andric TII = ST->getInstrInfo(); 45106c3fb27SDimitry Andric SM = &ST->getSchedModel(); 45206c3fb27SDimitry Andric 45306c3fb27SDimitry Andric for (MachineBasicBlock &MBB : MF) { 45406c3fb27SDimitry Andric for (MachineInstr &MI : MBB) { 45506c3fb27SDimitry Andric if (processInstruction(MF, MBB, MI)) { 45606c3fb27SDimitry Andric ++NumInstChanges; 45706c3fb27SDimitry Andric Changed = true; 45806c3fb27SDimitry Andric } 45906c3fb27SDimitry 
Andric } 46006c3fb27SDimitry Andric } 46106c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "End X86FixupVectorConstants\n";); 46206c3fb27SDimitry Andric return Changed; 46306c3fb27SDimitry Andric } 464