//===-- X86FixupVectorConstants.cpp - optimize constant generation  -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file examines all full size vector constant pool loads and attempts to
// replace them with smaller constant pool entries, including:
// * Converting AVX512 memory-fold instructions to their broadcast-fold form.
// * Broadcasting of full width loads.
// * TODO: Sign/Zero extension of full width loads.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrFoldTables.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineConstantPool.h"

using namespace llvm;

#define DEBUG_TYPE "x86-fixup-vector-constants"

STATISTIC(NumInstChanges, "Number of instruction changes");

namespace {
class X86FixupVectorConstantsPass : public MachineFunctionPass {
public:
  static char ID;

  X86FixupVectorConstantsPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override {
    return "X86 Fixup Vector Constants";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
                          MachineInstr &MI);

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

private:
  const X86InstrInfo *TII = nullptr;
  const X86Subtarget *ST = nullptr;
  const MCSchedModel *SM = nullptr;
};
} // end anonymous namespace

char X86FixupVectorConstantsPass::ID = 0;

INITIALIZE_PASS(X86FixupVectorConstantsPass, DEBUG_TYPE, DEBUG_TYPE, false,
                false)

FunctionPass *llvm::createX86FixupVectorConstants() {
  return new X86FixupVectorConstantsPass();
}

// Attempt to extract the full width of bits data from the constant.
static std::optional<APInt> extractConstantBits(const Constant *C) {
  unsigned NumBits = C->getType()->getPrimitiveSizeInBits();

  if (isa<UndefValue>(C))
    return APInt::getZero(NumBits);

  if (auto *CInt = dyn_cast<ConstantInt>(C))
    return CInt->getValue();

  if (auto *CFP = dyn_cast<ConstantFP>(C))
    return CFP->getValue().bitcastToAPInt();

  if (auto *CV = dyn_cast<ConstantVector>(C)) {
    if (auto *CVSplat = CV->getSplatValue(/*AllowUndefs*/ true)) {
      if (std::optional<APInt> Bits = extractConstantBits(CVSplat)) {
        assert((NumBits % Bits->getBitWidth()) == 0 && "Illegal splat");
        return APInt::getSplat(NumBits, *Bits);
      }
    }

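    // Not a simple splat - gather the bits element by element, bailing out if
    // any element's bits can't be extracted.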
    APInt Bits = APInt::getZero(NumBits);
    for (unsigned I = 0, E = CV->getNumOperands(); I != E; ++I) {
      Constant *Elt = CV->getOperand(I);
      std::optional<APInt> SubBits = extractConstantBits(Elt);
      if (!SubBits)
        return std::nullopt;
      assert(NumBits == (E * SubBits->getBitWidth()) &&
             "Illegal vector element size");
      Bits.insertBits(*SubBits, I * SubBits->getBitWidth());
    }
    return Bits;
  }

  if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
    bool IsInteger = CDS->getElementType()->isIntegerTy();
    bool IsFloat = CDS->getElementType()->isHalfTy() ||
                   CDS->getElementType()->isBFloatTy() ||
                   CDS->getElementType()->isFloatTy() ||
                   CDS->getElementType()->isDoubleTy();
    if (IsInteger || IsFloat) {
      APInt Bits = APInt::getZero(NumBits);
      unsigned EltBits = CDS->getElementType()->getPrimitiveSizeInBits();
      for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
        if (IsInteger)
          Bits.insertBits(CDS->getElementAsAPInt(I), I * EltBits);
        else
          Bits.insertBits(CDS->getElementAsAPFloat(I).bitcastToAPInt(),
                          I * EltBits);
      }
      return Bits;
    }
  }

  return std::nullopt;
}

// Attempt to compute the splat width of bits data by normalizing the splat to
// remove undefs.
static std::optional<APInt> getSplatableConstant(const Constant *C,
                                                 unsigned SplatBitWidth) {
  const Type *Ty = C->getType();
  assert((Ty->getPrimitiveSizeInBits() % SplatBitWidth) == 0 &&
         "Illegal splat width");

  if (std::optional<APInt> Bits = extractConstantBits(C))
    if (Bits->isSplat(SplatBitWidth))
      return Bits->trunc(SplatBitWidth);

  // Detect general splats with undefs.
  // TODO: Do we need to handle NumEltsBits > SplatBitWidth splitting?
  if (auto *CV = dyn_cast<ConstantVector>(C)) {
    unsigned NumOps = CV->getNumOperands();
    unsigned NumEltsBits = Ty->getScalarSizeInBits();
    unsigned NumScaleOps = SplatBitWidth / NumEltsBits;
    if ((SplatBitWidth % NumEltsBits) == 0) {
      // Collect the elements and ensure that within the repeated splat sequence
      // they either match or are undef.
      SmallVector<Constant *, 16> Sequence(NumScaleOps, nullptr);
      for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
        if (Constant *Elt = CV->getAggregateElement(Idx)) {
          if (isa<UndefValue>(Elt))
            continue;
          unsigned SplatIdx = Idx % NumScaleOps;
          if (!Sequence[SplatIdx] || Sequence[SplatIdx] == Elt) {
            Sequence[SplatIdx] = Elt;
            continue;
          }
        }
        return std::nullopt;
      }
      // Extract the constant bits forming the splat and insert into the bits
      // data, leave undef as zero.
      APInt SplatBits = APInt::getZero(SplatBitWidth);
      for (unsigned I = 0; I != NumScaleOps; ++I) {
        if (!Sequence[I])
          continue;
        if (std::optional<APInt> Bits = extractConstantBits(Sequence[I])) {
          SplatBits.insertBits(*Bits, I * Bits->getBitWidth());
          continue;
        }
        return std::nullopt;
      }
      return SplatBits;
    }
  }

  return std::nullopt;
}

// Split raw bits into a constant vector of elements of a specific bit width.
// NOTE: We don't always bother converting to scalars if the vector length is 1.
static Constant *rebuildConstant(LLVMContext &Ctx, Type *SclTy,
                                 const APInt &Bits, unsigned NumSclBits) {
  unsigned BitWidth = Bits.getBitWidth();

  if (NumSclBits == 8) {
    SmallVector<uint8_t> RawBits;
    for (unsigned I = 0; I != BitWidth; I += 8)
      RawBits.push_back(Bits.extractBits(8, I).getZExtValue());
    return ConstantDataVector::get(Ctx, RawBits);
  }

  if (NumSclBits == 16) {
    SmallVector<uint16_t> RawBits;
    for (unsigned I = 0; I != BitWidth; I += 16)
      RawBits.push_back(Bits.extractBits(16, I).getZExtValue());
    if (SclTy->is16bitFPTy())
      return ConstantDataVector::getFP(SclTy, RawBits);
    return ConstantDataVector::get(Ctx, RawBits);
  }

  if (NumSclBits == 32) {
    SmallVector<uint32_t> RawBits;
    for (unsigned I = 0; I != BitWidth; I += 32)
      RawBits.push_back(Bits.extractBits(32, I).getZExtValue());
    if (SclTy->isFloatTy())
      return ConstantDataVector::getFP(SclTy, RawBits);
    return ConstantDataVector::get(Ctx, RawBits);
  }

  assert(NumSclBits == 64 && "Unhandled vector element width");

  SmallVector<uint64_t> RawBits;
  for (unsigned I = 0; I != BitWidth; I += 64)
    RawBits.push_back(Bits.extractBits(64, I).getZExtValue());
  if (SclTy->isDoubleTy())
    return ConstantDataVector::getFP(SclTy, RawBits);
  return ConstantDataVector::get(Ctx, RawBits);
}

// Attempt to rebuild a normalized splat vector constant of the requested splat
// width, built up of potentially smaller scalar values.
static Constant *rebuildSplatCst(const Constant *C, unsigned /*NumElts*/,
                                 unsigned SplatBitWidth) {
  std::optional<APInt> Splat = getSplatableConstant(C, SplatBitWidth);
  if (!Splat)
    return nullptr;

  // Determine scalar size to use for the constant splat vector, clamping as we
  // might have found a splat smaller than the original constant data.
  const Type *OriginalType = C->getType();
  Type *SclTy = OriginalType->getScalarType();
  unsigned NumSclBits = SclTy->getPrimitiveSizeInBits();
  NumSclBits = std::min<unsigned>(NumSclBits, SplatBitWidth);

  // Fallback to i64 / double.
  NumSclBits = (NumSclBits == 8 || NumSclBits == 16 || NumSclBits == 32)
                   ? NumSclBits
                   : 64;

  // Extract per-element bits.
  return rebuildConstant(OriginalType->getContext(), SclTy, *Splat, NumSclBits);
}

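// Attempt to rebuild a constant for a scalar (vzload-style) reload of just the
// lowest ScalarBitWidth bits - legal only when all the upper bits of the
// original constant are zero, since the scalar load will zero-fill them.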
static Constant *rebuildZeroUpperCst(const Constant *C, unsigned /*NumElts*/,
                                     unsigned ScalarBitWidth) {
  Type *Ty = C->getType();
  Type *SclTy = Ty->getScalarType();
  unsigned NumBits = Ty->getPrimitiveSizeInBits();
  unsigned NumSclBits = SclTy->getPrimitiveSizeInBits();
  LLVMContext &Ctx = C->getContext();

  if (NumBits > ScalarBitWidth) {
    // Determine if the upper bits are all zero.
    if (std::optional<APInt> Bits = extractConstantBits(C)) {
      if (Bits->countLeadingZeros() >= (NumBits - ScalarBitWidth)) {
        // If the original constant was made of smaller elements, try to retain
        // those types.
        if (ScalarBitWidth > NumSclBits && (ScalarBitWidth % NumSclBits) == 0)
          return rebuildConstant(Ctx, SclTy, *Bits, NumSclBits);

        // Fallback to raw integer bits.
        APInt RawBits = Bits->zextOrTrunc(ScalarBitWidth);
        return ConstantInt::get(Ctx, RawBits);
      }
    }
  }

  return nullptr;
}

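// Attempt to shrink the constant pool load feeding MI: try to replace the
// full-width load with a broadcast from a splat constant or a zero-extending
// scalar load, updating the opcode and constant pool entry in place.
// Returns true if MI was changed.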
bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                                                     MachineBasicBlock &MBB,
                                                     MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  MachineConstantPool *CP = MI.getParent()->getParent()->getConstantPool();
  bool HasAVX2 = ST->hasAVX2();
  bool HasDQI = ST->hasDQI();
  bool HasBWI = ST->hasBWI();
  bool HasVLX = ST->hasVLX();

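  // Each FixupEntry describes one candidate replacement: the new opcode (zero
  // if unavailable on this subtarget), the element count and bit width used to
  // rebuild the constant, and the callback that tries to build it (returning
  // null if the constant isn't suitable).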
  struct FixupEntry {
    int Op;
    int NumCstElts;
    int BitWidth;
    std::function<Constant *(const Constant *, unsigned, unsigned)>
        RebuildConstant;
  };
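  // Walk the fixup candidates in order and rewrite MI to use the first one
  // whose constant can be rebuilt, replacing the opcode and redirecting the
  // constant pool operand at OperandNo to the new, smaller entry.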
  auto FixupConstant = [&](ArrayRef<FixupEntry> Fixups, unsigned OperandNo) {
#ifdef EXPENSIVE_CHECKS
    assert(llvm::is_sorted(Fixups,
                           [](const FixupEntry &A, const FixupEntry &B) {
                             return (A.NumCstElts * A.BitWidth) <
                                    (B.NumCstElts * B.BitWidth);
                           }) &&
           "Constant fixup table not sorted in ascending constant size");
#endif
    assert(MI.getNumOperands() >= (OperandNo + X86::AddrNumOperands) &&
           "Unexpected number of operands!");
    if (auto *C = X86::getConstantFromPool(MI, OperandNo)) {
      for (const FixupEntry &Fixup : Fixups) {
        if (Fixup.Op) {
          // Construct a suitable constant and adjust the MI to use the new
          // constant pool entry.
          if (Constant *NewCst =
                  Fixup.RebuildConstant(C, Fixup.NumCstElts, Fixup.BitWidth)) {
            unsigned NewCPI =
                CP->getConstantPoolIndex(NewCst, Align(Fixup.BitWidth / 8));
            MI.setDesc(TII->get(Fixup.Op));
            MI.getOperand(OperandNo + X86::AddrDisp).setIndex(NewCPI);
            return true;
          }
        }
      }
    }
    return false;
  };

  // Attempt to convert full width vector loads into broadcast/vzload loads.
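  // Candidate tables below are ordered by ascending constant size so that the
  // smallest viable constant pool entry is preferred.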
  switch (Opc) {
  /* FP Loads */
  case X86::MOVAPDrm:
  case X86::MOVAPSrm:
  case X86::MOVUPDrm:
  case X86::MOVUPSrm:
    // TODO: SSE3 MOVDDUP Handling
    return FixupConstant({{X86::MOVSSrm, 1, 32, rebuildZeroUpperCst},
                          {X86::MOVSDrm, 1, 64, rebuildZeroUpperCst}},
                         1);
  case X86::VMOVAPDrm:
  case X86::VMOVAPSrm:
  case X86::VMOVUPDrm:
  case X86::VMOVUPSrm:
    return FixupConstant({{X86::VMOVSSrm, 1, 32, rebuildZeroUpperCst},
                          {X86::VBROADCASTSSrm, 1, 32, rebuildSplatCst},
                          {X86::VMOVSDrm, 1, 64, rebuildZeroUpperCst},
                          {X86::VMOVDDUPrm, 1, 64, rebuildSplatCst}},
                         1);
  case X86::VMOVAPDYrm:
  case X86::VMOVAPSYrm:
  case X86::VMOVUPDYrm:
  case X86::VMOVUPSYrm:
    return FixupConstant({{X86::VBROADCASTSSYrm, 1, 32, rebuildSplatCst},
                          {X86::VBROADCASTSDYrm, 1, 64, rebuildSplatCst},
                          {X86::VBROADCASTF128rm, 1, 128, rebuildSplatCst}},
                         1);
  case X86::VMOVAPDZ128rm:
  case X86::VMOVAPSZ128rm:
  case X86::VMOVUPDZ128rm:
  case X86::VMOVUPSZ128rm:
    return FixupConstant({{X86::VMOVSSZrm, 1, 32, rebuildZeroUpperCst},
                          {X86::VBROADCASTSSZ128rm, 1, 32, rebuildSplatCst},
                          {X86::VMOVSDZrm, 1, 64, rebuildZeroUpperCst},
                          {X86::VMOVDDUPZ128rm, 1, 64, rebuildSplatCst}},
                         1);
  case X86::VMOVAPDZ256rm:
  case X86::VMOVAPSZ256rm:
  case X86::VMOVUPDZ256rm:
  case X86::VMOVUPSZ256rm:
    return FixupConstant(
        {{X86::VBROADCASTSSZ256rm, 1, 32, rebuildSplatCst},
         {X86::VBROADCASTSDZ256rm, 1, 64, rebuildSplatCst},
         {X86::VBROADCASTF32X4Z256rm, 1, 128, rebuildSplatCst}},
        1);
  case X86::VMOVAPDZrm:
  case X86::VMOVAPSZrm:
  case X86::VMOVUPDZrm:
  case X86::VMOVUPSZrm:
    return FixupConstant({{X86::VBROADCASTSSZrm, 1, 32, rebuildSplatCst},
                          {X86::VBROADCASTSDZrm, 1, 64, rebuildSplatCst},
                          {X86::VBROADCASTF32X4rm, 1, 128, rebuildSplatCst},
                          {X86::VBROADCASTF64X4rm, 1, 256, rebuildSplatCst}},
                         1);
  /* Integer Loads */
  case X86::MOVDQArm:
  case X86::MOVDQUrm: {
    return FixupConstant({{X86::MOVDI2PDIrm, 1, 32, rebuildZeroUpperCst},
                          {X86::MOVQI2PQIrm, 1, 64, rebuildZeroUpperCst}},
                         1);
  }
  case X86::VMOVDQArm:
  case X86::VMOVDQUrm: {
    FixupEntry Fixups[] = {
        {HasAVX2 ? X86::VPBROADCASTBrm : 0, 1, 8, rebuildSplatCst},
        {HasAVX2 ? X86::VPBROADCASTWrm : 0, 1, 16, rebuildSplatCst},
        {X86::VMOVDI2PDIrm, 1, 32, rebuildZeroUpperCst},
        {HasAVX2 ? X86::VPBROADCASTDrm : X86::VBROADCASTSSrm, 1, 32,
         rebuildSplatCst},
        {X86::VMOVQI2PQIrm, 1, 64, rebuildZeroUpperCst},
        {HasAVX2 ? X86::VPBROADCASTQrm : X86::VMOVDDUPrm, 1, 64,
         rebuildSplatCst},
    };
    return FixupConstant(Fixups, 1);
  }
  case X86::VMOVDQAYrm:
  case X86::VMOVDQUYrm: {
    FixupEntry Fixups[] = {
        {HasAVX2 ? X86::VPBROADCASTBYrm : 0, 1, 8, rebuildSplatCst},
        {HasAVX2 ? X86::VPBROADCASTWYrm : 0, 1, 16, rebuildSplatCst},
        {HasAVX2 ? X86::VPBROADCASTDYrm : X86::VBROADCASTSSYrm, 1, 32,
         rebuildSplatCst},
        {HasAVX2 ? X86::VPBROADCASTQYrm : X86::VBROADCASTSDYrm, 1, 64,
         rebuildSplatCst},
        {HasAVX2 ? X86::VBROADCASTI128rm : X86::VBROADCASTF128rm, 1, 128,
         rebuildSplatCst}};
    return FixupConstant(Fixups, 1);
  }
  case X86::VMOVDQA32Z128rm:
  case X86::VMOVDQA64Z128rm:
  case X86::VMOVDQU32Z128rm:
  case X86::VMOVDQU64Z128rm: {
    FixupEntry Fixups[] = {
        {HasBWI ? X86::VPBROADCASTBZ128rm : 0, 1, 8, rebuildSplatCst},
        {HasBWI ? X86::VPBROADCASTWZ128rm : 0, 1, 16, rebuildSplatCst},
        {X86::VMOVDI2PDIZrm, 1, 32, rebuildZeroUpperCst},
        {X86::VPBROADCASTDZ128rm, 1, 32, rebuildSplatCst},
        {X86::VMOVQI2PQIZrm, 1, 64, rebuildZeroUpperCst},
        {X86::VPBROADCASTQZ128rm, 1, 64, rebuildSplatCst}};
    return FixupConstant(Fixups, 1);
  }
  case X86::VMOVDQA32Z256rm:
  case X86::VMOVDQA64Z256rm:
  case X86::VMOVDQU32Z256rm:
  case X86::VMOVDQU64Z256rm: {
    FixupEntry Fixups[] = {
        {HasBWI ? X86::VPBROADCASTBZ256rm : 0, 1, 8, rebuildSplatCst},
        {HasBWI ? X86::VPBROADCASTWZ256rm : 0, 1, 16, rebuildSplatCst},
        {X86::VPBROADCASTDZ256rm, 1, 32, rebuildSplatCst},
        {X86::VPBROADCASTQZ256rm, 1, 64, rebuildSplatCst},
        {X86::VBROADCASTI32X4Z256rm, 1, 128, rebuildSplatCst}};
    return FixupConstant(Fixups, 1);
  }
  case X86::VMOVDQA32Zrm:
  case X86::VMOVDQA64Zrm:
  case X86::VMOVDQU32Zrm:
  case X86::VMOVDQU64Zrm: {
    FixupEntry Fixups[] = {
        {HasBWI ? X86::VPBROADCASTBZrm : 0, 1, 8, rebuildSplatCst},
        {HasBWI ? X86::VPBROADCASTWZrm : 0, 1, 16, rebuildSplatCst},
        {X86::VPBROADCASTDZrm, 1, 32, rebuildSplatCst},
        {X86::VPBROADCASTQZrm, 1, 64, rebuildSplatCst},
        {X86::VBROADCASTI32X4rm, 1, 128, rebuildSplatCst},
        {X86::VBROADCASTI64X4rm, 1, 256, rebuildSplatCst}};
    return FixupConstant(Fixups, 1);
  }
  }

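  // Given the 32-bit and 64-bit element forms of a full-width memory-fold
  // opcode, look up their AVX512 broadcast-fold variants and try to shrink the
  // constant to a 32/64-bit splat that the broadcast form can consume.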
  auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) {
    unsigned OpBcst32 = 0, OpBcst64 = 0;
    unsigned OpNoBcst32 = 0, OpNoBcst64 = 0;
    if (OpSrc32) {
      if (const X86FoldTableEntry *Mem2Bcst =
              llvm::lookupBroadcastFoldTableBySize(OpSrc32, 32)) {
        OpBcst32 = Mem2Bcst->DstOp;
        OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
      }
    }
    if (OpSrc64) {
      if (const X86FoldTableEntry *Mem2Bcst =
              llvm::lookupBroadcastFoldTableBySize(OpSrc64, 64)) {
        OpBcst64 = Mem2Bcst->DstOp;
        OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
      }
    }
    assert(((OpBcst32 == 0) || (OpBcst64 == 0) || (OpNoBcst32 == OpNoBcst64)) &&
           "OperandNo mismatch");

    if (OpBcst32 || OpBcst64) {
      unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
      FixupEntry Fixups[] = {{(int)OpBcst32, 32, 32, rebuildSplatCst},
                             {(int)OpBcst64, 64, 64, rebuildSplatCst}};
      return FixupConstant(Fixups, OpNo);
    }
    return false;
  };

  // Attempt to find an AVX512 mapping from a full width memory-fold instruction
  // to a broadcast-fold instruction variant.
  if ((MI.getDesc().TSFlags & X86II::EncodingMask) == X86II::EVEX)
    return ConvertToBroadcastAVX512(Opc, Opc);

  // Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic
  // conversion to see if we can convert to a broadcasted (integer) logic op.
  if (HasVLX && !HasDQI) {
    unsigned OpSrc32 = 0, OpSrc64 = 0;
    switch (Opc) {
    case X86::VANDPDrm:
    case X86::VANDPSrm:
    case X86::VPANDrm:
      OpSrc32 = X86::VPANDDZ128rm;
      OpSrc64 = X86::VPANDQZ128rm;
      break;
    case X86::VANDPDYrm:
    case X86::VANDPSYrm:
    case X86::VPANDYrm:
      OpSrc32 = X86::VPANDDZ256rm;
      OpSrc64 = X86::VPANDQZ256rm;
      break;
    case X86::VANDNPDrm:
    case X86::VANDNPSrm:
    case X86::VPANDNrm:
      OpSrc32 = X86::VPANDNDZ128rm;
      OpSrc64 = X86::VPANDNQZ128rm;
      break;
    case X86::VANDNPDYrm:
    case X86::VANDNPSYrm:
    case X86::VPANDNYrm:
      OpSrc32 = X86::VPANDNDZ256rm;
      OpSrc64 = X86::VPANDNQZ256rm;
      break;
    case X86::VORPDrm:
    case X86::VORPSrm:
    case X86::VPORrm:
      OpSrc32 = X86::VPORDZ128rm;
      OpSrc64 = X86::VPORQZ128rm;
      break;
    case X86::VORPDYrm:
    case X86::VORPSYrm:
    case X86::VPORYrm:
      OpSrc32 = X86::VPORDZ256rm;
      OpSrc64 = X86::VPORQZ256rm;
      break;
    case X86::VXORPDrm:
    case X86::VXORPSrm:
    case X86::VPXORrm:
      OpSrc32 = X86::VPXORDZ128rm;
      OpSrc64 = X86::VPXORQZ128rm;
      break;
    case X86::VXORPDYrm:
    case X86::VXORPSYrm:
    case X86::VPXORYrm:
      OpSrc32 = X86::VPXORDZ256rm;
      OpSrc64 = X86::VPXORQZ256rm;
      break;
    }
    if (OpSrc32 || OpSrc64)
      return ConvertToBroadcastAVX512(OpSrc32, OpSrc64);
  }

  return false;
}

bool X86FixupVectorConstantsPass::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "Start X86FixupVectorConstants\n";);
  bool Changed = false;
  ST = &MF.getSubtarget<X86Subtarget>();
  TII = ST->getInstrInfo();
  SM = &ST->getSchedModel();

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (processInstruction(MF, MBB, MI)) {
        ++NumInstChanges;
        Changed = true;
      }
    }
  }
  LLVM_DEBUG(dbgs() << "End X86FixupVectorConstants\n";);
  return Changed;
556 }
557