xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric /// \file
90b57cec5SDimitry Andric /// This file implements a TargetTransformInfo analysis pass specific to the
100b57cec5SDimitry Andric /// X86 target machine. It uses the target's detailed information to provide
110b57cec5SDimitry Andric /// more precise answers to certain TTI queries, while letting the target
120b57cec5SDimitry Andric /// independent and default TTI implementations handle the rest.
130b57cec5SDimitry Andric ///
140b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
150b57cec5SDimitry Andric /// About Cost Model numbers used below it's necessary to say the following:
16bdd1243dSDimitry Andric /// the numbers correspond to some "generic" X86 CPU instead of usage of a
17bdd1243dSDimitry Andric /// specific CPU model. Usually the numbers correspond to the CPU where the
18bdd1243dSDimitry Andric /// feature first appeared. For example, if we do Subtarget.hasSSE42() in
190b57cec5SDimitry Andric /// the lookups below the cost is based on Nehalem as that was the first CPU
20bdd1243dSDimitry Andric /// to support that feature level and thus has most likely the worst case cost,
21bdd1243dSDimitry Andric /// although we may discard an outlying worst cost from one CPU (e.g. Atom).
22bdd1243dSDimitry Andric ///
230b57cec5SDimitry Andric /// Some examples of other technologies/CPUs:
240b57cec5SDimitry Andric ///   SSE 3   - Pentium4 / Athlon64
250b57cec5SDimitry Andric ///   SSE 4.1 - Penryn
26bdd1243dSDimitry Andric ///   SSE 4.2 - Nehalem / Silvermont
27bdd1243dSDimitry Andric ///   AVX     - Sandy Bridge / Jaguar / Bulldozer
28bdd1243dSDimitry Andric ///   AVX2    - Haswell / Ryzen
290b57cec5SDimitry Andric ///   AVX-512 - Xeon Phi / Skylake
30bdd1243dSDimitry Andric ///
310b57cec5SDimitry Andric /// And some examples of instruction target dependent costs (latency)
320b57cec5SDimitry Andric ///                   divss     sqrtss          rsqrtss
330b57cec5SDimitry Andric ///   AMD K7          11-16     19              3
340b57cec5SDimitry Andric ///   Piledriver      9-24      13-15           5
350b57cec5SDimitry Andric ///   Jaguar          14        16              2
360b57cec5SDimitry Andric ///   Pentium II,III  18        30              2
370b57cec5SDimitry Andric ///   Nehalem         7-14      7-18            3
380b57cec5SDimitry Andric ///   Haswell         10-13     11              5
39bdd1243dSDimitry Andric ///
40bdd1243dSDimitry Andric /// Interpreting the 4 TargetCostKind types:
41bdd1243dSDimitry Andric /// TCK_RecipThroughput and TCK_Latency should try to match the worst case
42bdd1243dSDimitry Andric /// values reported by the CPU scheduler models (and llvm-mca).
43bdd1243dSDimitry Andric /// TCK_CodeSize should match the instruction count (e.g. divss = 1), NOT the
44bdd1243dSDimitry Andric /// actual encoding size of the instruction.
45bdd1243dSDimitry Andric /// TCK_SizeAndLatency should match the worst case micro-op counts reported by
/// the CPU scheduler models (and llvm-mca), to ensure that they are
47bdd1243dSDimitry Andric /// compatible with the MicroOpBufferSize and LoopMicroOpBufferSize values which are
48bdd1243dSDimitry Andric /// often used as the cost thresholds where TCK_SizeAndLatency is requested.
490b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
500b57cec5SDimitry Andric 
510b57cec5SDimitry Andric #include "X86TargetTransformInfo.h"
520b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
530b57cec5SDimitry Andric #include "llvm/CodeGen/BasicTTIImpl.h"
540b57cec5SDimitry Andric #include "llvm/CodeGen/CostTable.h"
550b57cec5SDimitry Andric #include "llvm/CodeGen/TargetLowering.h"
5604eeddc0SDimitry Andric #include "llvm/IR/InstIterator.h"
570b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
580b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
59bdd1243dSDimitry Andric #include <optional>
600b57cec5SDimitry Andric 
610b57cec5SDimitry Andric using namespace llvm;
620b57cec5SDimitry Andric 
630b57cec5SDimitry Andric #define DEBUG_TYPE "x86tti"
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
660b57cec5SDimitry Andric //
670b57cec5SDimitry Andric // X86 cost model.
680b57cec5SDimitry Andric //
690b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
700b57cec5SDimitry Andric 
71bdd1243dSDimitry Andric // Helper struct to store/access costs for each cost kind.
72bdd1243dSDimitry Andric // TODO: Move this to allow other targets to use it?
73bdd1243dSDimitry Andric struct CostKindCosts {
74bdd1243dSDimitry Andric   unsigned RecipThroughputCost = ~0U;
75bdd1243dSDimitry Andric   unsigned LatencyCost = ~0U;
76bdd1243dSDimitry Andric   unsigned CodeSizeCost = ~0U;
77bdd1243dSDimitry Andric   unsigned SizeAndLatencyCost = ~0U;
78bdd1243dSDimitry Andric 
79bdd1243dSDimitry Andric   std::optional<unsigned>
80bdd1243dSDimitry Andric   operator[](TargetTransformInfo::TargetCostKind Kind) const {
81bdd1243dSDimitry Andric     unsigned Cost = ~0U;
82bdd1243dSDimitry Andric     switch (Kind) {
83bdd1243dSDimitry Andric     case TargetTransformInfo::TCK_RecipThroughput:
84bdd1243dSDimitry Andric       Cost = RecipThroughputCost;
85bdd1243dSDimitry Andric       break;
86bdd1243dSDimitry Andric     case TargetTransformInfo::TCK_Latency:
87bdd1243dSDimitry Andric       Cost = LatencyCost;
88bdd1243dSDimitry Andric       break;
89bdd1243dSDimitry Andric     case TargetTransformInfo::TCK_CodeSize:
90bdd1243dSDimitry Andric       Cost = CodeSizeCost;
91bdd1243dSDimitry Andric       break;
92bdd1243dSDimitry Andric     case TargetTransformInfo::TCK_SizeAndLatency:
93bdd1243dSDimitry Andric       Cost = SizeAndLatencyCost;
94bdd1243dSDimitry Andric       break;
95bdd1243dSDimitry Andric     }
96bdd1243dSDimitry Andric     if (Cost == ~0U)
97bdd1243dSDimitry Andric       return std::nullopt;
98bdd1243dSDimitry Andric     return Cost;
99bdd1243dSDimitry Andric   }
100bdd1243dSDimitry Andric };
// Cost table entry whose cost payload is a CostKindCosts, i.e. one value per
// TargetCostKind instead of a single number.
using CostKindTblEntry = CostTblEntryT<CostKindCosts>;
// Type-conversion cost table entry carrying the same CostKindCosts payload.
using TypeConversionCostKindTblEntry = TypeConversionCostTblEntryT<CostKindCosts>;
103bdd1243dSDimitry Andric 
1040b57cec5SDimitry Andric TargetTransformInfo::PopcntSupportKind
1050b57cec5SDimitry Andric X86TTIImpl::getPopcntSupport(unsigned TyWidth) {
1060b57cec5SDimitry Andric   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
1070b57cec5SDimitry Andric   // TODO: Currently the __builtin_popcount() implementation using SSE3
1080b57cec5SDimitry Andric   //   instructions is inefficient. Once the problem is fixed, we should
1090b57cec5SDimitry Andric   //   call ST->hasSSE3() instead of ST->hasPOPCNT().
1100b57cec5SDimitry Andric   return ST->hasPOPCNT() ? TTI::PSK_FastHardware : TTI::PSK_Software;
1110b57cec5SDimitry Andric }
1120b57cec5SDimitry Andric 
113bdd1243dSDimitry Andric std::optional<unsigned> X86TTIImpl::getCacheSize(
1140b57cec5SDimitry Andric   TargetTransformInfo::CacheLevel Level) const {
1150b57cec5SDimitry Andric   switch (Level) {
1160b57cec5SDimitry Andric   case TargetTransformInfo::CacheLevel::L1D:
1170b57cec5SDimitry Andric     //   - Penryn
1180b57cec5SDimitry Andric     //   - Nehalem
1190b57cec5SDimitry Andric     //   - Westmere
1200b57cec5SDimitry Andric     //   - Sandy Bridge
1210b57cec5SDimitry Andric     //   - Ivy Bridge
1220b57cec5SDimitry Andric     //   - Haswell
1230b57cec5SDimitry Andric     //   - Broadwell
1240b57cec5SDimitry Andric     //   - Skylake
1250b57cec5SDimitry Andric     //   - Kabylake
1260b57cec5SDimitry Andric     return 32 * 1024;  //  32 KByte
1270b57cec5SDimitry Andric   case TargetTransformInfo::CacheLevel::L2D:
1280b57cec5SDimitry Andric     //   - Penryn
1290b57cec5SDimitry Andric     //   - Nehalem
1300b57cec5SDimitry Andric     //   - Westmere
1310b57cec5SDimitry Andric     //   - Sandy Bridge
1320b57cec5SDimitry Andric     //   - Ivy Bridge
1330b57cec5SDimitry Andric     //   - Haswell
1340b57cec5SDimitry Andric     //   - Broadwell
1350b57cec5SDimitry Andric     //   - Skylake
1360b57cec5SDimitry Andric     //   - Kabylake
1370b57cec5SDimitry Andric     return 256 * 1024; // 256 KByte
1380b57cec5SDimitry Andric   }
1390b57cec5SDimitry Andric 
1400b57cec5SDimitry Andric   llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
1410b57cec5SDimitry Andric }
1420b57cec5SDimitry Andric 
143bdd1243dSDimitry Andric std::optional<unsigned> X86TTIImpl::getCacheAssociativity(
1440b57cec5SDimitry Andric   TargetTransformInfo::CacheLevel Level) const {
1450b57cec5SDimitry Andric   //   - Penryn
1460b57cec5SDimitry Andric   //   - Nehalem
1470b57cec5SDimitry Andric   //   - Westmere
1480b57cec5SDimitry Andric   //   - Sandy Bridge
1490b57cec5SDimitry Andric   //   - Ivy Bridge
1500b57cec5SDimitry Andric   //   - Haswell
1510b57cec5SDimitry Andric   //   - Broadwell
1520b57cec5SDimitry Andric   //   - Skylake
1530b57cec5SDimitry Andric   //   - Kabylake
1540b57cec5SDimitry Andric   switch (Level) {
1550b57cec5SDimitry Andric   case TargetTransformInfo::CacheLevel::L1D:
156bdd1243dSDimitry Andric     [[fallthrough]];
1570b57cec5SDimitry Andric   case TargetTransformInfo::CacheLevel::L2D:
1580b57cec5SDimitry Andric     return 8;
1590b57cec5SDimitry Andric   }
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric   llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
1620b57cec5SDimitry Andric }
1630b57cec5SDimitry Andric 
1648bcb0991SDimitry Andric unsigned X86TTIImpl::getNumberOfRegisters(unsigned ClassID) const {
1658bcb0991SDimitry Andric   bool Vector = (ClassID == 1);
1660b57cec5SDimitry Andric   if (Vector && !ST->hasSSE1())
1670b57cec5SDimitry Andric     return 0;
1680b57cec5SDimitry Andric 
1690b57cec5SDimitry Andric   if (ST->is64Bit()) {
1700b57cec5SDimitry Andric     if (Vector && ST->hasAVX512())
1710b57cec5SDimitry Andric       return 32;
172*0fca6ea1SDimitry Andric     if (!Vector && ST->hasEGPR())
173*0fca6ea1SDimitry Andric       return 32;
1740b57cec5SDimitry Andric     return 16;
1750b57cec5SDimitry Andric   }
1760b57cec5SDimitry Andric   return 8;
1770b57cec5SDimitry Andric }
1780b57cec5SDimitry Andric 
179*0fca6ea1SDimitry Andric bool X86TTIImpl::hasConditionalLoadStoreForType(Type *Ty) const {
180*0fca6ea1SDimitry Andric   if (!ST->hasCF())
181*0fca6ea1SDimitry Andric     return false;
182*0fca6ea1SDimitry Andric   if (!Ty)
183*0fca6ea1SDimitry Andric     return true;
184*0fca6ea1SDimitry Andric   // Conditional faulting is supported by CFCMOV, which only accepts
185*0fca6ea1SDimitry Andric   // 16/32/64-bit operands.
186*0fca6ea1SDimitry Andric   // TODO: Support f32/f64 with VMOVSS/VMOVSD with zero mask when it's
187*0fca6ea1SDimitry Andric   // profitable.
188*0fca6ea1SDimitry Andric   auto *VTy = dyn_cast<FixedVectorType>(Ty);
189*0fca6ea1SDimitry Andric   if (!Ty->isIntegerTy() && (!VTy || VTy->getNumElements() != 1))
190*0fca6ea1SDimitry Andric     return false;
191*0fca6ea1SDimitry Andric   auto *ScalarTy = Ty->getScalarType();
192*0fca6ea1SDimitry Andric   switch (cast<IntegerType>(ScalarTy)->getBitWidth()) {
193*0fca6ea1SDimitry Andric   default:
194*0fca6ea1SDimitry Andric     return false;
195*0fca6ea1SDimitry Andric   case 16:
196*0fca6ea1SDimitry Andric   case 32:
197*0fca6ea1SDimitry Andric   case 64:
198*0fca6ea1SDimitry Andric     return true;
199*0fca6ea1SDimitry Andric   }
200*0fca6ea1SDimitry Andric }
201*0fca6ea1SDimitry Andric 
202fe6060f1SDimitry Andric TypeSize
203fe6060f1SDimitry Andric X86TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
2040b57cec5SDimitry Andric   unsigned PreferVectorWidth = ST->getPreferVectorWidth();
205fe6060f1SDimitry Andric   switch (K) {
206fe6060f1SDimitry Andric   case TargetTransformInfo::RGK_Scalar:
207fe6060f1SDimitry Andric     return TypeSize::getFixed(ST->is64Bit() ? 64 : 32);
208fe6060f1SDimitry Andric   case TargetTransformInfo::RGK_FixedWidthVector:
2095f757f3fSDimitry Andric     if (ST->hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
210fe6060f1SDimitry Andric       return TypeSize::getFixed(512);
2110b57cec5SDimitry Andric     if (ST->hasAVX() && PreferVectorWidth >= 256)
212fe6060f1SDimitry Andric       return TypeSize::getFixed(256);
2130b57cec5SDimitry Andric     if (ST->hasSSE1() && PreferVectorWidth >= 128)
214fe6060f1SDimitry Andric       return TypeSize::getFixed(128);
215fe6060f1SDimitry Andric     return TypeSize::getFixed(0);
216fe6060f1SDimitry Andric   case TargetTransformInfo::RGK_ScalableVector:
217fe6060f1SDimitry Andric     return TypeSize::getScalable(0);
2180b57cec5SDimitry Andric   }
2190b57cec5SDimitry Andric 
220fe6060f1SDimitry Andric   llvm_unreachable("Unsupported register kind");
2210b57cec5SDimitry Andric }
2220b57cec5SDimitry Andric 
2230b57cec5SDimitry Andric unsigned X86TTIImpl::getLoadStoreVecRegBitWidth(unsigned) const {
224fe6060f1SDimitry Andric   return getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
225bdd1243dSDimitry Andric       .getFixedValue();
2260b57cec5SDimitry Andric }
2270b57cec5SDimitry Andric 
22806c3fb27SDimitry Andric unsigned X86TTIImpl::getMaxInterleaveFactor(ElementCount VF) {
2290b57cec5SDimitry Andric   // If the loop will not be vectorized, don't interleave the loop.
2300b57cec5SDimitry Andric   // Let regular unroll to unroll the loop, which saves the overflow
2310b57cec5SDimitry Andric   // check and memory check cost.
23206c3fb27SDimitry Andric   if (VF.isScalar())
2330b57cec5SDimitry Andric     return 1;
2340b57cec5SDimitry Andric 
2350b57cec5SDimitry Andric   if (ST->isAtom())
2360b57cec5SDimitry Andric     return 1;
2370b57cec5SDimitry Andric 
2380b57cec5SDimitry Andric   // Sandybridge and Haswell have multiple execution ports and pipelined
2390b57cec5SDimitry Andric   // vector units.
2400b57cec5SDimitry Andric   if (ST->hasAVX())
2410b57cec5SDimitry Andric     return 4;
2420b57cec5SDimitry Andric 
2430b57cec5SDimitry Andric   return 2;
2440b57cec5SDimitry Andric }
2450b57cec5SDimitry Andric 
246fe6060f1SDimitry Andric InstructionCost X86TTIImpl::getArithmeticInstrCost(
247fe6060f1SDimitry Andric     unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
248bdd1243dSDimitry Andric     TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
249bdd1243dSDimitry Andric     ArrayRef<const Value *> Args,
250480093f4SDimitry Andric     const Instruction *CxtI) {
251fe6060f1SDimitry Andric 
252fe6060f1SDimitry Andric   // vXi8 multiplications are always promoted to vXi16.
25306c3fb27SDimitry Andric   // Sub-128-bit types can be extended/packed more efficiently.
254fe6060f1SDimitry Andric   if (Opcode == Instruction::Mul && Ty->isVectorTy() &&
25506c3fb27SDimitry Andric       Ty->getPrimitiveSizeInBits() <= 64 && Ty->getScalarSizeInBits() == 8) {
256fe6060f1SDimitry Andric     Type *WideVecTy =
257fe6060f1SDimitry Andric         VectorType::getExtendedElementVectorType(cast<VectorType>(Ty));
258fe6060f1SDimitry Andric     return getCastInstrCost(Instruction::ZExt, WideVecTy, Ty,
259fe6060f1SDimitry Andric                             TargetTransformInfo::CastContextHint::None,
260fe6060f1SDimitry Andric                             CostKind) +
261fe6060f1SDimitry Andric            getCastInstrCost(Instruction::Trunc, Ty, WideVecTy,
262fe6060f1SDimitry Andric                             TargetTransformInfo::CastContextHint::None,
263fe6060f1SDimitry Andric                             CostKind) +
264bdd1243dSDimitry Andric            getArithmeticInstrCost(Opcode, WideVecTy, CostKind, Op1Info, Op2Info);
265fe6060f1SDimitry Andric   }
266fe6060f1SDimitry Andric 
2670b57cec5SDimitry Andric   // Legalize the type.
268bdd1243dSDimitry Andric   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
2690b57cec5SDimitry Andric 
2700b57cec5SDimitry Andric   int ISD = TLI->InstructionOpcodeToISD(Opcode);
2710b57cec5SDimitry Andric   assert(ISD && "Invalid opcode");
2720b57cec5SDimitry Andric 
273349cc55cSDimitry Andric   if (ISD == ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
27406c3fb27SDimitry Andric       (LT.second.getScalarType() == MVT::i32 ||
27506c3fb27SDimitry Andric        LT.second.getScalarType() == MVT::i64)) {
276349cc55cSDimitry Andric     // Check if the operands can be represented as a smaller datatype.
277349cc55cSDimitry Andric     bool Op1Signed = false, Op2Signed = false;
278349cc55cSDimitry Andric     unsigned Op1MinSize = BaseT::minRequiredElementSize(Args[0], Op1Signed);
279349cc55cSDimitry Andric     unsigned Op2MinSize = BaseT::minRequiredElementSize(Args[1], Op2Signed);
280349cc55cSDimitry Andric     unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
281bdd1243dSDimitry Andric     bool SignedMode = Op1Signed || Op2Signed;
282349cc55cSDimitry Andric 
28306c3fb27SDimitry Andric     // If both vXi32 are representable as i15 and at least one is constant,
284349cc55cSDimitry Andric     // zero-extended, or sign-extended from vXi16 (or less pre-SSE41) then we
285349cc55cSDimitry Andric     // can treat this as PMADDWD which has the same costs as a vXi16 multiply.
28606c3fb27SDimitry Andric     if (OpMinSize <= 15 && !ST->isPMADDWDSlow() &&
28706c3fb27SDimitry Andric         LT.second.getScalarType() == MVT::i32) {
288349cc55cSDimitry Andric       bool Op1Constant =
289349cc55cSDimitry Andric           isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
290349cc55cSDimitry Andric       bool Op2Constant =
291349cc55cSDimitry Andric           isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
292349cc55cSDimitry Andric       bool Op1Sext = isa<SExtInst>(Args[0]) &&
293349cc55cSDimitry Andric                      (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->hasSSE41()));
294349cc55cSDimitry Andric       bool Op2Sext = isa<SExtInst>(Args[1]) &&
295349cc55cSDimitry Andric                      (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->hasSSE41()));
296349cc55cSDimitry Andric 
297349cc55cSDimitry Andric       bool IsZeroExtended = !Op1Signed || !Op2Signed;
298349cc55cSDimitry Andric       bool IsConstant = Op1Constant || Op2Constant;
299349cc55cSDimitry Andric       bool IsSext = Op1Sext || Op2Sext;
300349cc55cSDimitry Andric       if (IsConstant || IsZeroExtended || IsSext)
301349cc55cSDimitry Andric         LT.second =
302349cc55cSDimitry Andric             MVT::getVectorVT(MVT::i16, 2 * LT.second.getVectorNumElements());
303349cc55cSDimitry Andric     }
304349cc55cSDimitry Andric 
305bdd1243dSDimitry Andric     // Check if the vXi32 operands can be shrunk into a smaller datatype.
306bdd1243dSDimitry Andric     // This should match the codegen from reduceVMULWidth.
307bdd1243dSDimitry Andric     // TODO: Make this generic (!ST->SSE41 || ST->isPMULLDSlow()).
308bdd1243dSDimitry Andric     if (ST->useSLMArithCosts() && LT.second == MVT::v4i32) {
3090b57cec5SDimitry Andric       if (OpMinSize <= 7)
3100b57cec5SDimitry Andric         return LT.first * 3; // pmullw/sext
311e8d8bef9SDimitry Andric       if (!SignedMode && OpMinSize <= 8)
3120b57cec5SDimitry Andric         return LT.first * 3; // pmullw/zext
3130b57cec5SDimitry Andric       if (OpMinSize <= 15)
3140b57cec5SDimitry Andric         return LT.first * 5; // pmullw/pmulhw/pshuf
315e8d8bef9SDimitry Andric       if (!SignedMode && OpMinSize <= 16)
3160b57cec5SDimitry Andric         return LT.first * 5; // pmullw/pmulhw/pshuf
3170b57cec5SDimitry Andric     }
31806c3fb27SDimitry Andric 
31906c3fb27SDimitry Andric     // If both vXi64 are representable as (unsigned) i32, then we can perform
32006c3fb27SDimitry Andric     // the multiple with a single PMULUDQ instruction.
32106c3fb27SDimitry Andric     // TODO: Add (SSE41+) PMULDQ handling for signed extensions.
32206c3fb27SDimitry Andric     if (!SignedMode && OpMinSize <= 32 && LT.second.getScalarType() == MVT::i64)
32306c3fb27SDimitry Andric       ISD = X86ISD::PMULUDQ;
3240b57cec5SDimitry Andric   }
3250b57cec5SDimitry Andric 
326bdd1243dSDimitry Andric   // Vector multiply by pow2 will be simplified to shifts.
327bdd1243dSDimitry Andric   // Vector multiply by -pow2 will be simplified to shifts/negates.
328bdd1243dSDimitry Andric   if (ISD == ISD::MUL && Op2Info.isConstant() &&
329bdd1243dSDimitry Andric       (Op2Info.isPowerOf2() || Op2Info.isNegatedPowerOf2())) {
330bdd1243dSDimitry Andric     InstructionCost Cost =
331bdd1243dSDimitry Andric         getArithmeticInstrCost(Instruction::Shl, Ty, CostKind,
332bdd1243dSDimitry Andric                                Op1Info.getNoProps(), Op2Info.getNoProps());
333bdd1243dSDimitry Andric     if (Op2Info.isNegatedPowerOf2())
334bdd1243dSDimitry Andric       Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind);
335bdd1243dSDimitry Andric     return Cost;
336bdd1243dSDimitry Andric   }
337bdd1243dSDimitry Andric 
338bdd1243dSDimitry Andric   // On X86, vector signed division by constants power-of-two are
339bdd1243dSDimitry Andric   // normally expanded to the sequence SRA + SRL + ADD + SRA.
340bdd1243dSDimitry Andric   // The OperandValue properties may not be the same as that of the previous
341bdd1243dSDimitry Andric   // operation; conservatively assume OP_None.
342bdd1243dSDimitry Andric   if ((ISD == ISD::SDIV || ISD == ISD::SREM) &&
343bdd1243dSDimitry Andric       Op2Info.isConstant() && Op2Info.isPowerOf2()) {
344bdd1243dSDimitry Andric     InstructionCost Cost =
345bdd1243dSDimitry Andric         2 * getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
346bdd1243dSDimitry Andric                                    Op1Info.getNoProps(), Op2Info.getNoProps());
347bdd1243dSDimitry Andric     Cost += getArithmeticInstrCost(Instruction::LShr, Ty, CostKind,
348bdd1243dSDimitry Andric                                    Op1Info.getNoProps(), Op2Info.getNoProps());
349bdd1243dSDimitry Andric     Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
350bdd1243dSDimitry Andric                                    Op1Info.getNoProps(), Op2Info.getNoProps());
351bdd1243dSDimitry Andric 
352bdd1243dSDimitry Andric     if (ISD == ISD::SREM) {
353bdd1243dSDimitry Andric       // For SREM: (X % C) is the equivalent of (X - (X/C)*C)
354bdd1243dSDimitry Andric       Cost += getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, Op1Info.getNoProps(),
355bdd1243dSDimitry Andric                                      Op2Info.getNoProps());
356bdd1243dSDimitry Andric       Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, Op1Info.getNoProps(),
357bdd1243dSDimitry Andric                                      Op2Info.getNoProps());
358bdd1243dSDimitry Andric     }
359bdd1243dSDimitry Andric 
360bdd1243dSDimitry Andric     return Cost;
361bdd1243dSDimitry Andric   }
362bdd1243dSDimitry Andric 
363bdd1243dSDimitry Andric   // Vector unsigned division/remainder will be simplified to shifts/masks.
364bdd1243dSDimitry Andric   if ((ISD == ISD::UDIV || ISD == ISD::UREM) &&
365bdd1243dSDimitry Andric       Op2Info.isConstant() && Op2Info.isPowerOf2()) {
366bdd1243dSDimitry Andric     if (ISD == ISD::UDIV)
367bdd1243dSDimitry Andric       return getArithmeticInstrCost(Instruction::LShr, Ty, CostKind,
368bdd1243dSDimitry Andric                                     Op1Info.getNoProps(), Op2Info.getNoProps());
369bdd1243dSDimitry Andric     // UREM
370bdd1243dSDimitry Andric     return getArithmeticInstrCost(Instruction::And, Ty, CostKind,
371bdd1243dSDimitry Andric                                   Op1Info.getNoProps(), Op2Info.getNoProps());
372bdd1243dSDimitry Andric   }
373bdd1243dSDimitry Andric 
374*0fca6ea1SDimitry Andric   static const CostKindTblEntry GFNIUniformConstCostTable[] = {
375*0fca6ea1SDimitry Andric     { ISD::SHL,  MVT::v16i8,  { 1, 6, 1, 2 } }, // gf2p8affineqb
376*0fca6ea1SDimitry Andric     { ISD::SRL,  MVT::v16i8,  { 1, 6, 1, 2 } }, // gf2p8affineqb
377*0fca6ea1SDimitry Andric     { ISD::SRA,  MVT::v16i8,  { 1, 6, 1, 2 } }, // gf2p8affineqb
378*0fca6ea1SDimitry Andric     { ISD::SHL,  MVT::v32i8,  { 1, 6, 1, 2 } }, // gf2p8affineqb
379*0fca6ea1SDimitry Andric     { ISD::SRL,  MVT::v32i8,  { 1, 6, 1, 2 } }, // gf2p8affineqb
380*0fca6ea1SDimitry Andric     { ISD::SRA,  MVT::v32i8,  { 1, 6, 1, 2 } }, // gf2p8affineqb
381*0fca6ea1SDimitry Andric     { ISD::SHL,  MVT::v64i8,  { 1, 6, 1, 2 } }, // gf2p8affineqb
382*0fca6ea1SDimitry Andric     { ISD::SRL,  MVT::v64i8,  { 1, 6, 1, 2 } }, // gf2p8affineqb
383*0fca6ea1SDimitry Andric     { ISD::SRA,  MVT::v64i8,  { 1, 6, 1, 2 } }, // gf2p8affineqb
384*0fca6ea1SDimitry Andric   };
385*0fca6ea1SDimitry Andric 
386*0fca6ea1SDimitry Andric   if (Op2Info.isUniform() && Op2Info.isConstant() && ST->hasGFNI())
387*0fca6ea1SDimitry Andric     if (const auto *Entry =
388*0fca6ea1SDimitry Andric             CostTableLookup(GFNIUniformConstCostTable, ISD, LT.second))
389*0fca6ea1SDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
390*0fca6ea1SDimitry Andric         return LT.first * *KindCost;
391*0fca6ea1SDimitry Andric 
392bdd1243dSDimitry Andric   static const CostKindTblEntry AVX512BWUniformConstCostTable[] = {
393bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i8,  { 1, 7, 2, 3 } }, // psllw + pand.
394bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i8,  { 1, 7, 2, 3 } }, // psrlw + pand.
395bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i8,  { 1, 8, 4, 5 } }, // psrlw, pand, pxor, psubb.
396bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v32i8,  { 1, 8, 2, 3 } }, // psllw + pand.
397bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v32i8,  { 1, 8, 2, 3 } }, // psrlw + pand.
398bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v32i8,  { 1, 9, 4, 5 } }, // psrlw, pand, pxor, psubb.
399bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v64i8,  { 1, 8, 2, 3 } }, // psllw + pand.
400bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v64i8,  { 1, 8, 2, 3 } }, // psrlw + pand.
401bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v64i8,  { 1, 9, 4, 6 } }, // psrlw, pand, pxor, psubb.
402bdd1243dSDimitry Andric 
403bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i16, { 1, 1, 1, 1 } }, // psllw
404bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i16, { 1, 1, 1, 1 } }, // psrlw
405bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i16, { 1, 1, 1, 1 } }, // psrlw
406bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v32i16, { 1, 1, 1, 1 } }, // psllw
407bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v32i16, { 1, 1, 1, 1 } }, // psrlw
408bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v32i16, { 1, 1, 1, 1 } }, // psrlw
4090b57cec5SDimitry Andric   };
4100b57cec5SDimitry Andric 
411bdd1243dSDimitry Andric   if (Op2Info.isUniform() && Op2Info.isConstant() && ST->hasBWI())
412bdd1243dSDimitry Andric     if (const auto *Entry =
413bdd1243dSDimitry Andric             CostTableLookup(AVX512BWUniformConstCostTable, ISD, LT.second))
414bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
415bdd1243dSDimitry Andric         return LT.first * *KindCost;
4160b57cec5SDimitry Andric 
417bdd1243dSDimitry Andric   static const CostKindTblEntry AVX512UniformConstCostTable[] = {
418bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v64i8,  {  2, 12,  5,  6 } }, // psllw + pand.
419bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v64i8,  {  2, 12,  5,  6 } }, // psrlw + pand.
420bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v64i8,  {  3, 10, 12, 12 } }, // psrlw, pand, pxor, psubb.
4215ffd83dbSDimitry Andric 
422bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i16, {  2,  7,  4,  4 } }, // psllw + split.
423bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i16, {  2,  7,  4,  4 } }, // psrlw + split.
424bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i16, {  2,  7,  4,  4 } }, // psraw + split.
425e8d8bef9SDimitry Andric 
426bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v8i32,  {  1,  1,  1,  1 } }, // pslld
427bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v8i32,  {  1,  1,  1,  1 } }, // psrld
428bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v8i32,  {  1,  1,  1,  1 } }, // psrad
429bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i32, {  1,  1,  1,  1 } }, // pslld
430bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i32, {  1,  1,  1,  1 } }, // psrld
431bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i32, {  1,  1,  1,  1 } }, // psrad
432bdd1243dSDimitry Andric 
433bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v2i64,  {  1,  1,  1,  1 } }, // psraq
434bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v4i64,  {  1,  1,  1,  1 } }, // psllq
435bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v4i64,  {  1,  1,  1,  1 } }, // psrlq
436bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v4i64,  {  1,  1,  1,  1 } }, // psraq
437bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v8i64,  {  1,  1,  1,  1 } }, // psllq
438bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v8i64,  {  1,  1,  1,  1 } }, // psrlq
439bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v8i64,  {  1,  1,  1,  1 } }, // psraq
440bdd1243dSDimitry Andric 
441bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v16i32, {  6 } }, // pmuludq sequence
442bdd1243dSDimitry Andric     { ISD::SREM, MVT::v16i32, {  8 } }, // pmuludq+mul+sub sequence
443bdd1243dSDimitry Andric     { ISD::UDIV, MVT::v16i32, {  5 } }, // pmuludq sequence
444bdd1243dSDimitry Andric     { ISD::UREM, MVT::v16i32, {  7 } }, // pmuludq+mul+sub sequence
4450b57cec5SDimitry Andric   };
4460b57cec5SDimitry Andric 
447bdd1243dSDimitry Andric   if (Op2Info.isUniform() && Op2Info.isConstant() && ST->hasAVX512())
448bdd1243dSDimitry Andric     if (const auto *Entry =
449bdd1243dSDimitry Andric             CostTableLookup(AVX512UniformConstCostTable, ISD, LT.second))
450bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
451bdd1243dSDimitry Andric         return LT.first * *KindCost;
4520b57cec5SDimitry Andric 
453bdd1243dSDimitry Andric   static const CostKindTblEntry AVX2UniformConstCostTable[] = {
454bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i8, {  1,  8,  2,  3 } }, // psllw + pand.
455bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i8, {  1,  8,  2,  3 } }, // psrlw + pand.
456bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i8, {  2, 10,  5,  6 } }, // psrlw, pand, pxor, psubb.
457bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v32i8, {  2,  8,  2,  4 } }, // psllw + pand.
458bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v32i8, {  2,  8,  2,  4 } }, // psrlw + pand.
459bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v32i8, {  3, 10,  5,  9 } }, // psrlw, pand, pxor, psubb.
4600b57cec5SDimitry Andric 
461bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v8i16, {  1,  1,  1,  1 } }, // psllw
462bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v8i16, {  1,  1,  1,  1 } }, // psrlw
463bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v8i16, {  1,  1,  1,  1 } }, // psraw
464bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i16,{  2,  2,  1,  2 } }, // psllw
465bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i16,{  2,  2,  1,  2 } }, // psrlw
466bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i16,{  2,  2,  1,  2 } }, // psraw
467e8d8bef9SDimitry Andric 
468bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v4i32, {  1,  1,  1,  1 } }, // pslld
469bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v4i32, {  1,  1,  1,  1 } }, // psrld
470bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v4i32, {  1,  1,  1,  1 } }, // psrad
471bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v8i32, {  2,  2,  1,  2 } }, // pslld
472bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v8i32, {  2,  2,  1,  2 } }, // psrld
473bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v8i32, {  2,  2,  1,  2 } }, // psrad
474bdd1243dSDimitry Andric 
475bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v2i64, {  1,  1,  1,  1 } }, // psllq
476bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v2i64, {  1,  1,  1,  1 } }, // psrlq
477bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v2i64, {  2,  3,  3,  3 } }, // psrad + shuffle.
478bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v4i64, {  2,  2,  1,  2 } }, // psllq
479bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v4i64, {  2,  2,  1,  2 } }, // psrlq
480bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v4i64, {  4,  4,  3,  6 } }, // psrad + shuffle + split.
481bdd1243dSDimitry Andric 
482bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v8i32, {  6 } }, // pmuludq sequence
483bdd1243dSDimitry Andric     { ISD::SREM, MVT::v8i32, {  8 } }, // pmuludq+mul+sub sequence
484bdd1243dSDimitry Andric     { ISD::UDIV, MVT::v8i32, {  5 } }, // pmuludq sequence
485bdd1243dSDimitry Andric     { ISD::UREM, MVT::v8i32, {  7 } }, // pmuludq+mul+sub sequence
4860b57cec5SDimitry Andric   };
4870b57cec5SDimitry Andric 
488bdd1243dSDimitry Andric   if (Op2Info.isUniform() && Op2Info.isConstant() && ST->hasAVX2())
489bdd1243dSDimitry Andric     if (const auto *Entry =
490bdd1243dSDimitry Andric             CostTableLookup(AVX2UniformConstCostTable, ISD, LT.second))
491bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
492bdd1243dSDimitry Andric         return LT.first * *KindCost;
4930b57cec5SDimitry Andric 
494bdd1243dSDimitry Andric   static const CostKindTblEntry AVXUniformConstCostTable[] = {
495bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i8, {  2,  7,  2,  3 } }, // psllw + pand.
496bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i8, {  2,  7,  2,  3 } }, // psrlw + pand.
497bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i8, {  3,  9,  5,  6 } }, // psrlw, pand, pxor, psubb.
498bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v32i8, {  4,  7,  7,  8 } }, // 2*(psllw + pand) + split.
499bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v32i8, {  4,  7,  7,  8 } }, // 2*(psrlw + pand) + split.
500bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v32i8, {  7,  7, 12, 13 } }, // 2*(psrlw, pand, pxor, psubb) + split.
5010b57cec5SDimitry Andric 
502bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v8i16, {  1,  2,  1,  1 } }, // psllw.
503bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v8i16, {  1,  2,  1,  1 } }, // psrlw.
504bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v8i16, {  1,  2,  1,  1 } }, // psraw.
505bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i16,{  3,  6,  4,  5 } }, // psllw + split.
506bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i16,{  3,  6,  4,  5 } }, // psrlw + split.
507bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i16,{  3,  6,  4,  5 } }, // psraw + split.
508e8d8bef9SDimitry Andric 
509bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v4i32, {  1,  2,  1,  1 } }, // pslld.
510bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v4i32, {  1,  2,  1,  1 } }, // psrld.
511bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v4i32, {  1,  2,  1,  1 } }, // psrad.
512bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v8i32, {  3,  6,  4,  5 } }, // pslld + split.
513bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v8i32, {  3,  6,  4,  5 } }, // psrld + split.
514bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v8i32, {  3,  6,  4,  5 } }, // psrad + split.
515bdd1243dSDimitry Andric 
516bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v2i64, {  1,  2,  1,  1 } }, // psllq.
517bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v2i64, {  1,  2,  1,  1 } }, // psrlq.
518bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v2i64, {  2,  3,  3,  3 } }, // psrad + shuffle.
519bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v4i64, {  3,  6,  4,  5 } }, // 2 x psllq + split.
520bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v4i64, {  3,  6,  4,  5 } }, // 2 x psrlq + split.
521bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v4i64, {  5,  7,  8,  9 } }, // 2 x psrad + shuffle + split.
522bdd1243dSDimitry Andric 
523bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v8i32, { 14 } }, // 2*pmuludq sequence + split.
524bdd1243dSDimitry Andric     { ISD::SREM, MVT::v8i32, { 18 } }, // 2*pmuludq+mul+sub sequence + split.
525bdd1243dSDimitry Andric     { ISD::UDIV, MVT::v8i32, { 12 } }, // 2*pmuludq sequence + split.
526bdd1243dSDimitry Andric     { ISD::UREM, MVT::v8i32, { 16 } }, // 2*pmuludq+mul+sub sequence + split.
5270b57cec5SDimitry Andric   };
5280b57cec5SDimitry Andric 
5290b57cec5SDimitry Andric   // XOP has faster vXi8 shifts.
530bdd1243dSDimitry Andric   if (Op2Info.isUniform() && Op2Info.isConstant() && ST->hasAVX() &&
531bdd1243dSDimitry Andric       (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
532bdd1243dSDimitry Andric     if (const auto *Entry =
533bdd1243dSDimitry Andric             CostTableLookup(AVXUniformConstCostTable, ISD, LT.second))
534bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
535bdd1243dSDimitry Andric         return LT.first * *KindCost;
536bdd1243dSDimitry Andric 
537bdd1243dSDimitry Andric   static const CostKindTblEntry SSE2UniformConstCostTable[] = {
538bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i8, {  1,  7,  2,  3 } }, // psllw + pand.
539bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i8, {  1,  7,  2,  3 } }, // psrlw + pand.
540bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i8, {  3,  9,  5,  6 } }, // psrlw, pand, pxor, psubb.
541bdd1243dSDimitry Andric 
542bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v8i16, {  1,  1,  1,  1 } }, // psllw.
543bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v8i16, {  1,  1,  1,  1 } }, // psrlw.
544bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v8i16, {  1,  1,  1,  1 } }, // psraw.
545bdd1243dSDimitry Andric 
546bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v4i32, {  1,  1,  1,  1 } }, // pslld
547bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v4i32, {  1,  1,  1,  1 } }, // psrld.
548bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v4i32, {  1,  1,  1,  1 } }, // psrad.
549bdd1243dSDimitry Andric 
550bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v2i64, {  1,  1,  1,  1 } }, // psllq.
551bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v2i64, {  1,  1,  1,  1 } }, // psrlq.
552bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v2i64, {  3,  5,  6,  6 } }, // 2 x psrad + shuffle.
553bdd1243dSDimitry Andric 
554bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v4i32, {  6 } }, // pmuludq sequence
555bdd1243dSDimitry Andric     { ISD::SREM, MVT::v4i32, {  8 } }, // pmuludq+mul+sub sequence
556bdd1243dSDimitry Andric     { ISD::UDIV, MVT::v4i32, {  5 } }, // pmuludq sequence
557bdd1243dSDimitry Andric     { ISD::UREM, MVT::v4i32, {  7 } }, // pmuludq+mul+sub sequence
558bdd1243dSDimitry Andric   };
559bdd1243dSDimitry Andric 
560bdd1243dSDimitry Andric   // XOP has faster vXi8 shifts.
561bdd1243dSDimitry Andric   if (Op2Info.isUniform() && Op2Info.isConstant() && ST->hasSSE2() &&
562bdd1243dSDimitry Andric       (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
5630b57cec5SDimitry Andric     if (const auto *Entry =
5640b57cec5SDimitry Andric             CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second))
565bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
566bdd1243dSDimitry Andric         return LT.first * *KindCost;
5670b57cec5SDimitry Andric 
568bdd1243dSDimitry Andric   static const CostKindTblEntry AVX512BWConstCostTable[] = {
569bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v64i8,  { 14 } }, // 2*ext+2*pmulhw sequence
570bdd1243dSDimitry Andric     { ISD::SREM, MVT::v64i8,  { 16 } }, // 2*ext+2*pmulhw+mul+sub sequence
571bdd1243dSDimitry Andric     { ISD::UDIV, MVT::v64i8,  { 14 } }, // 2*ext+2*pmulhw sequence
572bdd1243dSDimitry Andric     { ISD::UREM, MVT::v64i8,  { 16 } }, // 2*ext+2*pmulhw+mul+sub sequence
573bdd1243dSDimitry Andric 
574bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v32i16, {  6 } }, // vpmulhw sequence
575bdd1243dSDimitry Andric     { ISD::SREM, MVT::v32i16, {  8 } }, // vpmulhw+mul+sub sequence
576bdd1243dSDimitry Andric     { ISD::UDIV, MVT::v32i16, {  6 } }, // vpmulhuw sequence
577bdd1243dSDimitry Andric     { ISD::UREM, MVT::v32i16, {  8 } }, // vpmulhuw+mul+sub sequence
5780b57cec5SDimitry Andric   };
5790b57cec5SDimitry Andric 
580bdd1243dSDimitry Andric   if (Op2Info.isConstant() && ST->hasBWI())
5810b57cec5SDimitry Andric     if (const auto *Entry =
5820b57cec5SDimitry Andric             CostTableLookup(AVX512BWConstCostTable, ISD, LT.second))
583bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
584bdd1243dSDimitry Andric         return LT.first * *KindCost;
5850b57cec5SDimitry Andric 
586bdd1243dSDimitry Andric   static const CostKindTblEntry AVX512ConstCostTable[] = {
587bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v64i8,  { 28 } }, // 4*ext+4*pmulhw sequence
588bdd1243dSDimitry Andric     { ISD::SREM, MVT::v64i8,  { 32 } }, // 4*ext+4*pmulhw+mul+sub sequence
589bdd1243dSDimitry Andric     { ISD::UDIV, MVT::v64i8,  { 28 } }, // 4*ext+4*pmulhw sequence
590bdd1243dSDimitry Andric     { ISD::UREM, MVT::v64i8,  { 32 } }, // 4*ext+4*pmulhw+mul+sub sequence
591bdd1243dSDimitry Andric 
592bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v32i16, { 12 } }, // 2*vpmulhw sequence
593bdd1243dSDimitry Andric     { ISD::SREM, MVT::v32i16, { 16 } }, // 2*vpmulhw+mul+sub sequence
594bdd1243dSDimitry Andric     { ISD::UDIV, MVT::v32i16, { 12 } }, // 2*vpmulhuw sequence
595bdd1243dSDimitry Andric     { ISD::UREM, MVT::v32i16, { 16 } }, // 2*vpmulhuw+mul+sub sequence
596bdd1243dSDimitry Andric 
597bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v16i32, { 15 } }, // vpmuldq sequence
598bdd1243dSDimitry Andric     { ISD::SREM, MVT::v16i32, { 17 } }, // vpmuldq+mul+sub sequence
599bdd1243dSDimitry Andric     { ISD::UDIV, MVT::v16i32, { 15 } }, // vpmuludq sequence
600bdd1243dSDimitry Andric     { ISD::UREM, MVT::v16i32, { 17 } }, // vpmuludq+mul+sub sequence
6010b57cec5SDimitry Andric   };
6020b57cec5SDimitry Andric 
603bdd1243dSDimitry Andric   if (Op2Info.isConstant() && ST->hasAVX512())
6040b57cec5SDimitry Andric     if (const auto *Entry =
6050b57cec5SDimitry Andric             CostTableLookup(AVX512ConstCostTable, ISD, LT.second))
606bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
607bdd1243dSDimitry Andric         return LT.first * *KindCost;
6080b57cec5SDimitry Andric 
609bdd1243dSDimitry Andric   static const CostKindTblEntry AVX2ConstCostTable[] = {
610bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v32i8,  { 14 } }, // 2*ext+2*pmulhw sequence
611bdd1243dSDimitry Andric     { ISD::SREM, MVT::v32i8,  { 16 } }, // 2*ext+2*pmulhw+mul+sub sequence
612bdd1243dSDimitry Andric     { ISD::UDIV, MVT::v32i8,  { 14 } }, // 2*ext+2*pmulhw sequence
613bdd1243dSDimitry Andric     { ISD::UREM, MVT::v32i8,  { 16 } }, // 2*ext+2*pmulhw+mul+sub sequence
614bdd1243dSDimitry Andric 
615bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v16i16, {  6 } }, // vpmulhw sequence
616bdd1243dSDimitry Andric     { ISD::SREM, MVT::v16i16, {  8 } }, // vpmulhw+mul+sub sequence
617bdd1243dSDimitry Andric     { ISD::UDIV, MVT::v16i16, {  6 } }, // vpmulhuw sequence
618bdd1243dSDimitry Andric     { ISD::UREM, MVT::v16i16, {  8 } }, // vpmulhuw+mul+sub sequence
619bdd1243dSDimitry Andric 
620bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v8i32,  { 15 } }, // vpmuldq sequence
621bdd1243dSDimitry Andric     { ISD::SREM, MVT::v8i32,  { 19 } }, // vpmuldq+mul+sub sequence
622bdd1243dSDimitry Andric     { ISD::UDIV, MVT::v8i32,  { 15 } }, // vpmuludq sequence
623bdd1243dSDimitry Andric     { ISD::UREM, MVT::v8i32,  { 19 } }, // vpmuludq+mul+sub sequence
6240b57cec5SDimitry Andric   };
6250b57cec5SDimitry Andric 
626bdd1243dSDimitry Andric   if (Op2Info.isConstant() && ST->hasAVX2())
6270b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
628bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
629bdd1243dSDimitry Andric         return LT.first * *KindCost;
6300b57cec5SDimitry Andric 
631bdd1243dSDimitry Andric   static const CostKindTblEntry AVXConstCostTable[] = {
632bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v32i8,  { 30 } }, // 4*ext+4*pmulhw sequence + split.
633bdd1243dSDimitry Andric     { ISD::SREM, MVT::v32i8,  { 34 } }, // 4*ext+4*pmulhw+mul+sub sequence + split.
634bdd1243dSDimitry Andric     { ISD::UDIV, MVT::v32i8,  { 30 } }, // 4*ext+4*pmulhw sequence + split.
635bdd1243dSDimitry Andric     { ISD::UREM, MVT::v32i8,  { 34 } }, // 4*ext+4*pmulhw+mul+sub sequence + split.
636bdd1243dSDimitry Andric 
637bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v16i16, { 14 } }, // 2*pmulhw sequence + split.
638bdd1243dSDimitry Andric     { ISD::SREM, MVT::v16i16, { 18 } }, // 2*pmulhw+mul+sub sequence + split.
639bdd1243dSDimitry Andric     { ISD::UDIV, MVT::v16i16, { 14 } }, // 2*pmulhuw sequence + split.
640bdd1243dSDimitry Andric     { ISD::UREM, MVT::v16i16, { 18 } }, // 2*pmulhuw+mul+sub sequence + split.
641bdd1243dSDimitry Andric 
642bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v8i32,  { 32 } }, // vpmuludq sequence
643bdd1243dSDimitry Andric     { ISD::SREM, MVT::v8i32,  { 38 } }, // vpmuludq+mul+sub sequence
644bdd1243dSDimitry Andric     { ISD::UDIV, MVT::v8i32,  { 32 } }, // 2*pmuludq sequence + split.
645bdd1243dSDimitry Andric     { ISD::UREM, MVT::v8i32,  { 42 } }, // 2*pmuludq+mul+sub sequence + split.
6460b57cec5SDimitry Andric   };
6470b57cec5SDimitry Andric 
648bdd1243dSDimitry Andric   if (Op2Info.isConstant() && ST->hasAVX())
649bdd1243dSDimitry Andric     if (const auto *Entry = CostTableLookup(AVXConstCostTable, ISD, LT.second))
650bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
651bdd1243dSDimitry Andric         return LT.first * *KindCost;
6520b57cec5SDimitry Andric 
653bdd1243dSDimitry Andric   static const CostKindTblEntry SSE41ConstCostTable[] = {
654bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v4i32,  { 15 } }, // vpmuludq sequence
655bdd1243dSDimitry Andric     { ISD::SREM, MVT::v4i32,  { 20 } }, // vpmuludq+mul+sub sequence
656bdd1243dSDimitry Andric   };
657bdd1243dSDimitry Andric 
658bdd1243dSDimitry Andric   if (Op2Info.isConstant() && ST->hasSSE41())
659bdd1243dSDimitry Andric     if (const auto *Entry =
660bdd1243dSDimitry Andric             CostTableLookup(SSE41ConstCostTable, ISD, LT.second))
661bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
662bdd1243dSDimitry Andric         return LT.first * *KindCost;
663bdd1243dSDimitry Andric 
664bdd1243dSDimitry Andric   static const CostKindTblEntry SSE2ConstCostTable[] = {
665bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v16i8,  { 14 } }, // 2*ext+2*pmulhw sequence
666bdd1243dSDimitry Andric     { ISD::SREM, MVT::v16i8,  { 16 } }, // 2*ext+2*pmulhw+mul+sub sequence
667bdd1243dSDimitry Andric     { ISD::UDIV, MVT::v16i8,  { 14 } }, // 2*ext+2*pmulhw sequence
668bdd1243dSDimitry Andric     { ISD::UREM, MVT::v16i8,  { 16 } }, // 2*ext+2*pmulhw+mul+sub sequence
669bdd1243dSDimitry Andric 
670bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v8i16,  {  6 } }, // pmulhw sequence
671bdd1243dSDimitry Andric     { ISD::SREM, MVT::v8i16,  {  8 } }, // pmulhw+mul+sub sequence
672bdd1243dSDimitry Andric     { ISD::UDIV, MVT::v8i16,  {  6 } }, // pmulhuw sequence
673bdd1243dSDimitry Andric     { ISD::UREM, MVT::v8i16,  {  8 } }, // pmulhuw+mul+sub sequence
674bdd1243dSDimitry Andric 
675bdd1243dSDimitry Andric     { ISD::SDIV, MVT::v4i32,  { 19 } }, // pmuludq sequence
676bdd1243dSDimitry Andric     { ISD::SREM, MVT::v4i32,  { 24 } }, // pmuludq+mul+sub sequence
677bdd1243dSDimitry Andric     { ISD::UDIV, MVT::v4i32,  { 15 } }, // pmuludq sequence
678bdd1243dSDimitry Andric     { ISD::UREM, MVT::v4i32,  { 20 } }, // pmuludq+mul+sub sequence
679bdd1243dSDimitry Andric   };
680bdd1243dSDimitry Andric 
681bdd1243dSDimitry Andric   if (Op2Info.isConstant() && ST->hasSSE2())
6820b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
683bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
684bdd1243dSDimitry Andric         return LT.first * *KindCost;
6850b57cec5SDimitry Andric 
686bdd1243dSDimitry Andric   static const CostKindTblEntry AVX512BWUniformCostTable[] = {
687bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i8,  { 3, 5, 5, 7 } }, // psllw + pand.
688bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i8,  { 3,10, 5, 8 } }, // psrlw + pand.
689bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i8,  { 4,12, 8,12 } }, // psrlw, pand, pxor, psubb.
690bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v32i8,  { 4, 7, 6, 8 } }, // psllw + pand.
691bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v32i8,  { 4, 8, 7, 9 } }, // psrlw + pand.
692bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v32i8,  { 5,10,10,13 } }, // psrlw, pand, pxor, psubb.
693bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v64i8,  { 4, 7, 6, 8 } }, // psllw + pand.
694bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v64i8,  { 4, 8, 7,10 } }, // psrlw + pand.
695bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v64i8,  { 5,10,10,15 } }, // psrlw, pand, pxor, psubb.
696fe6060f1SDimitry Andric 
697bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v32i16, { 2, 4, 2, 3 } }, // psllw
698bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v32i16, { 2, 4, 2, 3 } }, // psrlw
699bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v32i16, { 2, 4, 2, 3 } }, // psraw
7005ffd83dbSDimitry Andric   };
7015ffd83dbSDimitry Andric 
702bdd1243dSDimitry Andric   if (ST->hasBWI() && Op2Info.isUniform())
703bdd1243dSDimitry Andric     if (const auto *Entry =
704bdd1243dSDimitry Andric             CostTableLookup(AVX512BWUniformCostTable, ISD, LT.second))
705bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
706bdd1243dSDimitry Andric         return LT.first * *KindCost;
7075ffd83dbSDimitry Andric 
708bdd1243dSDimitry Andric   static const CostKindTblEntry AVX512UniformCostTable[] = {
709bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v32i16, { 5,10, 5, 7 } }, // psllw + split.
710bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v32i16, { 5,10, 5, 7 } }, // psrlw + split.
711bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v32i16, { 5,10, 5, 7 } }, // psraw + split.
712bdd1243dSDimitry Andric 
713bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i32, { 2, 4, 2, 3 } }, // pslld
714bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i32, { 2, 4, 2, 3 } }, // psrld
715bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i32, { 2, 4, 2, 3 } }, // psrad
716bdd1243dSDimitry Andric 
717bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v2i64,  { 1, 2, 1, 2 } }, // psraq
718bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v4i64,  { 1, 4, 1, 2 } }, // psllq
719bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v4i64,  { 1, 4, 1, 2 } }, // psrlq
720bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v4i64,  { 1, 4, 1, 2 } }, // psraq
721bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v8i64,  { 1, 4, 1, 2 } }, // psllq
722bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v8i64,  { 1, 4, 1, 2 } }, // psrlq
723bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v8i64,  { 1, 4, 1, 2 } }, // psraq
724bdd1243dSDimitry Andric   };
725bdd1243dSDimitry Andric 
726bdd1243dSDimitry Andric   if (ST->hasAVX512() && Op2Info.isUniform())
727bdd1243dSDimitry Andric     if (const auto *Entry =
728bdd1243dSDimitry Andric             CostTableLookup(AVX512UniformCostTable, ISD, LT.second))
729bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
730bdd1243dSDimitry Andric         return LT.first * *KindCost;
731bdd1243dSDimitry Andric 
732bdd1243dSDimitry Andric   static const CostKindTblEntry AVX2UniformCostTable[] = {
7330b57cec5SDimitry Andric     // Uniform splats are cheaper for the following instructions.
734bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i8,  { 3, 5, 5, 7 } }, // psllw + pand.
735bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i8,  { 3, 9, 5, 8 } }, // psrlw + pand.
736bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i8,  { 4, 5, 9,13 } }, // psrlw, pand, pxor, psubb.
737bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v32i8,  { 4, 7, 6, 8 } }, // psllw + pand.
738bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v32i8,  { 4, 8, 7, 9 } }, // psrlw + pand.
739bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v32i8,  { 6, 9,11,16 } }, // psrlw, pand, pxor, psubb.
740fe6060f1SDimitry Andric 
741bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v8i16,  { 1, 2, 1, 2 } }, // psllw.
742bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v8i16,  { 1, 2, 1, 2 } }, // psrlw.
743bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v8i16,  { 1, 2, 1, 2 } }, // psraw.
744bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i16, { 2, 4, 2, 3 } }, // psllw.
745bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i16, { 2, 4, 2, 3 } }, // psrlw.
746bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i16, { 2, 4, 2, 3 } }, // psraw.
747bdd1243dSDimitry Andric 
748bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v4i32,  { 1, 2, 1, 2 } }, // pslld
749bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v4i32,  { 1, 2, 1, 2 } }, // psrld
750bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v4i32,  { 1, 2, 1, 2 } }, // psrad
751bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v8i32,  { 2, 4, 2, 3 } }, // pslld
752bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v8i32,  { 2, 4, 2, 3 } }, // psrld
753bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v8i32,  { 2, 4, 2, 3 } }, // psrad
754bdd1243dSDimitry Andric 
755bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v2i64,  { 1, 2, 1, 2 } }, // psllq
756bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v2i64,  { 1, 2, 1, 2 } }, // psrlq
757bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v2i64,  { 2, 4, 5, 7 } }, // 2 x psrad + shuffle.
758bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v4i64,  { 2, 4, 1, 2 } }, // psllq
759bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v4i64,  { 2, 4, 1, 2 } }, // psrlq
760bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v4i64,  { 4, 6, 5, 9 } }, // 2 x psrad + shuffle.
7610b57cec5SDimitry Andric   };
7620b57cec5SDimitry Andric 
763bdd1243dSDimitry Andric   if (ST->hasAVX2() && Op2Info.isUniform())
7640b57cec5SDimitry Andric     if (const auto *Entry =
7650b57cec5SDimitry Andric             CostTableLookup(AVX2UniformCostTable, ISD, LT.second))
766bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
767bdd1243dSDimitry Andric         return LT.first * *KindCost;
7680b57cec5SDimitry Andric 
769bdd1243dSDimitry Andric   static const CostKindTblEntry AVXUniformCostTable[] = {
770bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i8,  {  4, 4, 6, 8 } }, // psllw + pand.
771bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i8,  {  4, 8, 5, 8 } }, // psrlw + pand.
772bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i8,  {  6, 6, 9,13 } }, // psrlw, pand, pxor, psubb.
773bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v32i8,  {  7, 8,11,14 } }, // psllw + pand + split.
774bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v32i8,  {  7, 9,10,14 } }, // psrlw + pand + split.
775bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v32i8,  { 10,11,16,21 } }, // psrlw, pand, pxor, psubb + split.
7760b57cec5SDimitry Andric 
777bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v8i16,  {  1, 3, 1, 2 } }, // psllw.
778bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v8i16,  {  1, 3, 1, 2 } }, // psrlw.
779bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v8i16,  {  1, 3, 1, 2 } }, // psraw.
780bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i16, {  3, 7, 5, 7 } }, // psllw + split.
781bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i16, {  3, 7, 5, 7 } }, // psrlw + split.
782bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i16, {  3, 7, 5, 7 } }, // psraw + split.
7830b57cec5SDimitry Andric 
784bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v4i32,  {  1, 3, 1, 2 } }, // pslld.
785bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v4i32,  {  1, 3, 1, 2 } }, // psrld.
786bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v4i32,  {  1, 3, 1, 2 } }, // psrad.
787bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v8i32,  {  3, 7, 5, 7 } }, // pslld + split.
788bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v8i32,  {  3, 7, 5, 7 } }, // psrld + split.
789bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v8i32,  {  3, 7, 5, 7 } }, // psrad + split.
790bdd1243dSDimitry Andric 
791bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v2i64,  {  1, 3, 1, 2 } }, // psllq.
792bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v2i64,  {  1, 3, 1, 2 } }, // psrlq.
793bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v2i64,  {  3, 4, 5, 7 } }, // 2 x psrad + shuffle.
794bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v4i64,  {  3, 7, 4, 6 } }, // psllq + split.
795bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v4i64,  {  3, 7, 4, 6 } }, // psrlq + split.
796bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v4i64,  {  6, 7,10,13 } }, // 2 x (2 x psrad + shuffle) + split.
7970b57cec5SDimitry Andric   };
7980b57cec5SDimitry Andric 
799bdd1243dSDimitry Andric   // XOP has faster vXi8 shifts.
800bdd1243dSDimitry Andric   if (ST->hasAVX() && Op2Info.isUniform() &&
801bdd1243dSDimitry Andric       (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
802bdd1243dSDimitry Andric     if (const auto *Entry =
803bdd1243dSDimitry Andric             CostTableLookup(AVXUniformCostTable, ISD, LT.second))
804bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
805bdd1243dSDimitry Andric         return LT.first * *KindCost;
806bdd1243dSDimitry Andric 
807bdd1243dSDimitry Andric   static const CostKindTblEntry SSE2UniformCostTable[] = {
808bdd1243dSDimitry Andric     // Uniform splats are cheaper for the following instructions.
809bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i8, {  9, 10, 6, 9 } }, // psllw + pand.
810bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i8, {  9, 13, 5, 9 } }, // psrlw + pand.
811bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i8, { 11, 15, 9,13 } }, // pcmpgtb sequence.
812bdd1243dSDimitry Andric 
813bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v8i16, {  2, 2, 1, 2 } }, // psllw.
814bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v8i16, {  2, 2, 1, 2 } }, // psrlw.
815bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v8i16, {  2, 2, 1, 2 } }, // psraw.
816bdd1243dSDimitry Andric 
817bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v4i32, {  2, 2, 1, 2 } }, // pslld
818bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v4i32, {  2, 2, 1, 2 } }, // psrld.
819bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v4i32, {  2, 2, 1, 2 } }, // psrad.
820bdd1243dSDimitry Andric 
821bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v2i64, {  2, 2, 1, 2 } }, // psllq.
822bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v2i64, {  2, 2, 1, 2 } }, // psrlq.
823bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v2i64, {  5, 9, 5, 7 } }, // 2*psrlq + xor + sub.
824bdd1243dSDimitry Andric   };
825bdd1243dSDimitry Andric 
826bdd1243dSDimitry Andric   if (ST->hasSSE2() && Op2Info.isUniform() &&
827bdd1243dSDimitry Andric       (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
8280b57cec5SDimitry Andric     if (const auto *Entry =
8290b57cec5SDimitry Andric             CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
830bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
831bdd1243dSDimitry Andric         return LT.first * *KindCost;
8320b57cec5SDimitry Andric 
833bdd1243dSDimitry Andric   static const CostKindTblEntry AVX512DQCostTable[] = {
834bdd1243dSDimitry Andric     { ISD::MUL,  MVT::v2i64, { 2, 15, 1, 3 } }, // pmullq
835bdd1243dSDimitry Andric     { ISD::MUL,  MVT::v4i64, { 2, 15, 1, 3 } }, // pmullq
836bdd1243dSDimitry Andric     { ISD::MUL,  MVT::v8i64, { 3, 15, 1, 3 } }  // pmullq
8370b57cec5SDimitry Andric   };
8380b57cec5SDimitry Andric 
8390b57cec5SDimitry Andric   // Look for AVX512DQ lowering tricks for custom cases.
8400b57cec5SDimitry Andric   if (ST->hasDQI())
8410b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(AVX512DQCostTable, ISD, LT.second))
842bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
843bdd1243dSDimitry Andric         return LT.first * *KindCost;
8440b57cec5SDimitry Andric 
845bdd1243dSDimitry Andric   static const CostKindTblEntry AVX512BWCostTable[] = {
846bdd1243dSDimitry Andric     { ISD::SHL,   MVT::v16i8,   {  4,  8, 4, 5 } }, // extend/vpsllvw/pack sequence.
847bdd1243dSDimitry Andric     { ISD::SRL,   MVT::v16i8,   {  4,  8, 4, 5 } }, // extend/vpsrlvw/pack sequence.
848bdd1243dSDimitry Andric     { ISD::SRA,   MVT::v16i8,   {  4,  8, 4, 5 } }, // extend/vpsravw/pack sequence.
849bdd1243dSDimitry Andric     { ISD::SHL,   MVT::v32i8,   {  4, 23,11,16 } }, // extend/vpsllvw/pack sequence.
850bdd1243dSDimitry Andric     { ISD::SRL,   MVT::v32i8,   {  4, 30,12,18 } }, // extend/vpsrlvw/pack sequence.
851bdd1243dSDimitry Andric     { ISD::SRA,   MVT::v32i8,   {  6, 13,24,30 } }, // extend/vpsravw/pack sequence.
852bdd1243dSDimitry Andric     { ISD::SHL,   MVT::v64i8,   {  6, 19,13,15 } }, // extend/vpsllvw/pack sequence.
853bdd1243dSDimitry Andric     { ISD::SRL,   MVT::v64i8,   {  7, 27,15,18 } }, // extend/vpsrlvw/pack sequence.
854bdd1243dSDimitry Andric     { ISD::SRA,   MVT::v64i8,   { 15, 15,30,30 } }, // extend/vpsravw/pack sequence.
855bdd1243dSDimitry Andric 
856bdd1243dSDimitry Andric     { ISD::SHL,   MVT::v8i16,   {  1,  1, 1, 1 } }, // vpsllvw
857bdd1243dSDimitry Andric     { ISD::SRL,   MVT::v8i16,   {  1,  1, 1, 1 } }, // vpsrlvw
858bdd1243dSDimitry Andric     { ISD::SRA,   MVT::v8i16,   {  1,  1, 1, 1 } }, // vpsravw
859bdd1243dSDimitry Andric     { ISD::SHL,   MVT::v16i16,  {  1,  1, 1, 1 } }, // vpsllvw
860bdd1243dSDimitry Andric     { ISD::SRL,   MVT::v16i16,  {  1,  1, 1, 1 } }, // vpsrlvw
861bdd1243dSDimitry Andric     { ISD::SRA,   MVT::v16i16,  {  1,  1, 1, 1 } }, // vpsravw
862bdd1243dSDimitry Andric     { ISD::SHL,   MVT::v32i16,  {  1,  1, 1, 1 } }, // vpsllvw
863bdd1243dSDimitry Andric     { ISD::SRL,   MVT::v32i16,  {  1,  1, 1, 1 } }, // vpsrlvw
864bdd1243dSDimitry Andric     { ISD::SRA,   MVT::v32i16,  {  1,  1, 1, 1 } }, // vpsravw
865bdd1243dSDimitry Andric 
866bdd1243dSDimitry Andric     { ISD::ADD,   MVT::v64i8,   {  1,  1, 1, 1 } }, // paddb
867bdd1243dSDimitry Andric     { ISD::ADD,   MVT::v32i16,  {  1,  1, 1, 1 } }, // paddw
868bdd1243dSDimitry Andric 
869bdd1243dSDimitry Andric     { ISD::ADD,   MVT::v32i8,   {  1,  1, 1, 1 } }, // paddb
870bdd1243dSDimitry Andric     { ISD::ADD,   MVT::v16i16,  {  1,  1, 1, 1 } }, // paddw
871bdd1243dSDimitry Andric     { ISD::ADD,   MVT::v8i32,   {  1,  1, 1, 1 } }, // paddd
872bdd1243dSDimitry Andric     { ISD::ADD,   MVT::v4i64,   {  1,  1, 1, 1 } }, // paddq
873bdd1243dSDimitry Andric 
874bdd1243dSDimitry Andric     { ISD::SUB,   MVT::v64i8,   {  1,  1, 1, 1 } }, // psubb
875bdd1243dSDimitry Andric     { ISD::SUB,   MVT::v32i16,  {  1,  1, 1, 1 } }, // psubw
876bdd1243dSDimitry Andric 
877*0fca6ea1SDimitry Andric     { ISD::MUL,   MVT::v16i8,   {  4, 12, 4, 5 } }, // extend/pmullw/trunc
878*0fca6ea1SDimitry Andric     { ISD::MUL,   MVT::v32i8,   {  3, 10, 7,10 } }, // pmaddubsw
879*0fca6ea1SDimitry Andric     { ISD::MUL,   MVT::v64i8,   {  3, 11, 7,10 } }, // pmaddubsw
880bdd1243dSDimitry Andric     { ISD::MUL,   MVT::v32i16,  {  1,  5, 1, 1 } }, // pmullw
881bdd1243dSDimitry Andric 
882bdd1243dSDimitry Andric     { ISD::SUB,   MVT::v32i8,   {  1,  1, 1, 1 } }, // psubb
883bdd1243dSDimitry Andric     { ISD::SUB,   MVT::v16i16,  {  1,  1, 1, 1 } }, // psubw
884bdd1243dSDimitry Andric     { ISD::SUB,   MVT::v8i32,   {  1,  1, 1, 1 } }, // psubd
885bdd1243dSDimitry Andric     { ISD::SUB,   MVT::v4i64,   {  1,  1, 1, 1 } }, // psubq
8860b57cec5SDimitry Andric   };
8870b57cec5SDimitry Andric 
8880b57cec5SDimitry Andric   // Look for AVX512BW lowering tricks for custom cases.
8890b57cec5SDimitry Andric   if (ST->hasBWI())
8900b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(AVX512BWCostTable, ISD, LT.second))
891bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
892bdd1243dSDimitry Andric         return LT.first * *KindCost;
8930b57cec5SDimitry Andric 
894bdd1243dSDimitry Andric   static const CostKindTblEntry AVX512CostTable[] = {
895bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v64i8,   { 15, 19,27,33 } }, // vpblendv+split sequence.
896bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v64i8,   { 15, 19,30,36 } }, // vpblendv+split sequence.
897bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v64i8,   { 37, 37,51,63 } }, // vpblendv+split sequence.
8980b57cec5SDimitry Andric 
899bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v32i16,  { 11, 16,11,15 } }, // 2*extend/vpsrlvd/pack sequence.
900bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v32i16,  { 11, 16,11,15 } }, // 2*extend/vpsrlvd/pack sequence.
901bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v32i16,  { 11, 16,11,15 } }, // 2*extend/vpsravd/pack sequence.
9020b57cec5SDimitry Andric 
903bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v4i32,   {  1,  1, 1, 1 } },
904bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v4i32,   {  1,  1, 1, 1 } },
905bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v4i32,   {  1,  1, 1, 1 } },
906bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v8i32,   {  1,  1, 1, 1 } },
907bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v8i32,   {  1,  1, 1, 1 } },
908bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v8i32,   {  1,  1, 1, 1 } },
909bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v16i32,  {  1,  1, 1, 1 } },
910bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v16i32,  {  1,  1, 1, 1 } },
911bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v16i32,  {  1,  1, 1, 1 } },
9120b57cec5SDimitry Andric 
913bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v2i64,   {  1,  1, 1, 1 } },
914bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v2i64,   {  1,  1, 1, 1 } },
915bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v2i64,   {  1,  1, 1, 1 } },
916bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v4i64,   {  1,  1, 1, 1 } },
917bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v4i64,   {  1,  1, 1, 1 } },
918bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v4i64,   {  1,  1, 1, 1 } },
919bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v8i64,   {  1,  1, 1, 1 } },
920bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v8i64,   {  1,  1, 1, 1 } },
921bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v8i64,   {  1,  1, 1, 1 } },
9220b57cec5SDimitry Andric 
923bdd1243dSDimitry Andric     { ISD::ADD,     MVT::v64i8,   {  3,  7, 5, 5 } }, // 2*paddb + split
924bdd1243dSDimitry Andric     { ISD::ADD,     MVT::v32i16,  {  3,  7, 5, 5 } }, // 2*paddw + split
9250b57cec5SDimitry Andric 
926bdd1243dSDimitry Andric     { ISD::SUB,     MVT::v64i8,   {  3,  7, 5, 5 } }, // 2*psubb + split
927bdd1243dSDimitry Andric     { ISD::SUB,     MVT::v32i16,  {  3,  7, 5, 5 } }, // 2*psubw + split
928bdd1243dSDimitry Andric 
929bdd1243dSDimitry Andric     { ISD::AND,     MVT::v32i8,   {  1,  1, 1, 1 } },
930bdd1243dSDimitry Andric     { ISD::AND,     MVT::v16i16,  {  1,  1, 1, 1 } },
931bdd1243dSDimitry Andric     { ISD::AND,     MVT::v8i32,   {  1,  1, 1, 1 } },
932bdd1243dSDimitry Andric     { ISD::AND,     MVT::v4i64,   {  1,  1, 1, 1 } },
933bdd1243dSDimitry Andric 
934bdd1243dSDimitry Andric     { ISD::OR,      MVT::v32i8,   {  1,  1, 1, 1 } },
935bdd1243dSDimitry Andric     { ISD::OR,      MVT::v16i16,  {  1,  1, 1, 1 } },
936bdd1243dSDimitry Andric     { ISD::OR,      MVT::v8i32,   {  1,  1, 1, 1 } },
937bdd1243dSDimitry Andric     { ISD::OR,      MVT::v4i64,   {  1,  1, 1, 1 } },
938bdd1243dSDimitry Andric 
939bdd1243dSDimitry Andric     { ISD::XOR,     MVT::v32i8,   {  1,  1, 1, 1 } },
940bdd1243dSDimitry Andric     { ISD::XOR,     MVT::v16i16,  {  1,  1, 1, 1 } },
941bdd1243dSDimitry Andric     { ISD::XOR,     MVT::v8i32,   {  1,  1, 1, 1 } },
942bdd1243dSDimitry Andric     { ISD::XOR,     MVT::v4i64,   {  1,  1, 1, 1 } },
943bdd1243dSDimitry Andric 
944bdd1243dSDimitry Andric     { ISD::MUL,     MVT::v16i32,  {  1, 10, 1, 2 } }, // pmulld (Skylake from agner.org)
945bdd1243dSDimitry Andric     { ISD::MUL,     MVT::v8i32,   {  1, 10, 1, 2 } }, // pmulld (Skylake from agner.org)
946bdd1243dSDimitry Andric     { ISD::MUL,     MVT::v4i32,   {  1, 10, 1, 2 } }, // pmulld (Skylake from agner.org)
947bdd1243dSDimitry Andric     { ISD::MUL,     MVT::v8i64,   {  6,  9, 8, 8 } }, // 3*pmuludq/3*shift/2*add
948bdd1243dSDimitry Andric     { ISD::MUL,     MVT::i64,     {  1 } }, // Skylake from http://www.agner.org/
949bdd1243dSDimitry Andric 
95006c3fb27SDimitry Andric     { X86ISD::PMULUDQ, MVT::v8i64, { 1,  5, 1, 1 } },
95106c3fb27SDimitry Andric 
952bdd1243dSDimitry Andric     { ISD::FNEG,    MVT::v8f64,   {  1,  1, 1, 2 } }, // Skylake from http://www.agner.org/
953bdd1243dSDimitry Andric     { ISD::FADD,    MVT::v8f64,   {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
954bdd1243dSDimitry Andric     { ISD::FADD,    MVT::v4f64,   {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
955bdd1243dSDimitry Andric     { ISD::FSUB,    MVT::v8f64,   {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
956bdd1243dSDimitry Andric     { ISD::FSUB,    MVT::v4f64,   {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
957bdd1243dSDimitry Andric     { ISD::FMUL,    MVT::v8f64,   {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
958bdd1243dSDimitry Andric     { ISD::FMUL,    MVT::v4f64,   {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
959bdd1243dSDimitry Andric     { ISD::FMUL,    MVT::v2f64,   {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
960bdd1243dSDimitry Andric     { ISD::FMUL,    MVT::f64,     {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
961bdd1243dSDimitry Andric 
962bdd1243dSDimitry Andric     { ISD::FDIV,    MVT::f64,     {  4, 14, 1, 1 } }, // Skylake from http://www.agner.org/
963bdd1243dSDimitry Andric     { ISD::FDIV,    MVT::v2f64,   {  4, 14, 1, 1 } }, // Skylake from http://www.agner.org/
964bdd1243dSDimitry Andric     { ISD::FDIV,    MVT::v4f64,   {  8, 14, 1, 1 } }, // Skylake from http://www.agner.org/
965bdd1243dSDimitry Andric     { ISD::FDIV,    MVT::v8f64,   { 16, 23, 1, 3 } }, // Skylake from http://www.agner.org/
966bdd1243dSDimitry Andric 
967bdd1243dSDimitry Andric     { ISD::FNEG,    MVT::v16f32,  {  1,  1, 1, 2 } }, // Skylake from http://www.agner.org/
968bdd1243dSDimitry Andric     { ISD::FADD,    MVT::v16f32,  {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
969bdd1243dSDimitry Andric     { ISD::FADD,    MVT::v8f32,   {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
970bdd1243dSDimitry Andric     { ISD::FSUB,    MVT::v16f32,  {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
971bdd1243dSDimitry Andric     { ISD::FSUB,    MVT::v8f32,   {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
972bdd1243dSDimitry Andric     { ISD::FMUL,    MVT::v16f32,  {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
973bdd1243dSDimitry Andric     { ISD::FMUL,    MVT::v8f32,   {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
974bdd1243dSDimitry Andric     { ISD::FMUL,    MVT::v4f32,   {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
975bdd1243dSDimitry Andric     { ISD::FMUL,    MVT::f32,     {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
976bdd1243dSDimitry Andric 
977bdd1243dSDimitry Andric     { ISD::FDIV,    MVT::f32,     {  3, 11, 1, 1 } }, // Skylake from http://www.agner.org/
978bdd1243dSDimitry Andric     { ISD::FDIV,    MVT::v4f32,   {  3, 11, 1, 1 } }, // Skylake from http://www.agner.org/
979bdd1243dSDimitry Andric     { ISD::FDIV,    MVT::v8f32,   {  5, 11, 1, 1 } }, // Skylake from http://www.agner.org/
980bdd1243dSDimitry Andric     { ISD::FDIV,    MVT::v16f32,  { 10, 18, 1, 3 } }, // Skylake from http://www.agner.org/
9810b57cec5SDimitry Andric   };
9820b57cec5SDimitry Andric 
9830b57cec5SDimitry Andric   if (ST->hasAVX512())
9840b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(AVX512CostTable, ISD, LT.second))
985bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
986bdd1243dSDimitry Andric         return LT.first * *KindCost;
9870b57cec5SDimitry Andric 
988bdd1243dSDimitry Andric   static const CostKindTblEntry AVX2ShiftCostTable[] = {
989fe6060f1SDimitry Andric     // Shifts on vXi64/vXi32 on AVX2 are legal even though we declare to
9900b57cec5SDimitry Andric     // customize them to detect the cases where shift amount is a scalar one.
991bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v4i32,  { 2, 3, 1, 3 } }, // vpsllvd (Haswell from agner.org)
992bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v4i32,  { 2, 3, 1, 3 } }, // vpsrlvd (Haswell from agner.org)
993bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v4i32,  { 2, 3, 1, 3 } }, // vpsravd (Haswell from agner.org)
994bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v8i32,  { 4, 4, 1, 3 } }, // vpsllvd (Haswell from agner.org)
995bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v8i32,  { 4, 4, 1, 3 } }, // vpsrlvd (Haswell from agner.org)
996bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v8i32,  { 4, 4, 1, 3 } }, // vpsravd (Haswell from agner.org)
997bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v2i64,  { 2, 3, 1, 1 } }, // vpsllvq (Haswell from agner.org)
998bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v2i64,  { 2, 3, 1, 1 } }, // vpsrlvq (Haswell from agner.org)
999bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v4i64,  { 4, 4, 1, 2 } }, // vpsllvq (Haswell from agner.org)
1000bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v4i64,  { 4, 4, 1, 2 } }, // vpsrlvq (Haswell from agner.org)
10010b57cec5SDimitry Andric   };
10020b57cec5SDimitry Andric 
10035ffd83dbSDimitry Andric   if (ST->hasAVX512()) {
1004bdd1243dSDimitry Andric     if (ISD == ISD::SHL && LT.second == MVT::v32i16 && Op2Info.isConstant())
10055ffd83dbSDimitry Andric       // On AVX512, a packed v32i16 shift left by a constant build_vector
10065ffd83dbSDimitry Andric       // is lowered into a vector multiply (vpmullw).
10075ffd83dbSDimitry Andric       return getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
1008bdd1243dSDimitry Andric                                     Op1Info.getNoProps(), Op2Info.getNoProps());
10095ffd83dbSDimitry Andric   }
10105ffd83dbSDimitry Andric 
1011fe6060f1SDimitry Andric   // Look for AVX2 lowering tricks (XOP is always better at v4i32 shifts).
1012fe6060f1SDimitry Andric   if (ST->hasAVX2() && !(ST->hasXOP() && LT.second == MVT::v4i32)) {
10130b57cec5SDimitry Andric     if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
1014bdd1243dSDimitry Andric         Op2Info.isConstant())
10150b57cec5SDimitry Andric       // On AVX2, a packed v16i16 shift left by a constant build_vector
10160b57cec5SDimitry Andric       // is lowered into a vector multiply (vpmullw).
10175ffd83dbSDimitry Andric       return getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
1018bdd1243dSDimitry Andric                                     Op1Info.getNoProps(), Op2Info.getNoProps());
10190b57cec5SDimitry Andric 
10200b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
1021bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
1022bdd1243dSDimitry Andric         return LT.first * *KindCost;
10230b57cec5SDimitry Andric   }
10240b57cec5SDimitry Andric 
1025bdd1243dSDimitry Andric   static const CostKindTblEntry XOPShiftCostTable[] = {
10260b57cec5SDimitry Andric     // 128bit shifts take 1cy, but right shifts require negation beforehand.
1027bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v16i8,  { 1, 3, 1, 1 } },
1028bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v16i8,  { 2, 3, 1, 1 } },
1029bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v16i8,  { 2, 3, 1, 1 } },
1030bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v8i16,  { 1, 3, 1, 1 } },
1031bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v8i16,  { 2, 3, 1, 1 } },
1032bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v8i16,  { 2, 3, 1, 1 } },
1033bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v4i32,  { 1, 3, 1, 1 } },
1034bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v4i32,  { 2, 3, 1, 1 } },
1035bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v4i32,  { 2, 3, 1, 1 } },
1036bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v2i64,  { 1, 3, 1, 1 } },
1037bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v2i64,  { 2, 3, 1, 1 } },
1038bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v2i64,  { 2, 3, 1, 1 } },
10390b57cec5SDimitry Andric     // 256bit shifts require splitting if AVX2 didn't catch them above.
1040bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v32i8,  { 4, 7, 5, 6 } },
1041bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v32i8,  { 6, 7, 5, 6 } },
1042bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v32i8,  { 6, 7, 5, 6 } },
1043bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v16i16, { 4, 7, 5, 6 } },
1044bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v16i16, { 6, 7, 5, 6 } },
1045bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v16i16, { 6, 7, 5, 6 } },
1046bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v8i32,  { 4, 7, 5, 6 } },
1047bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v8i32,  { 6, 7, 5, 6 } },
1048bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v8i32,  { 6, 7, 5, 6 } },
1049bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v4i64,  { 4, 7, 5, 6 } },
1050bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v4i64,  { 6, 7, 5, 6 } },
1051bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v4i64,  { 6, 7, 5, 6 } },
10520b57cec5SDimitry Andric   };
10530b57cec5SDimitry Andric 
10540b57cec5SDimitry Andric   // Look for XOP lowering tricks.
10550b57cec5SDimitry Andric   if (ST->hasXOP()) {
10560b57cec5SDimitry Andric     // If the right shift is constant then we'll fold the negation so
10570b57cec5SDimitry Andric     // it's as cheap as a left shift.
10580b57cec5SDimitry Andric     int ShiftISD = ISD;
1059bdd1243dSDimitry Andric     if ((ShiftISD == ISD::SRL || ShiftISD == ISD::SRA) && Op2Info.isConstant())
10600b57cec5SDimitry Andric       ShiftISD = ISD::SHL;
10610b57cec5SDimitry Andric     if (const auto *Entry =
10620b57cec5SDimitry Andric             CostTableLookup(XOPShiftCostTable, ShiftISD, LT.second))
1063bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
1064bdd1243dSDimitry Andric         return LT.first * *KindCost;
10650b57cec5SDimitry Andric   }
10660b57cec5SDimitry Andric 
1067bdd1243dSDimitry Andric   if (ISD == ISD::SHL && !Op2Info.isUniform() && Op2Info.isConstant()) {
10680b57cec5SDimitry Andric     MVT VT = LT.second;
10690b57cec5SDimitry Andric     // Vector shift left by non uniform constant can be lowered
10700b57cec5SDimitry Andric     // into vector multiply.
10710b57cec5SDimitry Andric     if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->hasSSE2()) ||
10720b57cec5SDimitry Andric         ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->hasAVX()))
10730b57cec5SDimitry Andric       ISD = ISD::MUL;
10740b57cec5SDimitry Andric   }
10750b57cec5SDimitry Andric 
1076bdd1243dSDimitry Andric   static const CostKindTblEntry GLMCostTable[] = {
1077bdd1243dSDimitry Andric     { ISD::FDIV,  MVT::f32,   { 18, 19, 1, 1 } }, // divss
1078bdd1243dSDimitry Andric     { ISD::FDIV,  MVT::v4f32, { 35, 36, 1, 1 } }, // divps
1079bdd1243dSDimitry Andric     { ISD::FDIV,  MVT::f64,   { 33, 34, 1, 1 } }, // divsd
1080bdd1243dSDimitry Andric     { ISD::FDIV,  MVT::v2f64, { 65, 66, 1, 1 } }, // divpd
1081bdd1243dSDimitry Andric   };
10820b57cec5SDimitry Andric 
1083bdd1243dSDimitry Andric   if (ST->useGLMDivSqrtCosts())
1084bdd1243dSDimitry Andric     if (const auto *Entry = CostTableLookup(GLMCostTable, ISD, LT.second))
1085bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
1086bdd1243dSDimitry Andric         return LT.first * *KindCost;
10870b57cec5SDimitry Andric 
1088bdd1243dSDimitry Andric   static const CostKindTblEntry SLMCostTable[] = {
1089bdd1243dSDimitry Andric     { ISD::MUL,   MVT::v4i32, { 11, 11, 1, 7 } }, // pmulld
1090bdd1243dSDimitry Andric     { ISD::MUL,   MVT::v8i16, {  2,  5, 1, 1 } }, // pmullw
1091bdd1243dSDimitry Andric     { ISD::FMUL,  MVT::f64,   {  2,  5, 1, 1 } }, // mulsd
1092bdd1243dSDimitry Andric     { ISD::FMUL,  MVT::f32,   {  1,  4, 1, 1 } }, // mulss
1093bdd1243dSDimitry Andric     { ISD::FMUL,  MVT::v2f64, {  4,  7, 1, 1 } }, // mulpd
1094bdd1243dSDimitry Andric     { ISD::FMUL,  MVT::v4f32, {  2,  5, 1, 1 } }, // mulps
1095bdd1243dSDimitry Andric     { ISD::FDIV,  MVT::f32,   { 17, 19, 1, 1 } }, // divss
1096bdd1243dSDimitry Andric     { ISD::FDIV,  MVT::v4f32, { 39, 39, 1, 6 } }, // divps
1097bdd1243dSDimitry Andric     { ISD::FDIV,  MVT::f64,   { 32, 34, 1, 1 } }, // divsd
1098bdd1243dSDimitry Andric     { ISD::FDIV,  MVT::v2f64, { 69, 69, 1, 6 } }, // divpd
1099bdd1243dSDimitry Andric     { ISD::FADD,  MVT::v2f64, {  2,  4, 1, 1 } }, // addpd
1100bdd1243dSDimitry Andric     { ISD::FSUB,  MVT::v2f64, {  2,  4, 1, 1 } }, // subpd
1101bdd1243dSDimitry Andric     // v2i64/v4i64 mul is custom lowered as a series of long:
1102bdd1243dSDimitry Andric     // multiplies(3), shifts(3) and adds(2)
1103bdd1243dSDimitry Andric     // slm muldq version throughput is 2 and addq throughput 4
1104bdd1243dSDimitry Andric     // thus: 3X2 (muldq throughput) + 3X1 (shift throughput) +
1105bdd1243dSDimitry Andric     //       2X4 (addq throughput) = 17
1106bdd1243dSDimitry Andric     { ISD::MUL,   MVT::v2i64, { 17, 22, 9, 9 } },
1107bdd1243dSDimitry Andric     // slm addq\subq throughput is 4
1108bdd1243dSDimitry Andric     { ISD::ADD,   MVT::v2i64, {  4,  2, 1, 2 } },
1109bdd1243dSDimitry Andric     { ISD::SUB,   MVT::v2i64, {  4,  2, 1, 2 } },
1110bdd1243dSDimitry Andric   };
11110b57cec5SDimitry Andric 
1112bdd1243dSDimitry Andric   if (ST->useSLMArithCosts())
1113bdd1243dSDimitry Andric     if (const auto *Entry = CostTableLookup(SLMCostTable, ISD, LT.second))
1114bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
1115bdd1243dSDimitry Andric         return LT.first * *KindCost;
11160b57cec5SDimitry Andric 
1117bdd1243dSDimitry Andric   static const CostKindTblEntry AVX2CostTable[] = {
1118bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i8,   {  6, 21,11,16 } }, // vpblendvb sequence.
1119bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v32i8,   {  6, 23,11,22 } }, // vpblendvb sequence.
1120bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v8i16,   {  5, 18, 5,10 } }, // extend/vpsrlvd/pack sequence.
1121bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i16,  {  8, 10,10,14 } }, // extend/vpsrlvd/pack sequence.
11220b57cec5SDimitry Andric 
1123bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i8,   {  6, 27,12,18 } }, // vpblendvb sequence.
1124bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v32i8,   {  8, 30,12,24 } }, // vpblendvb sequence.
1125bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v8i16,   {  5, 11, 5,10 } }, // extend/vpsrlvd/pack sequence.
1126bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i16,  {  8, 10,10,14 } }, // extend/vpsrlvd/pack sequence.
11270b57cec5SDimitry Andric 
1128bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i8,   { 17, 17,24,30 } }, // vpblendvb sequence.
1129bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v32i8,   { 18, 20,24,43 } }, // vpblendvb sequence.
1130bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v8i16,   {  5, 11, 5,10 } }, // extend/vpsravd/pack sequence.
1131bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i16,  {  8, 10,10,14 } }, // extend/vpsravd/pack sequence.
1132bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v2i64,   {  4,  5, 5, 5 } }, // srl/xor/sub sequence.
1133bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v4i64,   {  8,  8, 5, 9 } }, // srl/xor/sub sequence.
1134bdd1243dSDimitry Andric 
1135bdd1243dSDimitry Andric     { ISD::SUB,  MVT::v32i8,   {  1,  1, 1, 2 } }, // psubb
1136bdd1243dSDimitry Andric     { ISD::ADD,  MVT::v32i8,   {  1,  1, 1, 2 } }, // paddb
1137bdd1243dSDimitry Andric     { ISD::SUB,  MVT::v16i16,  {  1,  1, 1, 2 } }, // psubw
1138bdd1243dSDimitry Andric     { ISD::ADD,  MVT::v16i16,  {  1,  1, 1, 2 } }, // paddw
1139bdd1243dSDimitry Andric     { ISD::SUB,  MVT::v8i32,   {  1,  1, 1, 2 } }, // psubd
1140bdd1243dSDimitry Andric     { ISD::ADD,  MVT::v8i32,   {  1,  1, 1, 2 } }, // paddd
1141bdd1243dSDimitry Andric     { ISD::SUB,  MVT::v4i64,   {  1,  1, 1, 2 } }, // psubq
1142bdd1243dSDimitry Andric     { ISD::ADD,  MVT::v4i64,   {  1,  1, 1, 2 } }, // paddq
1143bdd1243dSDimitry Andric 
114406c3fb27SDimitry Andric     { ISD::MUL,  MVT::v16i8,   {  5, 18, 6,12 } }, // extend/pmullw/pack
1145*0fca6ea1SDimitry Andric     { ISD::MUL,  MVT::v32i8,   {  4,  8, 8,16 } }, // pmaddubsw
114606c3fb27SDimitry Andric     { ISD::MUL,  MVT::v16i16,  {  2,  5, 1, 2 } }, // pmullw
1147bdd1243dSDimitry Andric     { ISD::MUL,  MVT::v8i32,   {  4, 10, 1, 2 } }, // pmulld
1148bdd1243dSDimitry Andric     { ISD::MUL,  MVT::v4i32,   {  2, 10, 1, 2 } }, // pmulld
1149bdd1243dSDimitry Andric     { ISD::MUL,  MVT::v4i64,   {  6, 10, 8,13 } }, // 3*pmuludq/3*shift/2*add
1150bdd1243dSDimitry Andric     { ISD::MUL,  MVT::v2i64,   {  6, 10, 8, 8 } }, // 3*pmuludq/3*shift/2*add
1151bdd1243dSDimitry Andric 
115206c3fb27SDimitry Andric     { X86ISD::PMULUDQ, MVT::v4i64, { 1,  5, 1, 1 } },
115306c3fb27SDimitry Andric 
1154bdd1243dSDimitry Andric     { ISD::FNEG, MVT::v4f64,   {  1,  1, 1, 2 } }, // vxorpd
1155bdd1243dSDimitry Andric     { ISD::FNEG, MVT::v8f32,   {  1,  1, 1, 2 } }, // vxorps
1156bdd1243dSDimitry Andric 
1157bdd1243dSDimitry Andric     { ISD::FADD, MVT::f64,     {  1,  4, 1, 1 } }, // vaddsd
1158bdd1243dSDimitry Andric     { ISD::FADD, MVT::f32,     {  1,  4, 1, 1 } }, // vaddss
1159bdd1243dSDimitry Andric     { ISD::FADD, MVT::v2f64,   {  1,  4, 1, 1 } }, // vaddpd
1160bdd1243dSDimitry Andric     { ISD::FADD, MVT::v4f32,   {  1,  4, 1, 1 } }, // vaddps
1161bdd1243dSDimitry Andric     { ISD::FADD, MVT::v4f64,   {  1,  4, 1, 2 } }, // vaddpd
1162bdd1243dSDimitry Andric     { ISD::FADD, MVT::v8f32,   {  1,  4, 1, 2 } }, // vaddps
1163bdd1243dSDimitry Andric 
1164bdd1243dSDimitry Andric     { ISD::FSUB, MVT::f64,     {  1,  4, 1, 1 } }, // vsubsd
1165bdd1243dSDimitry Andric     { ISD::FSUB, MVT::f32,     {  1,  4, 1, 1 } }, // vsubss
1166bdd1243dSDimitry Andric     { ISD::FSUB, MVT::v2f64,   {  1,  4, 1, 1 } }, // vsubpd
1167bdd1243dSDimitry Andric     { ISD::FSUB, MVT::v4f32,   {  1,  4, 1, 1 } }, // vsubps
1168bdd1243dSDimitry Andric     { ISD::FSUB, MVT::v4f64,   {  1,  4, 1, 2 } }, // vsubpd
1169bdd1243dSDimitry Andric     { ISD::FSUB, MVT::v8f32,   {  1,  4, 1, 2 } }, // vsubps
1170bdd1243dSDimitry Andric 
1171bdd1243dSDimitry Andric     { ISD::FMUL, MVT::f64,     {  1,  5, 1, 1 } }, // vmulsd
1172bdd1243dSDimitry Andric     { ISD::FMUL, MVT::f32,     {  1,  5, 1, 1 } }, // vmulss
1173bdd1243dSDimitry Andric     { ISD::FMUL, MVT::v2f64,   {  1,  5, 1, 1 } }, // vmulpd
1174bdd1243dSDimitry Andric     { ISD::FMUL, MVT::v4f32,   {  1,  5, 1, 1 } }, // vmulps
1175bdd1243dSDimitry Andric     { ISD::FMUL, MVT::v4f64,   {  1,  5, 1, 2 } }, // vmulpd
1176bdd1243dSDimitry Andric     { ISD::FMUL, MVT::v8f32,   {  1,  5, 1, 2 } }, // vmulps
1177bdd1243dSDimitry Andric 
1178bdd1243dSDimitry Andric     { ISD::FDIV, MVT::f32,     {  7, 13, 1, 1 } }, // vdivss
1179bdd1243dSDimitry Andric     { ISD::FDIV, MVT::v4f32,   {  7, 13, 1, 1 } }, // vdivps
1180bdd1243dSDimitry Andric     { ISD::FDIV, MVT::v8f32,   { 14, 21, 1, 3 } }, // vdivps
1181bdd1243dSDimitry Andric     { ISD::FDIV, MVT::f64,     { 14, 20, 1, 1 } }, // vdivsd
1182bdd1243dSDimitry Andric     { ISD::FDIV, MVT::v2f64,   { 14, 20, 1, 1 } }, // vdivpd
1183bdd1243dSDimitry Andric     { ISD::FDIV, MVT::v4f64,   { 28, 35, 1, 3 } }, // vdivpd
11840b57cec5SDimitry Andric   };
11850b57cec5SDimitry Andric 
11860b57cec5SDimitry Andric   // Look for AVX2 lowering tricks for custom cases.
11870b57cec5SDimitry Andric   if (ST->hasAVX2())
11880b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(AVX2CostTable, ISD, LT.second))
1189bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
1190bdd1243dSDimitry Andric         return LT.first * *KindCost;
11910b57cec5SDimitry Andric 
1192bdd1243dSDimitry Andric   static const CostKindTblEntry AVX1CostTable[] = {
11930b57cec5SDimitry Andric     // We don't have to scalarize unsupported ops. We can issue two half-sized
11940b57cec5SDimitry Andric     // operations and we only need to extract the upper YMM half.
11950b57cec5SDimitry Andric     // Two ops + 1 extract + 1 insert = 4.
1196*0fca6ea1SDimitry Andric     { ISD::MUL,     MVT::v32i8,   { 10, 11, 18, 19 } }, // pmaddubsw + split
1197*0fca6ea1SDimitry Andric     { ISD::MUL,     MVT::v16i8,   {  5,  6,  8, 12 } }, // 2*pmaddubsw/3*and/psllw/or
1198bdd1243dSDimitry Andric     { ISD::MUL,     MVT::v16i16,  {  4,  8,  5,  6 } }, // pmullw + split
1199bdd1243dSDimitry Andric     { ISD::MUL,     MVT::v8i32,   {  5,  8,  5, 10 } }, // pmulld + split
1200bdd1243dSDimitry Andric     { ISD::MUL,     MVT::v4i32,   {  2,  5,  1,  3 } }, // pmulld
1201bdd1243dSDimitry Andric     { ISD::MUL,     MVT::v4i64,   { 12, 15, 19, 20 } },
1202fe6060f1SDimitry Andric 
1203bdd1243dSDimitry Andric     { ISD::AND,     MVT::v32i8,   {  1,  1, 1, 2 } }, // vandps
1204bdd1243dSDimitry Andric     { ISD::AND,     MVT::v16i16,  {  1,  1, 1, 2 } }, // vandps
1205bdd1243dSDimitry Andric     { ISD::AND,     MVT::v8i32,   {  1,  1, 1, 2 } }, // vandps
1206bdd1243dSDimitry Andric     { ISD::AND,     MVT::v4i64,   {  1,  1, 1, 2 } }, // vandps
12070b57cec5SDimitry Andric 
1208bdd1243dSDimitry Andric     { ISD::OR,      MVT::v32i8,   {  1,  1, 1, 2 } }, // vorps
1209bdd1243dSDimitry Andric     { ISD::OR,      MVT::v16i16,  {  1,  1, 1, 2 } }, // vorps
1210bdd1243dSDimitry Andric     { ISD::OR,      MVT::v8i32,   {  1,  1, 1, 2 } }, // vorps
1211bdd1243dSDimitry Andric     { ISD::OR,      MVT::v4i64,   {  1,  1, 1, 2 } }, // vorps
12120b57cec5SDimitry Andric 
1213bdd1243dSDimitry Andric     { ISD::XOR,     MVT::v32i8,   {  1,  1, 1, 2 } }, // vxorps
1214bdd1243dSDimitry Andric     { ISD::XOR,     MVT::v16i16,  {  1,  1, 1, 2 } }, // vxorps
1215bdd1243dSDimitry Andric     { ISD::XOR,     MVT::v8i32,   {  1,  1, 1, 2 } }, // vxorps
1216bdd1243dSDimitry Andric     { ISD::XOR,     MVT::v4i64,   {  1,  1, 1, 2 } }, // vxorps
1217fe6060f1SDimitry Andric 
1218bdd1243dSDimitry Andric     { ISD::SUB,     MVT::v32i8,   {  4,  2, 5, 6 } }, // psubb + split
1219bdd1243dSDimitry Andric     { ISD::ADD,     MVT::v32i8,   {  4,  2, 5, 6 } }, // paddb + split
1220bdd1243dSDimitry Andric     { ISD::SUB,     MVT::v16i16,  {  4,  2, 5, 6 } }, // psubw + split
1221bdd1243dSDimitry Andric     { ISD::ADD,     MVT::v16i16,  {  4,  2, 5, 6 } }, // paddw + split
1222bdd1243dSDimitry Andric     { ISD::SUB,     MVT::v8i32,   {  4,  2, 5, 6 } }, // psubd + split
1223bdd1243dSDimitry Andric     { ISD::ADD,     MVT::v8i32,   {  4,  2, 5, 6 } }, // paddd + split
1224bdd1243dSDimitry Andric     { ISD::SUB,     MVT::v4i64,   {  4,  2, 5, 6 } }, // psubq + split
1225bdd1243dSDimitry Andric     { ISD::ADD,     MVT::v4i64,   {  4,  2, 5, 6 } }, // paddq + split
1226bdd1243dSDimitry Andric     { ISD::SUB,     MVT::v2i64,   {  1,  1, 1, 1 } }, // psubq
1227bdd1243dSDimitry Andric     { ISD::ADD,     MVT::v2i64,   {  1,  1, 1, 1 } }, // paddq
1228fe6060f1SDimitry Andric 
1229bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v16i8,   { 10, 21,11,17 } }, // pblendvb sequence.
1230bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v32i8,   { 22, 22,27,40 } }, // pblendvb sequence + split.
1231bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v8i16,   {  6,  9,11,11 } }, // pblendvb sequence.
1232bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v16i16,  { 13, 16,24,25 } }, // pblendvb sequence + split.
1233bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v4i32,   {  3, 11, 4, 6 } }, // pslld/paddd/cvttps2dq/pmulld
1234bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v8i32,   {  9, 11,12,17 } }, // pslld/paddd/cvttps2dq/pmulld + split
1235bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v2i64,   {  2,  4, 4, 6 } }, // Shift each lane + blend.
1236bdd1243dSDimitry Andric     { ISD::SHL,     MVT::v4i64,   {  6,  7,11,15 } }, // Shift each lane + blend + split.
1237fe6060f1SDimitry Andric 
1238bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v16i8,   { 11, 27,12,18 } }, // pblendvb sequence.
1239bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v32i8,   { 23, 23,30,43 } }, // pblendvb sequence + split.
1240bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v8i16,   { 13, 16,14,22 } }, // pblendvb sequence.
1241bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v16i16,  { 28, 30,31,48 } }, // pblendvb sequence + split.
1242bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v4i32,   {  6,  7,12,16 } }, // Shift each lane + blend.
1243bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v8i32,   { 14, 14,26,34 } }, // Shift each lane + blend + split.
1244bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v2i64,   {  2,  4, 4, 6 } }, // Shift each lane + blend.
1245bdd1243dSDimitry Andric     { ISD::SRL,     MVT::v4i64,   {  6,  7,11,15 } }, // Shift each lane + blend + split.
12460b57cec5SDimitry Andric 
1247bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v16i8,   { 21, 22,24,36 } }, // pblendvb sequence.
1248bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v32i8,   { 44, 45,51,76 } }, // pblendvb sequence + split.
1249bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v8i16,   { 13, 16,14,22 } }, // pblendvb sequence.
1250bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v16i16,  { 28, 30,31,48 } }, // pblendvb sequence + split.
1251bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v4i32,   {  6,  7,12,16 } }, // Shift each lane + blend.
1252bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v8i32,   { 14, 14,26,34 } }, // Shift each lane + blend + split.
1253bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v2i64,   {  5,  6,10,14 } }, // Shift each lane + blend.
1254bdd1243dSDimitry Andric     { ISD::SRA,     MVT::v4i64,   { 12, 12,22,30 } }, // Shift each lane + blend + split.
1255bdd1243dSDimitry Andric 
1256bdd1243dSDimitry Andric     { ISD::FNEG,    MVT::v4f64,   {  2,  2, 1, 2 } }, // BTVER2 from http://www.agner.org/
1257bdd1243dSDimitry Andric     { ISD::FNEG,    MVT::v8f32,   {  2,  2, 1, 2 } }, // BTVER2 from http://www.agner.org/
1258bdd1243dSDimitry Andric 
1259bdd1243dSDimitry Andric     { ISD::FADD,    MVT::f64,     {  1,  5, 1, 1 } }, // BDVER2 from http://www.agner.org/
1260bdd1243dSDimitry Andric     { ISD::FADD,    MVT::f32,     {  1,  5, 1, 1 } }, // BDVER2 from http://www.agner.org/
1261bdd1243dSDimitry Andric     { ISD::FADD,    MVT::v2f64,   {  1,  5, 1, 1 } }, // BDVER2 from http://www.agner.org/
1262bdd1243dSDimitry Andric     { ISD::FADD,    MVT::v4f32,   {  1,  5, 1, 1 } }, // BDVER2 from http://www.agner.org/
1263bdd1243dSDimitry Andric     { ISD::FADD,    MVT::v4f64,   {  2,  5, 1, 2 } }, // BDVER2 from http://www.agner.org/
1264bdd1243dSDimitry Andric     { ISD::FADD,    MVT::v8f32,   {  2,  5, 1, 2 } }, // BDVER2 from http://www.agner.org/
1265bdd1243dSDimitry Andric 
1266bdd1243dSDimitry Andric     { ISD::FSUB,    MVT::f64,     {  1,  5, 1, 1 } }, // BDVER2 from http://www.agner.org/
1267bdd1243dSDimitry Andric     { ISD::FSUB,    MVT::f32,     {  1,  5, 1, 1 } }, // BDVER2 from http://www.agner.org/
1268bdd1243dSDimitry Andric     { ISD::FSUB,    MVT::v2f64,   {  1,  5, 1, 1 } }, // BDVER2 from http://www.agner.org/
1269bdd1243dSDimitry Andric     { ISD::FSUB,    MVT::v4f32,   {  1,  5, 1, 1 } }, // BDVER2 from http://www.agner.org/
1270bdd1243dSDimitry Andric     { ISD::FSUB,    MVT::v4f64,   {  2,  5, 1, 2 } }, // BDVER2 from http://www.agner.org/
1271bdd1243dSDimitry Andric     { ISD::FSUB,    MVT::v8f32,   {  2,  5, 1, 2 } }, // BDVER2 from http://www.agner.org/
1272bdd1243dSDimitry Andric 
1273bdd1243dSDimitry Andric     { ISD::FMUL,    MVT::f64,     {  2,  5, 1, 1 } }, // BTVER2 from http://www.agner.org/
1274bdd1243dSDimitry Andric     { ISD::FMUL,    MVT::f32,     {  1,  5, 1, 1 } }, // BTVER2 from http://www.agner.org/
1275bdd1243dSDimitry Andric     { ISD::FMUL,    MVT::v2f64,   {  2,  5, 1, 1 } }, // BTVER2 from http://www.agner.org/
1276bdd1243dSDimitry Andric     { ISD::FMUL,    MVT::v4f32,   {  1,  5, 1, 1 } }, // BTVER2 from http://www.agner.org/
1277bdd1243dSDimitry Andric     { ISD::FMUL,    MVT::v4f64,   {  4,  5, 1, 2 } }, // BTVER2 from http://www.agner.org/
1278bdd1243dSDimitry Andric     { ISD::FMUL,    MVT::v8f32,   {  2,  5, 1, 2 } }, // BTVER2 from http://www.agner.org/
1279bdd1243dSDimitry Andric 
1280bdd1243dSDimitry Andric     { ISD::FDIV,    MVT::f32,     { 14, 14, 1, 1 } }, // SNB from http://www.agner.org/
1281bdd1243dSDimitry Andric     { ISD::FDIV,    MVT::v4f32,   { 14, 14, 1, 1 } }, // SNB from http://www.agner.org/
1282bdd1243dSDimitry Andric     { ISD::FDIV,    MVT::v8f32,   { 28, 29, 1, 3 } }, // SNB from http://www.agner.org/
1283bdd1243dSDimitry Andric     { ISD::FDIV,    MVT::f64,     { 22, 22, 1, 1 } }, // SNB from http://www.agner.org/
1284bdd1243dSDimitry Andric     { ISD::FDIV,    MVT::v2f64,   { 22, 22, 1, 1 } }, // SNB from http://www.agner.org/
1285bdd1243dSDimitry Andric     { ISD::FDIV,    MVT::v4f64,   { 44, 45, 1, 3 } }, // SNB from http://www.agner.org/
12860b57cec5SDimitry Andric   };
12870b57cec5SDimitry Andric 
12880b57cec5SDimitry Andric   if (ST->hasAVX())
12890b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second))
1290bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
1291bdd1243dSDimitry Andric         return LT.first * *KindCost;
12920b57cec5SDimitry Andric 
1293bdd1243dSDimitry Andric   static const CostKindTblEntry SSE42CostTable[] = {
1294bdd1243dSDimitry Andric     { ISD::FADD, MVT::f64,    {  1,  3, 1, 1 } }, // Nehalem from http://www.agner.org/
1295bdd1243dSDimitry Andric     { ISD::FADD, MVT::f32,    {  1,  3, 1, 1 } }, // Nehalem from http://www.agner.org/
1296bdd1243dSDimitry Andric     { ISD::FADD, MVT::v2f64,  {  1,  3, 1, 1 } }, // Nehalem from http://www.agner.org/
1297bdd1243dSDimitry Andric     { ISD::FADD, MVT::v4f32,  {  1,  3, 1, 1 } }, // Nehalem from http://www.agner.org/
12980b57cec5SDimitry Andric 
1299bdd1243dSDimitry Andric     { ISD::FSUB, MVT::f64,    {  1,  3, 1, 1 } }, // Nehalem from http://www.agner.org/
1300bdd1243dSDimitry Andric     { ISD::FSUB, MVT::f32 ,   {  1,  3, 1, 1 } }, // Nehalem from http://www.agner.org/
1301bdd1243dSDimitry Andric     { ISD::FSUB, MVT::v2f64,  {  1,  3, 1, 1 } }, // Nehalem from http://www.agner.org/
1302bdd1243dSDimitry Andric     { ISD::FSUB, MVT::v4f32,  {  1,  3, 1, 1 } }, // Nehalem from http://www.agner.org/
13030b57cec5SDimitry Andric 
1304bdd1243dSDimitry Andric     { ISD::FMUL, MVT::f64,    {  1,  5, 1, 1 } }, // Nehalem from http://www.agner.org/
1305bdd1243dSDimitry Andric     { ISD::FMUL, MVT::f32,    {  1,  5, 1, 1 } }, // Nehalem from http://www.agner.org/
1306bdd1243dSDimitry Andric     { ISD::FMUL, MVT::v2f64,  {  1,  5, 1, 1 } }, // Nehalem from http://www.agner.org/
1307bdd1243dSDimitry Andric     { ISD::FMUL, MVT::v4f32,  {  1,  5, 1, 1 } }, // Nehalem from http://www.agner.org/
13080b57cec5SDimitry Andric 
1309bdd1243dSDimitry Andric     { ISD::FDIV,  MVT::f32,   { 14, 14, 1, 1 } }, // Nehalem from http://www.agner.org/
1310bdd1243dSDimitry Andric     { ISD::FDIV,  MVT::v4f32, { 14, 14, 1, 1 } }, // Nehalem from http://www.agner.org/
1311bdd1243dSDimitry Andric     { ISD::FDIV,  MVT::f64,   { 22, 22, 1, 1 } }, // Nehalem from http://www.agner.org/
1312bdd1243dSDimitry Andric     { ISD::FDIV,  MVT::v2f64, { 22, 22, 1, 1 } }, // Nehalem from http://www.agner.org/
1313fe6060f1SDimitry Andric 
1314bdd1243dSDimitry Andric     { ISD::MUL,   MVT::v2i64, {  6, 10,10,10 } }  // 3*pmuludq/3*shift/2*add
13150b57cec5SDimitry Andric   };
13160b57cec5SDimitry Andric 
13170b57cec5SDimitry Andric   if (ST->hasSSE42())
13180b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(SSE42CostTable, ISD, LT.second))
1319bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
1320bdd1243dSDimitry Andric         return LT.first * *KindCost;
13210b57cec5SDimitry Andric 
1322bdd1243dSDimitry Andric   static const CostKindTblEntry SSE41CostTable[] = {
1323bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i8,  { 15, 24,17,22 } }, // pblendvb sequence.
1324bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v8i16,  { 11, 14,11,11 } }, // pblendvb sequence.
1325bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v4i32,  { 14, 20, 4,10 } }, // pslld/paddd/cvttps2dq/pmulld
13260b57cec5SDimitry Andric 
1327bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i8,  { 16, 27,18,24 } }, // pblendvb sequence.
1328bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v8i16,  { 22, 26,23,27 } }, // pblendvb sequence.
1329bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v4i32,  { 16, 17,15,19 } }, // Shift each lane + blend.
1330bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v2i64,  {  4,  6, 5, 7 } }, // splat+shuffle sequence.
13310b57cec5SDimitry Andric 
1332bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i8,  { 38, 41,30,36 } }, // pblendvb sequence.
1333bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v8i16,  { 22, 26,23,27 } }, // pblendvb sequence.
1334bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v4i32,  { 16, 17,15,19 } }, // Shift each lane + blend.
1335bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v2i64,  {  8, 17, 5, 7 } }, // splat+shuffle sequence.
13360b57cec5SDimitry Andric 
1337bdd1243dSDimitry Andric     { ISD::MUL,  MVT::v4i32,  {  2, 11, 1, 1 } }  // pmulld (Nehalem from agner.org)
13380b57cec5SDimitry Andric   };
13390b57cec5SDimitry Andric 
13400b57cec5SDimitry Andric   if (ST->hasSSE41())
13410b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(SSE41CostTable, ISD, LT.second))
1342bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
1343bdd1243dSDimitry Andric         return LT.first * *KindCost;
13440b57cec5SDimitry Andric 
1345*0fca6ea1SDimitry Andric   static const CostKindTblEntry SSSE3CostTable[] = {
1346*0fca6ea1SDimitry Andric     { ISD::MUL,  MVT::v16i8,  {  5, 18,10,12 } }, // 2*pmaddubsw/3*and/psllw/or
1347*0fca6ea1SDimitry Andric   };
1348*0fca6ea1SDimitry Andric 
1349*0fca6ea1SDimitry Andric   if (ST->hasSSSE3())
1350*0fca6ea1SDimitry Andric     if (const auto *Entry = CostTableLookup(SSSE3CostTable, ISD, LT.second))
1351*0fca6ea1SDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
1352*0fca6ea1SDimitry Andric         return LT.first * *KindCost;
1353*0fca6ea1SDimitry Andric 
1354bdd1243dSDimitry Andric   static const CostKindTblEntry SSE2CostTable[] = {
13550b57cec5SDimitry Andric     // We don't correctly identify costs of casts because they are marked as
13560b57cec5SDimitry Andric     // custom.
1357bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v16i8,  { 13, 21,26,28 } }, // cmpgtb sequence.
1358bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v8i16,  { 24, 27,16,20 } }, // cmpgtw sequence.
1359bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v4i32,  { 17, 19,10,12 } }, // pslld/paddd/cvttps2dq/pmuludq.
1360bdd1243dSDimitry Andric     { ISD::SHL,  MVT::v2i64,  {  4,  6, 5, 7 } }, // splat+shuffle sequence.
13610b57cec5SDimitry Andric 
1362bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v16i8,  { 14, 28,27,30 } }, // cmpgtb sequence.
1363bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v8i16,  { 16, 19,31,31 } }, // cmpgtw sequence.
1364bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v4i32,  { 12, 12,15,19 } }, // Shift each lane + blend.
1365bdd1243dSDimitry Andric     { ISD::SRL,  MVT::v2i64,  {  4,  6, 5, 7 } }, // splat+shuffle sequence.
13660b57cec5SDimitry Andric 
1367bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v16i8,  { 27, 30,54,54 } }, // unpacked cmpgtb sequence.
1368bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v8i16,  { 16, 19,31,31 } }, // cmpgtw sequence.
1369bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v4i32,  { 12, 12,15,19 } }, // Shift each lane + blend.
1370bdd1243dSDimitry Andric     { ISD::SRA,  MVT::v2i64,  {  8, 11,12,16 } }, // srl/xor/sub splat+shuffle sequence.
13710b57cec5SDimitry Andric 
1372bdd1243dSDimitry Andric     { ISD::AND,  MVT::v16i8,  {  1,  1, 1, 1 } }, // pand
1373bdd1243dSDimitry Andric     { ISD::AND,  MVT::v8i16,  {  1,  1, 1, 1 } }, // pand
1374bdd1243dSDimitry Andric     { ISD::AND,  MVT::v4i32,  {  1,  1, 1, 1 } }, // pand
1375bdd1243dSDimitry Andric     { ISD::AND,  MVT::v2i64,  {  1,  1, 1, 1 } }, // pand
13760b57cec5SDimitry Andric 
1377bdd1243dSDimitry Andric     { ISD::OR,   MVT::v16i8,  {  1,  1, 1, 1 } }, // por
1378bdd1243dSDimitry Andric     { ISD::OR,   MVT::v8i16,  {  1,  1, 1, 1 } }, // por
1379bdd1243dSDimitry Andric     { ISD::OR,   MVT::v4i32,  {  1,  1, 1, 1 } }, // por
1380bdd1243dSDimitry Andric     { ISD::OR,   MVT::v2i64,  {  1,  1, 1, 1 } }, // por
13810b57cec5SDimitry Andric 
1382bdd1243dSDimitry Andric     { ISD::XOR,  MVT::v16i8,  {  1,  1, 1, 1 } }, // pxor
1383bdd1243dSDimitry Andric     { ISD::XOR,  MVT::v8i16,  {  1,  1, 1, 1 } }, // pxor
1384bdd1243dSDimitry Andric     { ISD::XOR,  MVT::v4i32,  {  1,  1, 1, 1 } }, // pxor
1385bdd1243dSDimitry Andric     { ISD::XOR,  MVT::v2i64,  {  1,  1, 1, 1 } }, // pxor
1386fe6060f1SDimitry Andric 
1387bdd1243dSDimitry Andric     { ISD::ADD,  MVT::v2i64,  {  1,  2, 1, 2 } }, // paddq
1388bdd1243dSDimitry Andric     { ISD::SUB,  MVT::v2i64,  {  1,  2, 1, 2 } }, // psubq
13890b57cec5SDimitry Andric 
1390*0fca6ea1SDimitry Andric     { ISD::MUL,  MVT::v16i8,  {  6, 18,12,12 } }, // 2*unpack/2*pmullw/2*and/pack
1391bdd1243dSDimitry Andric     { ISD::MUL,  MVT::v8i16,  {  1,  5, 1, 1 } }, // pmullw
1392bdd1243dSDimitry Andric     { ISD::MUL,  MVT::v4i32,  {  6,  8, 7, 7 } }, // 3*pmuludq/4*shuffle
139306c3fb27SDimitry Andric     { ISD::MUL,  MVT::v2i64,  {  7, 10,10,10 } }, // 3*pmuludq/3*shift/2*add
139406c3fb27SDimitry Andric 
139506c3fb27SDimitry Andric     { X86ISD::PMULUDQ, MVT::v2i64, { 1,  5, 1, 1 } },
1396bdd1243dSDimitry Andric 
1397bdd1243dSDimitry Andric     { ISD::FDIV, MVT::f32,    { 23, 23, 1, 1 } }, // Pentium IV from http://www.agner.org/
1398bdd1243dSDimitry Andric     { ISD::FDIV, MVT::v4f32,  { 39, 39, 1, 1 } }, // Pentium IV from http://www.agner.org/
1399bdd1243dSDimitry Andric     { ISD::FDIV, MVT::f64,    { 38, 38, 1, 1 } }, // Pentium IV from http://www.agner.org/
1400bdd1243dSDimitry Andric     { ISD::FDIV, MVT::v2f64,  { 69, 69, 1, 1 } }, // Pentium IV from http://www.agner.org/
1401bdd1243dSDimitry Andric 
1402bdd1243dSDimitry Andric     { ISD::FNEG, MVT::f32,    {  1,  1, 1, 1 } }, // Pentium IV from http://www.agner.org/
1403bdd1243dSDimitry Andric     { ISD::FNEG, MVT::f64,    {  1,  1, 1, 1 } }, // Pentium IV from http://www.agner.org/
1404bdd1243dSDimitry Andric     { ISD::FNEG, MVT::v4f32,  {  1,  1, 1, 1 } }, // Pentium IV from http://www.agner.org/
1405bdd1243dSDimitry Andric     { ISD::FNEG, MVT::v2f64,  {  1,  1, 1, 1 } }, // Pentium IV from http://www.agner.org/
1406bdd1243dSDimitry Andric 
1407bdd1243dSDimitry Andric     { ISD::FADD, MVT::f32,    {  2,  3, 1, 1 } }, // Pentium IV from http://www.agner.org/
1408bdd1243dSDimitry Andric     { ISD::FADD, MVT::f64,    {  2,  3, 1, 1 } }, // Pentium IV from http://www.agner.org/
1409bdd1243dSDimitry Andric     { ISD::FADD, MVT::v2f64,  {  2,  3, 1, 1 } }, // Pentium IV from http://www.agner.org/
1410bdd1243dSDimitry Andric 
1411bdd1243dSDimitry Andric     { ISD::FSUB, MVT::f32,    {  2,  3, 1, 1 } }, // Pentium IV from http://www.agner.org/
1412bdd1243dSDimitry Andric     { ISD::FSUB, MVT::f64,    {  2,  3, 1, 1 } }, // Pentium IV from http://www.agner.org/
1413bdd1243dSDimitry Andric     { ISD::FSUB, MVT::v2f64,  {  2,  3, 1, 1 } }, // Pentium IV from http://www.agner.org/
1414bdd1243dSDimitry Andric 
1415bdd1243dSDimitry Andric     { ISD::FMUL, MVT::f64,    {  2,  5, 1, 1 } }, // Pentium IV from http://www.agner.org/
1416bdd1243dSDimitry Andric     { ISD::FMUL, MVT::v2f64,  {  2,  5, 1, 1 } }, // Pentium IV from http://www.agner.org/
14170b57cec5SDimitry Andric   };
14180b57cec5SDimitry Andric 
14190b57cec5SDimitry Andric   if (ST->hasSSE2())
14200b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second))
1421bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
1422bdd1243dSDimitry Andric         return LT.first * *KindCost;
14230b57cec5SDimitry Andric 
1424bdd1243dSDimitry Andric   static const CostKindTblEntry SSE1CostTable[] = {
1425bdd1243dSDimitry Andric     { ISD::FDIV, MVT::f32,   { 17, 18, 1, 1 } }, // Pentium III from http://www.agner.org/
1426bdd1243dSDimitry Andric     { ISD::FDIV, MVT::v4f32, { 34, 48, 1, 1 } }, // Pentium III from http://www.agner.org/
14270b57cec5SDimitry Andric 
1428bdd1243dSDimitry Andric     { ISD::FNEG, MVT::f32,   {  2,  2, 1, 2 } }, // Pentium III from http://www.agner.org/
1429bdd1243dSDimitry Andric     { ISD::FNEG, MVT::v4f32, {  2,  2, 1, 2 } }, // Pentium III from http://www.agner.org/
1430fe6060f1SDimitry Andric 
1431bdd1243dSDimitry Andric     { ISD::FADD, MVT::f32,   {  1,  3, 1, 1 } }, // Pentium III from http://www.agner.org/
1432bdd1243dSDimitry Andric     { ISD::FADD, MVT::v4f32, {  2,  3, 1, 1 } }, // Pentium III from http://www.agner.org/
14330b57cec5SDimitry Andric 
1434bdd1243dSDimitry Andric     { ISD::FSUB, MVT::f32,   {  1,  3, 1, 1 } }, // Pentium III from http://www.agner.org/
1435bdd1243dSDimitry Andric     { ISD::FSUB, MVT::v4f32, {  2,  3, 1, 1 } }, // Pentium III from http://www.agner.org/
1436bdd1243dSDimitry Andric 
1437bdd1243dSDimitry Andric     { ISD::FMUL, MVT::f32,   {  2,  5, 1, 1 } }, // Pentium III from http://www.agner.org/
1438bdd1243dSDimitry Andric     { ISD::FMUL, MVT::v4f32, {  2,  5, 1, 1 } }, // Pentium III from http://www.agner.org/
1439fe6060f1SDimitry Andric   };
14400b57cec5SDimitry Andric 
1441fe6060f1SDimitry Andric   if (ST->hasSSE1())
1442fe6060f1SDimitry Andric     if (const auto *Entry = CostTableLookup(SSE1CostTable, ISD, LT.second))
1443bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
1444bdd1243dSDimitry Andric         return LT.first * *KindCost;
1445fe6060f1SDimitry Andric 
1446bdd1243dSDimitry Andric   static const CostKindTblEntry X64CostTbl[] = { // 64-bit targets
1447bdd1243dSDimitry Andric     { ISD::ADD,  MVT::i64,  {  1 } }, // Core (Merom) from http://www.agner.org/
1448bdd1243dSDimitry Andric     { ISD::SUB,  MVT::i64,  {  1 } }, // Core (Merom) from http://www.agner.org/
144906c3fb27SDimitry Andric     { ISD::MUL,  MVT::i64,  {  2,  6,  1,  2 } },
1450fe6060f1SDimitry Andric   };
1451fe6060f1SDimitry Andric 
1452fe6060f1SDimitry Andric   if (ST->is64Bit())
1453fe6060f1SDimitry Andric     if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, LT.second))
1454bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
1455bdd1243dSDimitry Andric         return LT.first * *KindCost;
1456fe6060f1SDimitry Andric 
1457bdd1243dSDimitry Andric   static const CostKindTblEntry X86CostTbl[] = { // 32 or 64-bit targets
1458bdd1243dSDimitry Andric     { ISD::ADD,  MVT::i8,  {  1 } }, // Pentium III from http://www.agner.org/
1459bdd1243dSDimitry Andric     { ISD::ADD,  MVT::i16, {  1 } }, // Pentium III from http://www.agner.org/
1460bdd1243dSDimitry Andric     { ISD::ADD,  MVT::i32, {  1 } }, // Pentium III from http://www.agner.org/
14610b57cec5SDimitry Andric 
1462bdd1243dSDimitry Andric     { ISD::SUB,  MVT::i8,  {  1 } }, // Pentium III from http://www.agner.org/
1463bdd1243dSDimitry Andric     { ISD::SUB,  MVT::i16, {  1 } }, // Pentium III from http://www.agner.org/
1464bdd1243dSDimitry Andric     { ISD::SUB,  MVT::i32, {  1 } }, // Pentium III from http://www.agner.org/
1465bdd1243dSDimitry Andric 
146606c3fb27SDimitry Andric     { ISD::MUL,  MVT::i8,  {  3,  4, 1, 1 } },
146706c3fb27SDimitry Andric     { ISD::MUL,  MVT::i16, {  2,  4, 1, 1 } },
146806c3fb27SDimitry Andric     { ISD::MUL,  MVT::i32, {  1,  4, 1, 1 } },
146906c3fb27SDimitry Andric 
1470bdd1243dSDimitry Andric     { ISD::FNEG, MVT::f64, {  2,  2, 1, 3 } }, // (x87)
1471bdd1243dSDimitry Andric     { ISD::FADD, MVT::f64, {  2,  3, 1, 1 } }, // (x87)
1472bdd1243dSDimitry Andric     { ISD::FSUB, MVT::f64, {  2,  3, 1, 1 } }, // (x87)
1473bdd1243dSDimitry Andric     { ISD::FMUL, MVT::f64, {  2,  5, 1, 1 } }, // (x87)
1474bdd1243dSDimitry Andric     { ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } }, // (x87)
14750b57cec5SDimitry Andric   };
14760b57cec5SDimitry Andric 
1477fe6060f1SDimitry Andric   if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, LT.second))
1478bdd1243dSDimitry Andric     if (auto KindCost = Entry->Cost[CostKind])
1479bdd1243dSDimitry Andric       return LT.first * *KindCost;
14800b57cec5SDimitry Andric 
  // It is not a good idea to vectorize division. We have to scalarize it and
  // in the process we will often end up having to spill regular
  // registers. The overhead of division is going to dominate most kernels
  // anyway, so try hard to prevent vectorization of division - it is
  // generally a bad idea. Assume somewhat arbitrarily that we have to be able
  // to hide "20 cycles" for each lane.
1487bdd1243dSDimitry Andric   if (CostKind == TTI::TCK_RecipThroughput && LT.second.isVector() &&
1488bdd1243dSDimitry Andric       (ISD == ISD::SDIV || ISD == ISD::SREM || ISD == ISD::UDIV ||
1489bdd1243dSDimitry Andric        ISD == ISD::UREM)) {
1490bdd1243dSDimitry Andric     InstructionCost ScalarCost =
1491bdd1243dSDimitry Andric         getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind,
1492bdd1243dSDimitry Andric                                Op1Info.getNoProps(), Op2Info.getNoProps());
14930b57cec5SDimitry Andric     return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
14940b57cec5SDimitry Andric   }
14950b57cec5SDimitry Andric 
1496bdd1243dSDimitry Andric   // Handle some basic single instruction code size cases.
1497bdd1243dSDimitry Andric   if (CostKind == TTI::TCK_CodeSize) {
1498bdd1243dSDimitry Andric     switch (ISD) {
1499bdd1243dSDimitry Andric     case ISD::FADD:
1500bdd1243dSDimitry Andric     case ISD::FSUB:
1501bdd1243dSDimitry Andric     case ISD::FMUL:
1502bdd1243dSDimitry Andric     case ISD::FDIV:
1503bdd1243dSDimitry Andric     case ISD::FNEG:
1504bdd1243dSDimitry Andric     case ISD::AND:
1505bdd1243dSDimitry Andric     case ISD::OR:
1506bdd1243dSDimitry Andric     case ISD::XOR:
1507bdd1243dSDimitry Andric       return LT.first;
1508bdd1243dSDimitry Andric       break;
1509bdd1243dSDimitry Andric     }
1510bdd1243dSDimitry Andric   }
1511bdd1243dSDimitry Andric 
15120b57cec5SDimitry Andric   // Fallback to the default implementation.
1513bdd1243dSDimitry Andric   return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
1514bdd1243dSDimitry Andric                                        Args, CxtI);
15150b57cec5SDimitry Andric }
15160b57cec5SDimitry Andric 
1517647cbc5dSDimitry Andric InstructionCost
1518647cbc5dSDimitry Andric X86TTIImpl::getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
1519647cbc5dSDimitry Andric                             unsigned Opcode1, const SmallBitVector &OpcodeMask,
1520647cbc5dSDimitry Andric                             TTI::TargetCostKind CostKind) const {
1521647cbc5dSDimitry Andric   if (isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask))
1522647cbc5dSDimitry Andric     return TTI::TCC_Basic;
1523647cbc5dSDimitry Andric   return InstructionCost::getInvalid();
1524647cbc5dSDimitry Andric }
1525647cbc5dSDimitry Andric 
1526*0fca6ea1SDimitry Andric InstructionCost X86TTIImpl::getShuffleCost(
1527*0fca6ea1SDimitry Andric     TTI::ShuffleKind Kind, VectorType *BaseTp, ArrayRef<int> Mask,
1528*0fca6ea1SDimitry Andric     TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
1529*0fca6ea1SDimitry Andric     ArrayRef<const Value *> Args, const Instruction *CxtI) {
15300b57cec5SDimitry Andric   // 64-bit packed float vectors (v2f32) are widened to type v4f32.
15318bcb0991SDimitry Andric   // 64-bit packed integer vectors (v2i32) are widened to type v4i32.
1532bdd1243dSDimitry Andric   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(BaseTp);
15330b57cec5SDimitry Andric 
15345f757f3fSDimitry Andric   Kind = improveShuffleKindFromMask(Kind, Mask, BaseTp, Index, SubTp);
1535bdd1243dSDimitry Andric 
1536*0fca6ea1SDimitry Andric   // Recognize a basic concat_vector shuffle.
1537*0fca6ea1SDimitry Andric   if (Kind == TTI::SK_PermuteTwoSrc &&
1538*0fca6ea1SDimitry Andric       Mask.size() == (2 * BaseTp->getElementCount().getKnownMinValue()) &&
1539*0fca6ea1SDimitry Andric       ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
1540*0fca6ea1SDimitry Andric     return getShuffleCost(TTI::SK_InsertSubvector,
1541*0fca6ea1SDimitry Andric                           VectorType::getDoubleElementsVectorType(BaseTp), Mask,
1542*0fca6ea1SDimitry Andric                           CostKind, Mask.size() / 2, BaseTp);
1543*0fca6ea1SDimitry Andric 
15440b57cec5SDimitry Andric   // Treat Transpose as 2-op shuffles - there's no difference in lowering.
15450b57cec5SDimitry Andric   if (Kind == TTI::SK_Transpose)
15460b57cec5SDimitry Andric     Kind = TTI::SK_PermuteTwoSrc;
15470b57cec5SDimitry Andric 
1548*0fca6ea1SDimitry Andric   if (Kind == TTI::SK_Broadcast) {
15490b57cec5SDimitry Andric     // For Broadcasts we are splatting the first element from the first input
15500b57cec5SDimitry Andric     // register, so only need to reference that input and all the output
15510b57cec5SDimitry Andric     // registers are the same.
15520b57cec5SDimitry Andric     LT.first = 1;
15530b57cec5SDimitry Andric 
1554*0fca6ea1SDimitry Andric     // If we're broadcasting a load then AVX/AVX2 can do this for free.
1555*0fca6ea1SDimitry Andric     using namespace PatternMatch;
1556*0fca6ea1SDimitry Andric     if (!Args.empty() && match(Args[0], m_OneUse(m_Load(m_Value()))) &&
1557*0fca6ea1SDimitry Andric         (ST->hasAVX2() ||
1558*0fca6ea1SDimitry Andric          (ST->hasAVX() && LT.second.getScalarSizeInBits() >= 32)))
1559*0fca6ea1SDimitry Andric       return TTI::TCC_Free;
1560*0fca6ea1SDimitry Andric   }
1561*0fca6ea1SDimitry Andric 
15625f757f3fSDimitry Andric   // Treat <X x bfloat> shuffles as <X x half>.
15635f757f3fSDimitry Andric   if (LT.second.isVector() && LT.second.getScalarType() == MVT::bf16)
15645f757f3fSDimitry Andric     LT.second = LT.second.changeVectorElementType(MVT::f16);
15655f757f3fSDimitry Andric 
15660b57cec5SDimitry Andric   // Subvector extractions are free if they start at the beginning of a
15670b57cec5SDimitry Andric   // vector and cheap if the subvectors are aligned.
15680b57cec5SDimitry Andric   if (Kind == TTI::SK_ExtractSubvector && LT.second.isVector()) {
15690b57cec5SDimitry Andric     int NumElts = LT.second.getVectorNumElements();
15700b57cec5SDimitry Andric     if ((Index % NumElts) == 0)
15710b57cec5SDimitry Andric       return 0;
1572bdd1243dSDimitry Andric     std::pair<InstructionCost, MVT> SubLT = getTypeLegalizationCost(SubTp);
15730b57cec5SDimitry Andric     if (SubLT.second.isVector()) {
15740b57cec5SDimitry Andric       int NumSubElts = SubLT.second.getVectorNumElements();
15750b57cec5SDimitry Andric       if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
15760b57cec5SDimitry Andric         return SubLT.first;
15778bcb0991SDimitry Andric       // Handle some cases for widening legalization. For now we only handle
15788bcb0991SDimitry Andric       // cases where the original subvector was naturally aligned and evenly
15798bcb0991SDimitry Andric       // fit in its legalized subvector type.
15808bcb0991SDimitry Andric       // FIXME: Remove some of the alignment restrictions.
15818bcb0991SDimitry Andric       // FIXME: We can use permq for 64-bit or larger extracts from 256-bit
15828bcb0991SDimitry Andric       // vectors.
15835ffd83dbSDimitry Andric       int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
15845ffd83dbSDimitry Andric       if (NumSubElts > OrigSubElts && (Index % OrigSubElts) == 0 &&
15855ffd83dbSDimitry Andric           (NumSubElts % OrigSubElts) == 0 &&
15868bcb0991SDimitry Andric           LT.second.getVectorElementType() ==
15878bcb0991SDimitry Andric               SubLT.second.getVectorElementType() &&
15888bcb0991SDimitry Andric           LT.second.getVectorElementType().getSizeInBits() ==
15895ffd83dbSDimitry Andric               BaseTp->getElementType()->getPrimitiveSizeInBits()) {
15908bcb0991SDimitry Andric         assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
15918bcb0991SDimitry Andric                "Unexpected number of elements!");
15925ffd83dbSDimitry Andric         auto *VecTy = FixedVectorType::get(BaseTp->getElementType(),
15938bcb0991SDimitry Andric                                            LT.second.getVectorNumElements());
15945ffd83dbSDimitry Andric         auto *SubTy = FixedVectorType::get(BaseTp->getElementType(),
15958bcb0991SDimitry Andric                                            SubLT.second.getVectorNumElements());
15968bcb0991SDimitry Andric         int ExtractIndex = alignDown((Index % NumElts), NumSubElts);
1597bdd1243dSDimitry Andric         InstructionCost ExtractCost =
1598bdd1243dSDimitry Andric             getShuffleCost(TTI::SK_ExtractSubvector, VecTy, std::nullopt,
1599bdd1243dSDimitry Andric                            CostKind, ExtractIndex, SubTy);
16008bcb0991SDimitry Andric 
16018bcb0991SDimitry Andric         // If the original size is 32-bits or more, we can use pshufd. Otherwise
16028bcb0991SDimitry Andric         // if we have SSSE3 we can use pshufb.
16038bcb0991SDimitry Andric         if (SubTp->getPrimitiveSizeInBits() >= 32 || ST->hasSSSE3())
16048bcb0991SDimitry Andric           return ExtractCost + 1; // pshufd or pshufb
16058bcb0991SDimitry Andric 
16068bcb0991SDimitry Andric         assert(SubTp->getPrimitiveSizeInBits() == 16 &&
16078bcb0991SDimitry Andric                "Unexpected vector size");
16088bcb0991SDimitry Andric 
16098bcb0991SDimitry Andric         return ExtractCost + 2; // worst case pshufhw + pshufd
16108bcb0991SDimitry Andric       }
16110b57cec5SDimitry Andric     }
1612*0fca6ea1SDimitry Andric     // If the extract subvector is not optimal, treat it as single op shuffle.
1613*0fca6ea1SDimitry Andric     Kind = TTI::SK_PermuteSingleSrc;
16140b57cec5SDimitry Andric   }
16150b57cec5SDimitry Andric 
1616fe6060f1SDimitry Andric   // Subvector insertions are cheap if the subvectors are aligned.
1617fe6060f1SDimitry Andric   // Note that in general, the insertion starting at the beginning of a vector
1618fe6060f1SDimitry Andric   // isn't free, because we need to preserve the rest of the wide vector.
1619fe6060f1SDimitry Andric   if (Kind == TTI::SK_InsertSubvector && LT.second.isVector()) {
1620fe6060f1SDimitry Andric     int NumElts = LT.second.getVectorNumElements();
1621bdd1243dSDimitry Andric     std::pair<InstructionCost, MVT> SubLT = getTypeLegalizationCost(SubTp);
1622fe6060f1SDimitry Andric     if (SubLT.second.isVector()) {
1623fe6060f1SDimitry Andric       int NumSubElts = SubLT.second.getVectorNumElements();
1624fe6060f1SDimitry Andric       if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1625fe6060f1SDimitry Andric         return SubLT.first;
1626fe6060f1SDimitry Andric     }
1627349cc55cSDimitry Andric 
1628349cc55cSDimitry Andric     // If the insertion isn't aligned, treat it like a 2-op shuffle.
1629349cc55cSDimitry Andric     Kind = TTI::SK_PermuteTwoSrc;
1630fe6060f1SDimitry Andric   }
1631fe6060f1SDimitry Andric 
16325ffd83dbSDimitry Andric   // Handle some common (illegal) sub-vector types as they are often very cheap
16335ffd83dbSDimitry Andric   // to shuffle even on targets without PSHUFB.
16345ffd83dbSDimitry Andric   EVT VT = TLI->getValueType(DL, BaseTp);
16355ffd83dbSDimitry Andric   if (VT.isSimple() && VT.isVector() && VT.getSizeInBits() < 128 &&
16365ffd83dbSDimitry Andric       !ST->hasSSSE3()) {
16375ffd83dbSDimitry Andric      static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
16385ffd83dbSDimitry Andric       {TTI::SK_Broadcast,        MVT::v4i16, 1}, // pshuflw
16395ffd83dbSDimitry Andric       {TTI::SK_Broadcast,        MVT::v2i16, 1}, // pshuflw
16405ffd83dbSDimitry Andric       {TTI::SK_Broadcast,        MVT::v8i8,  2}, // punpck/pshuflw
16415ffd83dbSDimitry Andric       {TTI::SK_Broadcast,        MVT::v4i8,  2}, // punpck/pshuflw
16425ffd83dbSDimitry Andric       {TTI::SK_Broadcast,        MVT::v2i8,  1}, // punpck
16435ffd83dbSDimitry Andric 
16445ffd83dbSDimitry Andric       {TTI::SK_Reverse,          MVT::v4i16, 1}, // pshuflw
16455ffd83dbSDimitry Andric       {TTI::SK_Reverse,          MVT::v2i16, 1}, // pshuflw
16465ffd83dbSDimitry Andric       {TTI::SK_Reverse,          MVT::v4i8,  3}, // punpck/pshuflw/packus
16475ffd83dbSDimitry Andric       {TTI::SK_Reverse,          MVT::v2i8,  1}, // punpck
16485ffd83dbSDimitry Andric 
1649bdd1243dSDimitry Andric       {TTI::SK_Splice,           MVT::v4i16, 2}, // punpck+psrldq
1650bdd1243dSDimitry Andric       {TTI::SK_Splice,           MVT::v2i16, 2}, // punpck+psrldq
1651bdd1243dSDimitry Andric       {TTI::SK_Splice,           MVT::v4i8,  2}, // punpck+psrldq
1652bdd1243dSDimitry Andric       {TTI::SK_Splice,           MVT::v2i8,  2}, // punpck+psrldq
1653bdd1243dSDimitry Andric 
16545ffd83dbSDimitry Andric       {TTI::SK_PermuteTwoSrc,    MVT::v4i16, 2}, // punpck/pshuflw
16555ffd83dbSDimitry Andric       {TTI::SK_PermuteTwoSrc,    MVT::v2i16, 2}, // punpck/pshuflw
16565ffd83dbSDimitry Andric       {TTI::SK_PermuteTwoSrc,    MVT::v8i8,  7}, // punpck/pshuflw
16575ffd83dbSDimitry Andric       {TTI::SK_PermuteTwoSrc,    MVT::v4i8,  4}, // punpck/pshuflw
16585ffd83dbSDimitry Andric       {TTI::SK_PermuteTwoSrc,    MVT::v2i8,  2}, // punpck
16595ffd83dbSDimitry Andric 
16605ffd83dbSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v4i16, 1}, // pshuflw
16615ffd83dbSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v2i16, 1}, // pshuflw
16625ffd83dbSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v8i8,  5}, // punpck/pshuflw
16635ffd83dbSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v4i8,  3}, // punpck/pshuflw
16645ffd83dbSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v2i8,  1}, // punpck
16655ffd83dbSDimitry Andric     };
16665ffd83dbSDimitry Andric 
16675ffd83dbSDimitry Andric     if (ST->hasSSE2())
16685ffd83dbSDimitry Andric       if (const auto *Entry =
16695ffd83dbSDimitry Andric               CostTableLookup(SSE2SubVectorShuffleTbl, Kind, VT.getSimpleVT()))
16705ffd83dbSDimitry Andric         return Entry->Cost;
16715ffd83dbSDimitry Andric   }
16725ffd83dbSDimitry Andric 
16730b57cec5SDimitry Andric   // We are going to permute multiple sources and the result will be in multiple
16740b57cec5SDimitry Andric   // destinations. Providing an accurate cost only for splits where the element
16750b57cec5SDimitry Andric   // type remains the same.
16760b57cec5SDimitry Andric   if (Kind == TTI::SK_PermuteSingleSrc && LT.first != 1) {
16770b57cec5SDimitry Andric     MVT LegalVT = LT.second;
16780b57cec5SDimitry Andric     if (LegalVT.isVector() &&
16790b57cec5SDimitry Andric         LegalVT.getVectorElementType().getSizeInBits() ==
16805ffd83dbSDimitry Andric             BaseTp->getElementType()->getPrimitiveSizeInBits() &&
16815ffd83dbSDimitry Andric         LegalVT.getVectorNumElements() <
16825ffd83dbSDimitry Andric             cast<FixedVectorType>(BaseTp)->getNumElements()) {
16835ffd83dbSDimitry Andric       unsigned VecTySize = DL.getTypeStoreSize(BaseTp);
16840b57cec5SDimitry Andric       unsigned LegalVTSize = LegalVT.getStoreSize();
16850b57cec5SDimitry Andric       // Number of source vectors after legalization:
16860b57cec5SDimitry Andric       unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
16870b57cec5SDimitry Andric       // Number of destination vectors after legalization:
1688fe6060f1SDimitry Andric       InstructionCost NumOfDests = LT.first;
16890b57cec5SDimitry Andric 
16905ffd83dbSDimitry Andric       auto *SingleOpTy = FixedVectorType::get(BaseTp->getElementType(),
16910b57cec5SDimitry Andric                                               LegalVT.getVectorNumElements());
16920b57cec5SDimitry Andric 
169381ad6265SDimitry Andric       if (!Mask.empty() && NumOfDests.isValid()) {
169481ad6265SDimitry Andric         // Try to perform better estimation of the permutation.
169581ad6265SDimitry Andric         // 1. Split the source/destination vectors into real registers.
169681ad6265SDimitry Andric         // 2. Do the mask analysis to identify which real registers are
169781ad6265SDimitry Andric         // permuted. If more than one source register is used to build a
169881ad6265SDimitry Andric         // destination register, the cost for this destination register
169981ad6265SDimitry Andric         // is (Number_of_source_registers - 1) * Cost_PermuteTwoSrc. If only one
170081ad6265SDimitry Andric         // source register is used, build mask and calculate the cost as a cost
170181ad6265SDimitry Andric         // of PermuteSingleSrc.
170281ad6265SDimitry Andric         // Also, for the single register permute we try to identify if the
170381ad6265SDimitry Andric         // destination register is just a copy of the source register or the
170481ad6265SDimitry Andric         // copy of the previous destination register (the cost is
170581ad6265SDimitry Andric         // TTI::TCC_Basic). If the source register is just reused, the cost for
170681ad6265SDimitry Andric         // this operation is 0.
17075f757f3fSDimitry Andric         NumOfDests =
17085f757f3fSDimitry Andric             getTypeLegalizationCost(
17095f757f3fSDimitry Andric                 FixedVectorType::get(BaseTp->getElementType(), Mask.size()))
17105f757f3fSDimitry Andric                 .first;
171181ad6265SDimitry Andric         unsigned E = *NumOfDests.getValue();
171281ad6265SDimitry Andric         unsigned NormalizedVF =
171381ad6265SDimitry Andric             LegalVT.getVectorNumElements() * std::max(NumOfSrcs, E);
171481ad6265SDimitry Andric         unsigned NumOfSrcRegs = NormalizedVF / LegalVT.getVectorNumElements();
171581ad6265SDimitry Andric         unsigned NumOfDestRegs = NormalizedVF / LegalVT.getVectorNumElements();
171606c3fb27SDimitry Andric         SmallVector<int> NormalizedMask(NormalizedVF, PoisonMaskElem);
171781ad6265SDimitry Andric         copy(Mask, NormalizedMask.begin());
171881ad6265SDimitry Andric         unsigned PrevSrcReg = 0;
171981ad6265SDimitry Andric         ArrayRef<int> PrevRegMask;
172081ad6265SDimitry Andric         InstructionCost Cost = 0;
172181ad6265SDimitry Andric         processShuffleMasks(
172281ad6265SDimitry Andric             NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
1723bdd1243dSDimitry Andric             [this, SingleOpTy, CostKind, &PrevSrcReg, &PrevRegMask,
172481ad6265SDimitry Andric              &Cost](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
17255f757f3fSDimitry Andric               if (!ShuffleVectorInst::isIdentityMask(RegMask, RegMask.size())) {
172681ad6265SDimitry Andric                 // Check if the previous register can be just copied to the next
172781ad6265SDimitry Andric                 // one.
172881ad6265SDimitry Andric                 if (PrevRegMask.empty() || PrevSrcReg != SrcReg ||
172981ad6265SDimitry Andric                     PrevRegMask != RegMask)
173081ad6265SDimitry Andric                   Cost += getShuffleCost(TTI::SK_PermuteSingleSrc, SingleOpTy,
1731bdd1243dSDimitry Andric                                          RegMask, CostKind, 0, nullptr);
173281ad6265SDimitry Andric                 else
173381ad6265SDimitry Andric                   // Just a copy of previous destination register.
173481ad6265SDimitry Andric                   Cost += TTI::TCC_Basic;
173581ad6265SDimitry Andric                 return;
173681ad6265SDimitry Andric               }
173781ad6265SDimitry Andric               if (SrcReg != DestReg &&
173806c3fb27SDimitry Andric                   any_of(RegMask, [](int I) { return I != PoisonMaskElem; })) {
173981ad6265SDimitry Andric                 // Just a copy of the source register.
174081ad6265SDimitry Andric                 Cost += TTI::TCC_Basic;
174181ad6265SDimitry Andric               }
174281ad6265SDimitry Andric               PrevSrcReg = SrcReg;
174381ad6265SDimitry Andric               PrevRegMask = RegMask;
174481ad6265SDimitry Andric             },
1745bdd1243dSDimitry Andric             [this, SingleOpTy, CostKind, &Cost](ArrayRef<int> RegMask,
174681ad6265SDimitry Andric                                                 unsigned /*Unused*/,
174781ad6265SDimitry Andric                                                 unsigned /*Unused*/) {
174881ad6265SDimitry Andric               Cost += getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, RegMask,
1749bdd1243dSDimitry Andric                                      CostKind, 0, nullptr);
175081ad6265SDimitry Andric             });
175181ad6265SDimitry Andric         return Cost;
175281ad6265SDimitry Andric       }
175381ad6265SDimitry Andric 
1754fe6060f1SDimitry Andric       InstructionCost NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
1755fe6060f1SDimitry Andric       return NumOfShuffles * getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy,
1756bdd1243dSDimitry Andric                                             std::nullopt, CostKind, 0, nullptr);
17570b57cec5SDimitry Andric     }
17580b57cec5SDimitry Andric 
1759bdd1243dSDimitry Andric     return BaseT::getShuffleCost(Kind, BaseTp, Mask, CostKind, Index, SubTp);
17600b57cec5SDimitry Andric   }
17610b57cec5SDimitry Andric 
17620b57cec5SDimitry Andric   // For 2-input shuffles, we must account for splitting the 2 inputs into many.
17630b57cec5SDimitry Andric   if (Kind == TTI::SK_PermuteTwoSrc && LT.first != 1) {
17640b57cec5SDimitry Andric     // We assume that source and destination have the same vector type.
1765fe6060f1SDimitry Andric     InstructionCost NumOfDests = LT.first;
1766fe6060f1SDimitry Andric     InstructionCost NumOfShufflesPerDest = LT.first * 2 - 1;
17670b57cec5SDimitry Andric     LT.first = NumOfDests * NumOfShufflesPerDest;
17680b57cec5SDimitry Andric   }
17690b57cec5SDimitry Andric 
17700b57cec5SDimitry Andric   static const CostTblEntry AVX512VBMIShuffleTbl[] = {
17710b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v64i8, 1}, // vpermb
17720b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v32i8, 1}, // vpermb
17730b57cec5SDimitry Andric 
17740b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v64i8, 1}, // vpermb
17750b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v32i8, 1}, // vpermb
17760b57cec5SDimitry Andric 
17775ffd83dbSDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v64i8, 2}, // vpermt2b
17785ffd83dbSDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v32i8, 2}, // vpermt2b
17795ffd83dbSDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v16i8, 2}  // vpermt2b
17800b57cec5SDimitry Andric   };
17810b57cec5SDimitry Andric 
17820b57cec5SDimitry Andric   if (ST->hasVBMI())
17830b57cec5SDimitry Andric     if (const auto *Entry =
17840b57cec5SDimitry Andric             CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second))
17850b57cec5SDimitry Andric       return LT.first * Entry->Cost;
17860b57cec5SDimitry Andric 
17870b57cec5SDimitry Andric   static const CostTblEntry AVX512BWShuffleTbl[] = {
17880b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw
1789fcaf7f86SDimitry Andric       {TTI::SK_Broadcast, MVT::v32f16, 1}, // vpbroadcastw
17900b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v64i8, 1},  // vpbroadcastb
17910b57cec5SDimitry Andric 
17925ffd83dbSDimitry Andric       {TTI::SK_Reverse, MVT::v32i16, 2}, // vpermw
1793fcaf7f86SDimitry Andric       {TTI::SK_Reverse, MVT::v32f16, 2}, // vpermw
17945ffd83dbSDimitry Andric       {TTI::SK_Reverse, MVT::v16i16, 2}, // vpermw
17950b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v64i8, 2},  // pshufb + vshufi64x2
17960b57cec5SDimitry Andric 
17975ffd83dbSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v32i16, 2}, // vpermw
1798fcaf7f86SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v32f16, 2}, // vpermw
17995ffd83dbSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v16i16, 2}, // vpermw
1800fcaf7f86SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v16f16, 2}, // vpermw
18010b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v64i8, 8},  // extend to v32i16
18020b57cec5SDimitry Andric 
18035ffd83dbSDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v32i16, 2}, // vpermt2w
1804fcaf7f86SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v32f16, 2}, // vpermt2w
18055ffd83dbSDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v16i16, 2}, // vpermt2w
18065ffd83dbSDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v8i16, 2},  // vpermt2w
18070b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v64i8, 19}, // 6 * v32i8 + 1
1808e8d8bef9SDimitry Andric 
1809e8d8bef9SDimitry Andric       {TTI::SK_Select, MVT::v32i16, 1}, // vblendmw
1810e8d8bef9SDimitry Andric       {TTI::SK_Select, MVT::v64i8,  1}, // vblendmb
1811bdd1243dSDimitry Andric 
1812bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v32i16, 2}, // vshufi64x2 + palignr
1813bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v32f16, 2}, // vshufi64x2 + palignr
1814bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v64i8,  2}, // vshufi64x2 + palignr
18150b57cec5SDimitry Andric   };
18160b57cec5SDimitry Andric 
18170b57cec5SDimitry Andric   if (ST->hasBWI())
18180b57cec5SDimitry Andric     if (const auto *Entry =
18190b57cec5SDimitry Andric             CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second))
18200b57cec5SDimitry Andric       return LT.first * Entry->Cost;
18210b57cec5SDimitry Andric 
1822bdd1243dSDimitry Andric   static const CostKindTblEntry AVX512ShuffleTbl[] = {
1823bdd1243dSDimitry Andric       {TTI::SK_Broadcast, MVT::v8f64,  { 1, 1, 1, 1 } }, // vbroadcastsd
1824bdd1243dSDimitry Andric       {TTI::SK_Broadcast, MVT::v16f32, { 1, 1, 1, 1 } }, // vbroadcastss
1825bdd1243dSDimitry Andric       {TTI::SK_Broadcast, MVT::v8i64,  { 1, 1, 1, 1 } }, // vpbroadcastq
1826bdd1243dSDimitry Andric       {TTI::SK_Broadcast, MVT::v16i32, { 1, 1, 1, 1 } }, // vpbroadcastd
1827bdd1243dSDimitry Andric       {TTI::SK_Broadcast, MVT::v32i16, { 1, 1, 1, 1 } }, // vpbroadcastw
1828bdd1243dSDimitry Andric       {TTI::SK_Broadcast, MVT::v32f16, { 1, 1, 1, 1 } }, // vpbroadcastw
1829bdd1243dSDimitry Andric       {TTI::SK_Broadcast, MVT::v64i8,  { 1, 1, 1, 1 } }, // vpbroadcastb
18300b57cec5SDimitry Andric 
1831bdd1243dSDimitry Andric       {TTI::SK_Reverse, MVT::v8f64,  { 1, 3, 1, 1 } }, // vpermpd
1832bdd1243dSDimitry Andric       {TTI::SK_Reverse, MVT::v16f32, { 1, 3, 1, 1 } }, // vpermps
1833bdd1243dSDimitry Andric       {TTI::SK_Reverse, MVT::v8i64,  { 1, 3, 1, 1 } }, // vpermq
1834bdd1243dSDimitry Andric       {TTI::SK_Reverse, MVT::v16i32, { 1, 3, 1, 1 } }, // vpermd
1835bdd1243dSDimitry Andric       {TTI::SK_Reverse, MVT::v32i16, { 7, 7, 7, 7 } }, // per mca
1836bdd1243dSDimitry Andric       {TTI::SK_Reverse, MVT::v32f16, { 7, 7, 7, 7 } }, // per mca
1837bdd1243dSDimitry Andric       {TTI::SK_Reverse, MVT::v64i8,  { 7, 7, 7, 7 } }, // per mca
18380b57cec5SDimitry Andric 
1839bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v8f64,  { 1, 1, 1, 1 } }, // valignq
1840bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v4f64,  { 1, 1, 1, 1 } }, // valignq
1841bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v16f32, { 1, 1, 1, 1 } }, // valignd
1842bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v8f32,  { 1, 1, 1, 1 } }, // valignd
1843bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v8i64,  { 1, 1, 1, 1 } }, // valignq
1844bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v4i64,  { 1, 1, 1, 1 } }, // valignq
1845bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v16i32, { 1, 1, 1, 1 } }, // valignd
1846bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v8i32,  { 1, 1, 1, 1 } }, // valignd
1847bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v32i16, { 4, 4, 4, 4 } }, // split + palignr
1848bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v32f16, { 4, 4, 4, 4 } }, // split + palignr
1849bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v64i8,  { 4, 4, 4, 4 } }, // split + palignr
18500b57cec5SDimitry Andric 
1851bdd1243dSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v8f64,  { 1, 3, 1, 1 } }, // vpermpd
1852bdd1243dSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v4f64,  { 1, 3, 1, 1 } }, // vpermpd
1853bdd1243dSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v2f64,  { 1, 3, 1, 1 } }, // vpermpd
1854bdd1243dSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v16f32, { 1, 3, 1, 1 } }, // vpermps
1855bdd1243dSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v8f32,  { 1, 3, 1, 1 } }, // vpermps
1856bdd1243dSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v4f32,  { 1, 3, 1, 1 } }, // vpermps
1857bdd1243dSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v8i64,  { 1, 3, 1, 1 } }, // vpermq
1858bdd1243dSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v4i64,  { 1, 3, 1, 1 } }, // vpermq
1859bdd1243dSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v2i64,  { 1, 3, 1, 1 } }, // vpermq
1860bdd1243dSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v16i32, { 1, 3, 1, 1 } }, // vpermd
1861bdd1243dSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v8i32,  { 1, 3, 1, 1 } }, // vpermd
1862bdd1243dSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v4i32,  { 1, 3, 1, 1 } }, // vpermd
1863bdd1243dSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v16i8,  { 1, 3, 1, 1 } }, // pshufb
1864bdd1243dSDimitry Andric 
1865bdd1243dSDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v8f64,  { 1, 3, 1, 1 } }, // vpermt2pd
1866bdd1243dSDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v16f32, { 1, 3, 1, 1 } }, // vpermt2ps
1867bdd1243dSDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v8i64,  { 1, 3, 1, 1 } }, // vpermt2q
1868bdd1243dSDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v16i32, { 1, 3, 1, 1 } }, // vpermt2d
1869bdd1243dSDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v4f64,  { 1, 3, 1, 1 } }, // vpermt2pd
1870bdd1243dSDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v8f32,  { 1, 3, 1, 1 } }, // vpermt2ps
1871bdd1243dSDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v4i64,  { 1, 3, 1, 1 } }, // vpermt2q
1872bdd1243dSDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v8i32,  { 1, 3, 1, 1 } }, // vpermt2d
1873bdd1243dSDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v2f64,  { 1, 3, 1, 1 } }, // vpermt2pd
1874bdd1243dSDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v4f32,  { 1, 3, 1, 1 } }, // vpermt2ps
1875bdd1243dSDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v2i64,  { 1, 3, 1, 1 } }, // vpermt2q
1876bdd1243dSDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v4i32,  { 1, 3, 1, 1 } }, // vpermt2d
18775ffd83dbSDimitry Andric 
18785ffd83dbSDimitry Andric       // FIXME: This just applies the type legalization cost rules above
18795ffd83dbSDimitry Andric       // assuming these completely split.
1880bdd1243dSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v32i16, { 14, 14, 14, 14 } },
1881bdd1243dSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v32f16, { 14, 14, 14, 14 } },
1882bdd1243dSDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v64i8,  { 14, 14, 14, 14 } },
1883bdd1243dSDimitry Andric       {TTI::SK_PermuteTwoSrc,    MVT::v32i16, { 42, 42, 42, 42 } },
1884bdd1243dSDimitry Andric       {TTI::SK_PermuteTwoSrc,    MVT::v32f16, { 42, 42, 42, 42 } },
1885bdd1243dSDimitry Andric       {TTI::SK_PermuteTwoSrc,    MVT::v64i8,  { 42, 42, 42, 42 } },
1886e8d8bef9SDimitry Andric 
1887bdd1243dSDimitry Andric       {TTI::SK_Select, MVT::v32i16, { 1, 1, 1, 1 } }, // vpternlogq
1888bdd1243dSDimitry Andric       {TTI::SK_Select, MVT::v32f16, { 1, 1, 1, 1 } }, // vpternlogq
1889bdd1243dSDimitry Andric       {TTI::SK_Select, MVT::v64i8,  { 1, 1, 1, 1 } }, // vpternlogq
1890bdd1243dSDimitry Andric       {TTI::SK_Select, MVT::v8f64,  { 1, 1, 1, 1 } }, // vblendmpd
1891bdd1243dSDimitry Andric       {TTI::SK_Select, MVT::v16f32, { 1, 1, 1, 1 } }, // vblendmps
1892bdd1243dSDimitry Andric       {TTI::SK_Select, MVT::v8i64,  { 1, 1, 1, 1 } }, // vblendmq
1893bdd1243dSDimitry Andric       {TTI::SK_Select, MVT::v16i32, { 1, 1, 1, 1 } }, // vblendmd
18940b57cec5SDimitry Andric   };
18950b57cec5SDimitry Andric 
18960b57cec5SDimitry Andric   if (ST->hasAVX512())
18970b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
1898bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
1899bdd1243dSDimitry Andric         return LT.first * *KindCost;
19000b57cec5SDimitry Andric 
19010b57cec5SDimitry Andric   static const CostTblEntry AVX2ShuffleTbl[] = {
19020b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v4f64, 1},  // vbroadcastsd
19030b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v8f32, 1},  // vbroadcastss
19040b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v4i64, 1},  // vpbroadcastq
19050b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v8i32, 1},  // vpbroadcastd
19060b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v16i16, 1}, // vpbroadcastw
1907fcaf7f86SDimitry Andric       {TTI::SK_Broadcast, MVT::v16f16, 1}, // vpbroadcastw
19080b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v32i8, 1},  // vpbroadcastb
19090b57cec5SDimitry Andric 
19100b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v4f64, 1},  // vpermpd
19110b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v8f32, 1},  // vpermps
19120b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v4i64, 1},  // vpermq
19130b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v8i32, 1},  // vpermd
19140b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v16i16, 2}, // vperm2i128 + pshufb
1915fcaf7f86SDimitry Andric       {TTI::SK_Reverse, MVT::v16f16, 2}, // vperm2i128 + pshufb
19160b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v32i8, 2},  // vperm2i128 + pshufb
19170b57cec5SDimitry Andric 
19180b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v16i16, 1}, // vpblendvb
1919fcaf7f86SDimitry Andric       {TTI::SK_Select, MVT::v16f16, 1}, // vpblendvb
19200b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v32i8,  1}, // vpblendvb
19210b57cec5SDimitry Andric 
1922bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v8i32,  2}, // vperm2i128 + vpalignr
1923bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v8f32,  2}, // vperm2i128 + vpalignr
1924bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v16i16, 2}, // vperm2i128 + vpalignr
1925bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v16f16, 2}, // vperm2i128 + vpalignr
1926bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v32i8,  2}, // vperm2i128 + vpalignr
1927bdd1243dSDimitry Andric 
19280b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1},  // vpermpd
19290b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1},  // vpermps
19300b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1},  // vpermq
19310b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1},  // vpermd
19320b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vperm2i128 + 2*vpshufb
19330b57cec5SDimitry Andric                                                   // + vpblendvb
1934fcaf7f86SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v16f16, 4}, // vperm2i128 + 2*vpshufb
1935fcaf7f86SDimitry Andric                                                   // + vpblendvb
19360b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4},  // vperm2i128 + 2*vpshufb
19370b57cec5SDimitry Andric                                                   // + vpblendvb
19380b57cec5SDimitry Andric 
19390b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3},  // 2*vpermpd + vblendpd
19400b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v8f32, 3},  // 2*vpermps + vblendps
19410b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3},  // 2*vpermq + vpblendd
19420b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v8i32, 3},  // 2*vpermd + vpblendd
19430b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v16i16, 7}, // 2*vperm2i128 + 4*vpshufb
19440b57cec5SDimitry Andric                                                // + vpblendvb
1945fcaf7f86SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v16f16, 7}, // 2*vperm2i128 + 4*vpshufb
1946fcaf7f86SDimitry Andric                                                // + vpblendvb
19470b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v32i8, 7},  // 2*vperm2i128 + 4*vpshufb
19480b57cec5SDimitry Andric                                                // + vpblendvb
19490b57cec5SDimitry Andric   };
19500b57cec5SDimitry Andric 
19510b57cec5SDimitry Andric   if (ST->hasAVX2())
19520b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
19530b57cec5SDimitry Andric       return LT.first * Entry->Cost;
19540b57cec5SDimitry Andric 
19550b57cec5SDimitry Andric   static const CostTblEntry XOPShuffleTbl[] = {
19560b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2},  // vperm2f128 + vpermil2pd
19570b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v8f32, 2},  // vperm2f128 + vpermil2ps
19580b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2},  // vperm2f128 + vpermil2pd
19590b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v8i32, 2},  // vperm2f128 + vpermil2ps
19600b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vextractf128 + 2*vpperm
19610b57cec5SDimitry Andric                                                   // + vinsertf128
19620b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4},  // vextractf128 + 2*vpperm
19630b57cec5SDimitry Andric                                                   // + vinsertf128
19640b57cec5SDimitry Andric 
19650b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v16i16, 9}, // 2*vextractf128 + 6*vpperm
19660b57cec5SDimitry Andric                                                // + vinsertf128
19670b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1},  // vpperm
19680b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v32i8, 9},  // 2*vextractf128 + 6*vpperm
19690b57cec5SDimitry Andric                                                // + vinsertf128
19700b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1},  // vpperm
19710b57cec5SDimitry Andric   };
19720b57cec5SDimitry Andric 
19730b57cec5SDimitry Andric   if (ST->hasXOP())
19740b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(XOPShuffleTbl, Kind, LT.second))
19750b57cec5SDimitry Andric       return LT.first * Entry->Cost;
19760b57cec5SDimitry Andric 
19770b57cec5SDimitry Andric   static const CostTblEntry AVX1ShuffleTbl[] = {
19780b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v4f64, 2},  // vperm2f128 + vpermilpd
19790b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v8f32, 2},  // vperm2f128 + vpermilps
19800b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v4i64, 2},  // vperm2f128 + vpermilpd
19810b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v8i32, 2},  // vperm2f128 + vpermilps
19820b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v16i16, 3}, // vpshuflw + vpshufd + vinsertf128
1983fcaf7f86SDimitry Andric       {TTI::SK_Broadcast, MVT::v16f16, 3}, // vpshuflw + vpshufd + vinsertf128
19840b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v32i8, 2},  // vpshufb + vinsertf128
19850b57cec5SDimitry Andric 
19860b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v4f64, 2},  // vperm2f128 + vpermilpd
19870b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v8f32, 2},  // vperm2f128 + vpermilps
19880b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v4i64, 2},  // vperm2f128 + vpermilpd
19890b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v8i32, 2},  // vperm2f128 + vpermilps
19900b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v16i16, 4}, // vextractf128 + 2*pshufb
19910b57cec5SDimitry Andric                                          // + vinsertf128
1992fcaf7f86SDimitry Andric       {TTI::SK_Reverse, MVT::v16f16, 4}, // vextractf128 + 2*pshufb
1993fcaf7f86SDimitry Andric                                          // + vinsertf128
19940b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v32i8, 4},  // vextractf128 + 2*pshufb
19950b57cec5SDimitry Andric                                          // + vinsertf128
19960b57cec5SDimitry Andric 
19970b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v4i64, 1},  // vblendpd
19980b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v4f64, 1},  // vblendpd
19990b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v8i32, 1},  // vblendps
20000b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v8f32, 1},  // vblendps
20010b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v16i16, 3}, // vpand + vpandn + vpor
2002fcaf7f86SDimitry Andric       {TTI::SK_Select, MVT::v16f16, 3}, // vpand + vpandn + vpor
20030b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v32i8, 3},  // vpand + vpandn + vpor
20040b57cec5SDimitry Andric 
2005bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v4i64,  2}, // vperm2f128 + shufpd
2006bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v4f64,  2}, // vperm2f128 + shufpd
2007bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v8i32,  4}, // 2*vperm2f128 + 2*vshufps
2008bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v8f32,  4}, // 2*vperm2f128 + 2*vshufps
2009bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v16i16, 5}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
2010bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v16f16, 5}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
2011bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v32i8,  5}, // 2*vperm2f128 + 2*vpalignr + vinsertf128
2012bdd1243dSDimitry Andric 
20130b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2},  // vperm2f128 + vshufpd
20140b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2},  // vperm2f128 + vshufpd
20150b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v8f32, 4},  // 2*vperm2f128 + 2*vshufps
20160b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v8i32, 4},  // 2*vperm2f128 + 2*vshufps
20170b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v16i16, 8}, // vextractf128 + 4*pshufb
20180b57cec5SDimitry Andric                                                   // + 2*por + vinsertf128
2019fcaf7f86SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v16f16, 8}, // vextractf128 + 4*pshufb
2020fcaf7f86SDimitry Andric                                                   // + 2*por + vinsertf128
20210b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v32i8, 8},  // vextractf128 + 4*pshufb
20220b57cec5SDimitry Andric                                                   // + 2*por + vinsertf128
20230b57cec5SDimitry Andric 
20240b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3},   // 2*vperm2f128 + vshufpd
20250b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3},   // 2*vperm2f128 + vshufpd
20260b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v8f32, 4},   // 2*vperm2f128 + 2*vshufps
20270b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v8i32, 4},   // 2*vperm2f128 + 2*vshufps
20280b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v16i16, 15}, // 2*vextractf128 + 8*pshufb
20290b57cec5SDimitry Andric                                                 // + 4*por + vinsertf128
2030fcaf7f86SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v16f16, 15}, // 2*vextractf128 + 8*pshufb
2031fcaf7f86SDimitry Andric                                                 // + 4*por + vinsertf128
20320b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v32i8, 15},  // 2*vextractf128 + 8*pshufb
20330b57cec5SDimitry Andric                                                 // + 4*por + vinsertf128
20340b57cec5SDimitry Andric   };
20350b57cec5SDimitry Andric 
20360b57cec5SDimitry Andric   if (ST->hasAVX())
20370b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
20380b57cec5SDimitry Andric       return LT.first * Entry->Cost;
20390b57cec5SDimitry Andric 
20400b57cec5SDimitry Andric   static const CostTblEntry SSE41ShuffleTbl[] = {
20410b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v2i64, 1}, // pblendw
20420b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v2f64, 1}, // movsd
20430b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v4i32, 1}, // pblendw
20440b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v4f32, 1}, // blendps
20450b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v8i16, 1}, // pblendw
2046fcaf7f86SDimitry Andric       {TTI::SK_Select, MVT::v8f16, 1}, // pblendw
20470b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v16i8, 1}  // pblendvb
20480b57cec5SDimitry Andric   };
20490b57cec5SDimitry Andric 
20500b57cec5SDimitry Andric   if (ST->hasSSE41())
20510b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
20520b57cec5SDimitry Andric       return LT.first * Entry->Cost;
20530b57cec5SDimitry Andric 
20540b57cec5SDimitry Andric   static const CostTblEntry SSSE3ShuffleTbl[] = {
20550b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v8i16, 1}, // pshufb
2056fcaf7f86SDimitry Andric       {TTI::SK_Broadcast, MVT::v8f16, 1}, // pshufb
20570b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v16i8, 1}, // pshufb
20580b57cec5SDimitry Andric 
20590b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v8i16, 1}, // pshufb
2060fcaf7f86SDimitry Andric       {TTI::SK_Reverse, MVT::v8f16, 1}, // pshufb
20610b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v16i8, 1}, // pshufb
20620b57cec5SDimitry Andric 
20630b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v8i16, 3}, // 2*pshufb + por
2064fcaf7f86SDimitry Andric       {TTI::SK_Select, MVT::v8f16, 3}, // 2*pshufb + por
20650b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v16i8, 3}, // 2*pshufb + por
20660b57cec5SDimitry Andric 
2067bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v4i32, 1}, // palignr
2068bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v4f32, 1}, // palignr
2069bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v8i16, 1}, // palignr
2070bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v8f16, 1}, // palignr
2071bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v16i8, 1}, // palignr
2072bdd1243dSDimitry Andric 
20730b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // pshufb
2074fcaf7f86SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v8f16, 1}, // pshufb
20750b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, // pshufb
20760b57cec5SDimitry Andric 
20770b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v8i16, 3}, // 2*pshufb + por
2078fcaf7f86SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v8f16, 3}, // 2*pshufb + por
20790b57cec5SDimitry Andric       {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3}, // 2*pshufb + por
20800b57cec5SDimitry Andric   };
20810b57cec5SDimitry Andric 
20820b57cec5SDimitry Andric   if (ST->hasSSSE3())
20830b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
20840b57cec5SDimitry Andric       return LT.first * Entry->Cost;
20850b57cec5SDimitry Andric 
20860b57cec5SDimitry Andric   static const CostTblEntry SSE2ShuffleTbl[] = {
20870b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v2f64, 1}, // shufpd
20880b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v2i64, 1}, // pshufd
20890b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v4i32, 1}, // pshufd
20900b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v8i16, 2}, // pshuflw + pshufd
2091fcaf7f86SDimitry Andric       {TTI::SK_Broadcast, MVT::v8f16, 2}, // pshuflw + pshufd
20920b57cec5SDimitry Andric       {TTI::SK_Broadcast, MVT::v16i8, 3}, // unpck + pshuflw + pshufd
20930b57cec5SDimitry Andric 
20940b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v2f64, 1}, // shufpd
20950b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v2i64, 1}, // pshufd
20960b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v4i32, 1}, // pshufd
20970b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v8i16, 3}, // pshuflw + pshufhw + pshufd
2098fcaf7f86SDimitry Andric       {TTI::SK_Reverse, MVT::v8f16, 3}, // pshuflw + pshufhw + pshufd
20990b57cec5SDimitry Andric       {TTI::SK_Reverse, MVT::v16i8, 9}, // 2*pshuflw + 2*pshufhw
21000b57cec5SDimitry Andric                                         // + 2*pshufd + 2*unpck + packus
21010b57cec5SDimitry Andric 
21020b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v2i64, 1}, // movsd
21030b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v2f64, 1}, // movsd
21040b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v4i32, 2}, // 2*shufps
21050b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v8i16, 3}, // pand + pandn + por
2106fcaf7f86SDimitry Andric       {TTI::SK_Select, MVT::v8f16, 3}, // pand + pandn + por
21070b57cec5SDimitry Andric       {TTI::SK_Select, MVT::v16i8, 3}, // pand + pandn + por
21080b57cec5SDimitry Andric 
2109bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v2i64, 1}, // shufpd
2110bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v2f64, 1}, // shufpd
2111bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v4i32, 2}, // 2*{unpck,movsd,pshufd}
2112bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v8i16, 3}, // psrldq + pslldq + por
2113bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v8f16, 3}, // psrldq + pslldq + por
2114bdd1243dSDimitry Andric       {TTI::SK_Splice, MVT::v16i8, 3}, // psrldq + pslldq + por
2115bdd1243dSDimitry Andric 
21160b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // shufpd
21170b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, // pshufd
21180b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, // pshufd
21190b57cec5SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v8i16, 5}, // 2*pshuflw + 2*pshufhw
21200b57cec5SDimitry Andric                                                   // + pshufd/unpck
2121fcaf7f86SDimitry Andric       {TTI::SK_PermuteSingleSrc, MVT::v8f16, 5}, // 2*pshuflw + 2*pshufhw
2122fcaf7f86SDimitry Andric                                                   // + pshufd/unpck
21230b57cec5SDimitry Andric     { TTI::SK_PermuteSingleSrc, MVT::v16i8, 10 }, // 2*pshuflw + 2*pshufhw
21240b57cec5SDimitry Andric                                                   // + 2*pshufd + 2*unpck + 2*packus
21250b57cec5SDimitry Andric 
21260b57cec5SDimitry Andric     { TTI::SK_PermuteTwoSrc,    MVT::v2f64,  1 }, // shufpd
21270b57cec5SDimitry Andric     { TTI::SK_PermuteTwoSrc,    MVT::v2i64,  1 }, // shufpd
21280b57cec5SDimitry Andric     { TTI::SK_PermuteTwoSrc,    MVT::v4i32,  2 }, // 2*{unpck,movsd,pshufd}
21290b57cec5SDimitry Andric     { TTI::SK_PermuteTwoSrc,    MVT::v8i16,  8 }, // blend+permute
2130fcaf7f86SDimitry Andric     { TTI::SK_PermuteTwoSrc,    MVT::v8f16,  8 }, // blend+permute
21310b57cec5SDimitry Andric     { TTI::SK_PermuteTwoSrc,    MVT::v16i8, 13 }, // blend+permute
21320b57cec5SDimitry Andric   };
21330b57cec5SDimitry Andric 
213481ad6265SDimitry Andric   static const CostTblEntry SSE3BroadcastLoadTbl[] = {
213581ad6265SDimitry Andric       {TTI::SK_Broadcast, MVT::v2f64, 0}, // broadcast handled by movddup
213681ad6265SDimitry Andric   };
213781ad6265SDimitry Andric 
213881ad6265SDimitry Andric   if (ST->hasSSE2()) {
213981ad6265SDimitry Andric     bool IsLoad =
214081ad6265SDimitry Andric         llvm::any_of(Args, [](const auto &V) { return isa<LoadInst>(V); });
214181ad6265SDimitry Andric     if (ST->hasSSE3() && IsLoad)
214281ad6265SDimitry Andric       if (const auto *Entry =
214381ad6265SDimitry Andric               CostTableLookup(SSE3BroadcastLoadTbl, Kind, LT.second)) {
214481ad6265SDimitry Andric         assert(isLegalBroadcastLoad(BaseTp->getElementType(),
214581ad6265SDimitry Andric                                     LT.second.getVectorElementCount()) &&
214681ad6265SDimitry Andric                "Table entry missing from isLegalBroadcastLoad()");
214781ad6265SDimitry Andric         return LT.first * Entry->Cost;
214881ad6265SDimitry Andric       }
214981ad6265SDimitry Andric 
21500b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
21510b57cec5SDimitry Andric       return LT.first * Entry->Cost;
215281ad6265SDimitry Andric   }
21530b57cec5SDimitry Andric 
21540b57cec5SDimitry Andric   static const CostTblEntry SSE1ShuffleTbl[] = {
21550b57cec5SDimitry Andric     { TTI::SK_Broadcast,        MVT::v4f32, 1 }, // shufps
21560b57cec5SDimitry Andric     { TTI::SK_Reverse,          MVT::v4f32, 1 }, // shufps
21570b57cec5SDimitry Andric     { TTI::SK_Select,           MVT::v4f32, 2 }, // 2*shufps
2158bdd1243dSDimitry Andric     { TTI::SK_Splice,           MVT::v4f32, 2 }, // 2*shufps
21590b57cec5SDimitry Andric     { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // shufps
21600b57cec5SDimitry Andric     { TTI::SK_PermuteTwoSrc,    MVT::v4f32, 2 }, // 2*shufps
21610b57cec5SDimitry Andric   };
21620b57cec5SDimitry Andric 
21630b57cec5SDimitry Andric   if (ST->hasSSE1())
21640b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
21650b57cec5SDimitry Andric       return LT.first * Entry->Cost;
21660b57cec5SDimitry Andric 
2167bdd1243dSDimitry Andric   return BaseT::getShuffleCost(Kind, BaseTp, Mask, CostKind, Index, SubTp);
21680b57cec5SDimitry Andric }
21690b57cec5SDimitry Andric 
2170fe6060f1SDimitry Andric InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
2171fe6060f1SDimitry Andric                                              Type *Src,
2172e8d8bef9SDimitry Andric                                              TTI::CastContextHint CCH,
21735ffd83dbSDimitry Andric                                              TTI::TargetCostKind CostKind,
21740b57cec5SDimitry Andric                                              const Instruction *I) {
21750b57cec5SDimitry Andric   int ISD = TLI->InstructionOpcodeToISD(Opcode);
21760b57cec5SDimitry Andric   assert(ISD && "Invalid opcode");
21770b57cec5SDimitry Andric 
2178fe6060f1SDimitry Andric   // The cost tables include both specific, custom (non-legal) src/dst type
2179fe6060f1SDimitry Andric   // conversions and generic, legalized types. We test for customs first, before
2180fe6060f1SDimitry Andric   // falling back to legalization.
21810b57cec5SDimitry Andric   // FIXME: Need a better design of the cost table to handle non-simple types of
21820b57cec5SDimitry Andric   // potential massive combinations (elem_num x src_type x dst_type).
2183*0fca6ea1SDimitry Andric   static const TypeConversionCostKindTblEntry AVX512BWConversionTbl[]{
2184*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8,  { 1, 1, 1, 1 } },
2185*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8,  { 1, 1, 1, 1 } },
21860b57cec5SDimitry Andric 
21870b57cec5SDimitry Andric     // Mask sign extend has an instruction.
2188*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i8,   MVT::v2i1,   { 1, 1, 1, 1 } },
2189*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i8,  MVT::v2i1,   { 1, 1, 1, 1 } },
2190*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i16,  MVT::v2i1,   { 1, 1, 1, 1 } },
2191*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i16,  MVT::v2i1,   { 1, 1, 1, 1 } },
2192*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i8,   MVT::v4i1,   { 1, 1, 1, 1 } },
2193*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i8,  MVT::v4i1,   { 1, 1, 1, 1 } },
2194*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i16,  MVT::v4i1,   { 1, 1, 1, 1 } },
2195*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i16,  MVT::v4i1,   { 1, 1, 1, 1 } },
2196*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i8,   MVT::v8i1,   { 1, 1, 1, 1 } },
2197*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i8,  MVT::v8i1,   { 1, 1, 1, 1 } },
2198*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i16,  MVT::v8i1,   { 1, 1, 1, 1 } },
2199*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i8,  MVT::v16i1,  { 1, 1, 1, 1 } },
2200*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1,  { 1, 1, 1, 1 } },
2201*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v32i8,  MVT::v32i1,  { 1, 1, 1, 1 } },
2202*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i1,  { 1, 1, 1, 1 } },
2203*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v64i8,  MVT::v64i1,  { 1, 1, 1, 1 } },
2204*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v64i1,  { 1, 1, 1, 1 } },
22050b57cec5SDimitry Andric 
22065ffd83dbSDimitry Andric     // Mask zero extend is a sext + shift.
2207*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i8,   MVT::v2i1,   { 2, 1, 1, 1 } },
2208*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i8,  MVT::v2i1,   { 2, 1, 1, 1 } },
2209*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i16,  MVT::v2i1,   { 2, 1, 1, 1 } },
2210*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i16,  MVT::v2i1,   { 2, 1, 1, 1 } },
2211*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i8,   MVT::v4i1,   { 2, 1, 1, 1 } },
2212*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i8,  MVT::v4i1,   { 2, 1, 1, 1 } },
2213*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i16,  MVT::v4i1,   { 2, 1, 1, 1 } },
2214*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i16,  MVT::v4i1,   { 2, 1, 1, 1 } },
2215*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i8,   MVT::v8i1,   { 2, 1, 1, 1 } },
2216*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i8,  MVT::v8i1,   { 2, 1, 1, 1 } },
2217*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i16,  MVT::v8i1,   { 2, 1, 1, 1 } },
2218*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i8,  MVT::v16i1,  { 2, 1, 1, 1 } },
2219*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1,  { 2, 1, 1, 1 } },
2220*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v32i8,  MVT::v32i1,  { 2, 1, 1, 1 } },
2221*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i1,  { 2, 1, 1, 1 } },
2222*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v64i8,  MVT::v64i1,  { 2, 1, 1, 1 } },
2223*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v64i1,  { 2, 1, 1, 1 } },
22244824e7fdSDimitry Andric 
2225*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i1,   MVT::v2i8,   { 2, 1, 1, 1 } },
2226*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i1,   MVT::v16i8,  { 2, 1, 1, 1 } },
2227*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i1,   MVT::v2i16,  { 2, 1, 1, 1 } },
2228*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i1,   MVT::v8i16,  { 2, 1, 1, 1 } },
2229*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i1,   MVT::v4i8,   { 2, 1, 1, 1 } },
2230*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i1,   MVT::v16i8,  { 2, 1, 1, 1 } },
2231*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i1,   MVT::v4i16,  { 2, 1, 1, 1 } },
2232*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i1,   MVT::v8i16,  { 2, 1, 1, 1 } },
2233*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i1,   MVT::v8i8,   { 2, 1, 1, 1 } },
2234*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i1,   MVT::v16i8,  { 2, 1, 1, 1 } },
2235*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i1,   MVT::v8i16,  { 2, 1, 1, 1 } },
2236*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i1,  MVT::v16i8,  { 2, 1, 1, 1 } },
2237*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i1,  MVT::v16i16, { 2, 1, 1, 1 } },
2238*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v32i1,  MVT::v32i8,  { 2, 1, 1, 1 } },
2239*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v32i1,  MVT::v32i16, { 2, 1, 1, 1 } },
2240*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v64i1,  MVT::v64i8,  { 2, 1, 1, 1 } },
2241*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v64i1,  MVT::v32i16, { 2, 1, 1, 1 } },
22425ffd83dbSDimitry Andric 
2243*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v32i8,  MVT::v32i16, { 2, 1, 1, 1 } },
2244*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v16i16, { 2, 1, 1, 1 } }, // widen to zmm
2245*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i8,   MVT::v2i16,  { 2, 1, 1, 1 } }, // vpmovwb
2246*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i8,   MVT::v4i16,  { 2, 1, 1, 1 } }, // vpmovwb
2247*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i8,   MVT::v8i16,  { 2, 1, 1, 1 } }, // vpmovwb
22480b57cec5SDimitry Andric   };
22490b57cec5SDimitry Andric 
2250*0fca6ea1SDimitry Andric   static const TypeConversionCostKindTblEntry AVX512DQConversionTbl[] = {
22514824e7fdSDimitry Andric     // Mask sign extend has an instruction.
2252*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i64,  MVT::v2i1,   { 1, 1, 1, 1 } },
2253*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i32,  MVT::v2i1,   { 1, 1, 1, 1 } },
2254*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i32,  MVT::v4i1,   { 1, 1, 1, 1 } },
2255*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i1,   { 1, 1, 1, 1 } },
2256*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i1,   { 1, 1, 1, 1 } },
2257*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v16i1,  { 1, 1, 1, 1 } },
2258*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i1,   { 1, 1, 1, 1 } },
2259*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1,  { 1, 1, 1, 1 } },
22604824e7fdSDimitry Andric 
22614824e7fdSDimitry Andric     // Mask zero extend is a sext + shift.
2262*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i64,  MVT::v2i1,   { 2, 1, 1, 1, } },
2263*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i32,  MVT::v2i1,   { 2, 1, 1, 1, } },
2264*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i32,  MVT::v4i1,   { 2, 1, 1, 1, } },
2265*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i1,   { 2, 1, 1, 1, } },
2266*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i1,   { 2, 1, 1, 1, } },
2267*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v16i1,  { 2, 1, 1, 1, } },
2268*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i1,   { 2, 1, 1, 1, } },
2269*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1,  { 2, 1, 1, 1, } },
22704824e7fdSDimitry Andric 
2271*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i1,   MVT::v2i64,  { 2, 1, 1, 1 } },
2272*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i1,   MVT::v4i32,  { 2, 1, 1, 1 } },
2273*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i1,   MVT::v4i32,  { 2, 1, 1, 1 } },
2274*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i1,   MVT::v4i64,  { 2, 1, 1, 1 } },
2275*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i1,   MVT::v8i32,  { 2, 1, 1, 1 } },
2276*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i1,   MVT::v8i64,  { 2, 1, 1, 1 } },
2277*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i1,  MVT::v16i32, { 2, 1, 1, 1 } },
2278*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i1,  MVT::v8i64,  { 2, 1, 1, 1 } },
22794824e7fdSDimitry Andric 
2280*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v8f32,  MVT::v8i64,  { 1, 1, 1, 1 } },
2281*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v8f64,  MVT::v8i64,  { 1, 1, 1, 1 } },
22820b57cec5SDimitry Andric 
2283*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f32,  MVT::v8i64,  { 1, 1, 1, 1 } },
2284*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v8i64,  { 1, 1, 1, 1 } },
22850b57cec5SDimitry Andric 
2286*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v8i64,  MVT::v8f32,  { 1, 1, 1, 1 } },
2287*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v8i64,  MVT::v8f64,  { 1, 1, 1, 1 } },
22880b57cec5SDimitry Andric 
2289*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v8i64,  MVT::v8f32,  { 1, 1, 1, 1 } },
2290*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v8i64,  MVT::v8f64,  { 1, 1, 1, 1 } },
22910b57cec5SDimitry Andric   };
22920b57cec5SDimitry Andric 
22930b57cec5SDimitry Andric   // TODO: For AVX512DQ + AVX512VL, we also have cheap casts for 128-bit and
22940b57cec5SDimitry Andric   // 256-bit wide vectors.
22950b57cec5SDimitry Andric 
2296*0fca6ea1SDimitry Andric   static const TypeConversionCostKindTblEntry AVX512FConversionTbl[] = {
2297*0fca6ea1SDimitry Andric     { ISD::FP_EXTEND, MVT::v8f64,   MVT::v8f32,   { 1, 1, 1, 1 } },
2298*0fca6ea1SDimitry Andric     { ISD::FP_EXTEND, MVT::v8f64,   MVT::v16f32,  { 3, 1, 1, 1 } },
2299*0fca6ea1SDimitry Andric     { ISD::FP_EXTEND, MVT::v16f64,  MVT::v16f32,  { 4, 1, 1, 1 } }, // 2*vcvtps2pd+vextractf64x4
2300*0fca6ea1SDimitry Andric     { ISD::FP_ROUND,  MVT::v8f32,   MVT::v8f64,   { 1, 1, 1, 1 } },
23010b57cec5SDimitry Andric 
2302*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v2i1,    MVT::v2i8,    { 3, 1, 1, 1 } }, // sext+vpslld+vptestmd
2303*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v4i1,    MVT::v4i8,    { 3, 1, 1, 1 } }, // sext+vpslld+vptestmd
2304*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v8i1,    MVT::v8i8,    { 3, 1, 1, 1 } }, // sext+vpslld+vptestmd
2305*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v16i1,   MVT::v16i8,   { 3, 1, 1, 1 } }, // sext+vpslld+vptestmd
2306*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v2i1,    MVT::v2i16,   { 3, 1, 1, 1 } }, // sext+vpsllq+vptestmq
2307*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v4i1,    MVT::v4i16,   { 3, 1, 1, 1 } }, // sext+vpsllq+vptestmq
2308*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v8i1,    MVT::v8i16,   { 3, 1, 1, 1 } }, // sext+vpsllq+vptestmq
2309*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v16i1,   MVT::v16i16,  { 3, 1, 1, 1 } }, // sext+vpslld+vptestmd
2310*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v2i1,    MVT::v2i32,   { 2, 1, 1, 1 } }, // zmm vpslld+vptestmd
2311*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v4i1,    MVT::v4i32,   { 2, 1, 1, 1 } }, // zmm vpslld+vptestmd
2312*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v8i1,    MVT::v8i32,   { 2, 1, 1, 1 } }, // zmm vpslld+vptestmd
2313*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v16i1,   MVT::v16i32,  { 2, 1, 1, 1 } }, // vpslld+vptestmd
2314*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v2i1,    MVT::v2i64,   { 2, 1, 1, 1 } }, // zmm vpsllq+vptestmq
2315*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v4i1,    MVT::v4i64,   { 2, 1, 1, 1 } }, // zmm vpsllq+vptestmq
2316*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v8i1,    MVT::v8i64,   { 2, 1, 1, 1 } }, // vpsllq+vptestmq
2317*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v2i8,    MVT::v2i32,   { 2, 1, 1, 1 } }, // vpmovdb
2318*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v4i8,    MVT::v4i32,   { 2, 1, 1, 1 } }, // vpmovdb
2319*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v16i8,   MVT::v16i32,  { 2, 1, 1, 1 } }, // vpmovdb
2320*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v32i8,   MVT::v16i32,  { 2, 1, 1, 1 } }, // vpmovdb
2321*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v64i8,   MVT::v16i32,  { 2, 1, 1, 1 } }, // vpmovdb
2322*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v16i16,  MVT::v16i32,  { 2, 1, 1, 1 } }, // vpmovdw
2323*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v32i16,  MVT::v16i32,  { 2, 1, 1, 1 } }, // vpmovdw
2324*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v2i8,    MVT::v2i64,   { 2, 1, 1, 1 } }, // vpmovqb
2325*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v2i16,   MVT::v2i64,   { 1, 1, 1, 1 } }, // vpshufb
2326*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v8i8,    MVT::v8i64,   { 2, 1, 1, 1 } }, // vpmovqb
2327*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v16i8,   MVT::v8i64,   { 2, 1, 1, 1 } }, // vpmovqb
2328*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v32i8,   MVT::v8i64,   { 2, 1, 1, 1 } }, // vpmovqb
2329*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v64i8,   MVT::v8i64,   { 2, 1, 1, 1 } }, // vpmovqb
2330*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v8i16,   MVT::v8i64,   { 2, 1, 1, 1 } }, // vpmovqw
2331*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v16i16,  MVT::v8i64,   { 2, 1, 1, 1 } }, // vpmovqw
2332*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v32i16,  MVT::v8i64,   { 2, 1, 1, 1 } }, // vpmovqw
2333*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v8i32,   MVT::v8i64,   { 1, 1, 1, 1 } }, // vpmovqd
2334*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v4i32,   MVT::v4i64,   { 1, 1, 1, 1 } }, // zmm vpmovqd
2335*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v16i8,   MVT::v16i64,  { 5, 1, 1, 1 } },// 2*vpmovqd+concat+vpmovdb
23360b57cec5SDimitry Andric 
2337*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v16i8,  MVT::v16i16,   { 3, 1, 1, 1 } }, // extend to v16i32
2338*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v32i8,  MVT::v32i16,   { 8, 1, 1, 1 } },
2339*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v64i8,  MVT::v32i16,   { 8, 1, 1, 1 } },
23405ffd83dbSDimitry Andric 
23415ffd83dbSDimitry Andric     // Sign extend is zmm vpternlogd+vptruncdb.
23425ffd83dbSDimitry Andric     // Zero extend is zmm broadcast load+vptruncdw.
2343*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i8,   MVT::v2i1,   { 3, 1, 1, 1 } },
2344*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i8,   MVT::v2i1,   { 4, 1, 1, 1 } },
2345*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i8,   MVT::v4i1,   { 3, 1, 1, 1 } },
2346*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i8,   MVT::v4i1,   { 4, 1, 1, 1 } },
2347*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i8,   MVT::v8i1,   { 3, 1, 1, 1 } },
2348*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i8,   MVT::v8i1,   { 4, 1, 1, 1 } },
2349*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i8,  MVT::v16i1,  { 3, 1, 1, 1 } },
2350*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i8,  MVT::v16i1,  { 4, 1, 1, 1 } },
23515ffd83dbSDimitry Andric 
23525ffd83dbSDimitry Andric     // Sign extend is zmm vpternlogd+vptruncdw.
23535ffd83dbSDimitry Andric     // Zero extend is zmm vpternlogd+vptruncdw+vpsrlw.
2354*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i16,  MVT::v2i1,   { 3, 1, 1, 1 } },
2355*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i16,  MVT::v2i1,   { 4, 1, 1, 1 } },
2356*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i16,  MVT::v4i1,   { 3, 1, 1, 1 } },
2357*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i16,  MVT::v4i1,   { 4, 1, 1, 1 } },
2358*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i16,  MVT::v8i1,   { 3, 1, 1, 1 } },
2359*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i16,  MVT::v8i1,   { 4, 1, 1, 1 } },
2360*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1,  { 3, 1, 1, 1 } },
2361*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1,  { 4, 1, 1, 1 } },
23625ffd83dbSDimitry Andric 
2363*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i32,  MVT::v2i1,   { 1, 1, 1, 1 } }, // zmm vpternlogd
2364*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i32,  MVT::v2i1,   { 2, 1, 1, 1 } }, // zmm vpternlogd+psrld
2365*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i32,  MVT::v4i1,   { 1, 1, 1, 1 } }, // zmm vpternlogd
2366*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i32,  MVT::v4i1,   { 2, 1, 1, 1 } }, // zmm vpternlogd+psrld
2367*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i1,   { 1, 1, 1, 1 } }, // zmm vpternlogd
2368*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i1,   { 2, 1, 1, 1 } }, // zmm vpternlogd+psrld
2369*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i64,  MVT::v2i1,   { 1, 1, 1, 1 } }, // zmm vpternlogq
2370*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i64,  MVT::v2i1,   { 2, 1, 1, 1 } }, // zmm vpternlogq+psrlq
2371*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i1,   { 1, 1, 1, 1 } }, // zmm vpternlogq
2372*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i1,   { 2, 1, 1, 1 } }, // zmm vpternlogq+psrlq
23735ffd83dbSDimitry Andric 
2374*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1,  { 1, 1, 1, 1 } }, // vpternlogd
2375*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1,  { 2, 1, 1, 1 } }, // vpternlogd+psrld
2376*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i1,   { 1, 1, 1, 1 } }, // vpternlogq
2377*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i1,   { 2, 1, 1, 1 } }, // vpternlogq+psrlq
23785ffd83dbSDimitry Andric 
2379*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8,  { 1, 1, 1, 1 } },
2380*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8,  { 1, 1, 1, 1 } },
2381*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, { 1, 1, 1, 1 } },
2382*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, { 1, 1, 1, 1 } },
2383*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i8,   { 1, 1, 1, 1 } },
2384*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i8,   { 1, 1, 1, 1 } },
2385*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i16,  { 1, 1, 1, 1 } },
2386*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i16,  { 1, 1, 1, 1 } },
2387*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i32,  { 1, 1, 1, 1 } },
2388*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i32,  { 1, 1, 1, 1 } },
23890b57cec5SDimitry Andric 
2390*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8,  { 3, 1, 1, 1 } }, // FIXME: May not be right
2391*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8,  { 3, 1, 1, 1 } }, // FIXME: May not be right
23925ffd83dbSDimitry Andric 
2393*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v8f64,  MVT::v8i1,   { 4, 1, 1, 1 } },
2394*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i1,  { 3, 1, 1, 1 } },
2395*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v8f64,  MVT::v16i8,  { 2, 1, 1, 1 } },
2396*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i8,  { 1, 1, 1, 1 } },
2397*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v8f64,  MVT::v8i16,  { 2, 1, 1, 1 } },
2398*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i16, { 1, 1, 1, 1 } },
2399*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v8f64,  MVT::v8i32,  { 1, 1, 1, 1 } },
2400*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i32, { 1, 1, 1, 1 } },
24010b57cec5SDimitry Andric 
2402*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v8i1,   { 4, 1, 1, 1 } },
2403*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i1,  { 3, 1, 1, 1 } },
2404*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v16i8,  { 2, 1, 1, 1 } },
2405*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i8,  { 1, 1, 1, 1 } },
2406*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v8i16,  { 2, 1, 1, 1 } },
2407*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i16, { 1, 1, 1, 1 } },
2408*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v8i32,  { 1, 1, 1, 1 } },
2409*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i32, { 1, 1, 1, 1 } },
2410*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f32,  MVT::v8i64,  {26, 1, 1, 1 } },
2411*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v8i64,  { 5, 1, 1, 1 } },
24125ffd83dbSDimitry Andric 
2413*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v16i8,  MVT::v16f32, { 2, 1, 1, 1 } },
2414*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v16i8,  MVT::v16f64, { 7, 1, 1, 1 } },
2415*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v32i8,  MVT::v32f64, {15, 1, 1, 1 } },
2416*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v64i8,  MVT::v64f32, {11, 1, 1, 1 } },
2417*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v64i8,  MVT::v64f64, {31, 1, 1, 1 } },
2418*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v8i16,  MVT::v8f64,  { 3, 1, 1, 1 } },
2419*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v16i16, MVT::v16f64, { 7, 1, 1, 1 } },
2420*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v32i16, MVT::v32f32, { 5, 1, 1, 1 } },
2421*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v32i16, MVT::v32f64, {15, 1, 1, 1 } },
2422*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v8i32,  MVT::v8f64,  { 1, 1, 1, 1 } },
2423*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v16i32, MVT::v16f64, { 3, 1, 1, 1 } },
24245ffd83dbSDimitry Andric 
2425*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v8i32,  MVT::v8f64,  { 1, 1, 1, 1 } },
2426*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v8i16,  MVT::v8f64,  { 3, 1, 1, 1 } },
2427*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v8i8,   MVT::v8f64,  { 3, 1, 1, 1 } },
2428*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v16i32, MVT::v16f32, { 1, 1, 1, 1 } },
2429*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v16i16, MVT::v16f32, { 3, 1, 1, 1 } },
2430*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v16i8,  MVT::v16f32, { 3, 1, 1, 1 } },
24315ffd83dbSDimitry Andric   };
24325ffd83dbSDimitry Andric 
2433*0fca6ea1SDimitry Andric   static const TypeConversionCostKindTblEntry AVX512BWVLConversionTbl[] {
24345ffd83dbSDimitry Andric     // Mask sign extend has an instruction.
2435*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i8,   MVT::v2i1,   { 1, 1, 1, 1 } },
2436*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i8,  MVT::v2i1,   { 1, 1, 1, 1 } },
2437*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i16,  MVT::v2i1,   { 1, 1, 1, 1 } },
2438*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i16,  MVT::v2i1,   { 1, 1, 1, 1 } },
2439*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i16,  MVT::v4i1,   { 1, 1, 1, 1 } },
2440*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i8,  MVT::v4i1,   { 1, 1, 1, 1 } },
2441*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i8,   MVT::v4i1,   { 1, 1, 1, 1 } },
2442*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i16,  MVT::v4i1,   { 1, 1, 1, 1 } },
2443*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i8,   MVT::v8i1,   { 1, 1, 1, 1 } },
2444*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i8,  MVT::v8i1,   { 1, 1, 1, 1 } },
2445*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i16,  MVT::v8i1,   { 1, 1, 1, 1 } },
2446*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i8,  MVT::v16i1,  { 1, 1, 1, 1 } },
2447*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1,  { 1, 1, 1, 1 } },
2448*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v32i8,  MVT::v32i1,  { 1, 1, 1, 1 } },
2449*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v32i1,  { 1, 1, 1, 1 } },
2450*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v32i8,  MVT::v64i1,  { 1, 1, 1, 1 } },
2451*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v64i1,  { 1, 1, 1, 1 } },
24525ffd83dbSDimitry Andric 
24535ffd83dbSDimitry Andric     // Mask zero extend is a sext + shift.
2454*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i8,   MVT::v2i1,   { 2, 1, 1, 1 } },
2455*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i8,  MVT::v2i1,   { 2, 1, 1, 1 } },
2456*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i16,  MVT::v2i1,   { 2, 1, 1, 1 } },
2457*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i16,  MVT::v2i1,   { 2, 1, 1, 1 } },
2458*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i8,   MVT::v4i1,   { 2, 1, 1, 1 } },
2459*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i8,  MVT::v4i1,   { 2, 1, 1, 1 } },
2460*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i16,  MVT::v4i1,   { 2, 1, 1, 1 } },
2461*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i16,  MVT::v4i1,   { 2, 1, 1, 1 } },
2462*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i8,   MVT::v8i1,   { 2, 1, 1, 1 } },
2463*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i8,  MVT::v8i1,   { 2, 1, 1, 1 } },
2464*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i16,  MVT::v8i1,   { 2, 1, 1, 1 } },
2465*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i8,  MVT::v16i1,  { 2, 1, 1, 1 } },
2466*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1,  { 2, 1, 1, 1 } },
2467*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v32i8,  MVT::v32i1,  { 2, 1, 1, 1 } },
2468*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v32i1,  { 2, 1, 1, 1 } },
2469*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v32i8,  MVT::v64i1,  { 2, 1, 1, 1 } },
2470*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v64i1,  { 2, 1, 1, 1 } },
24714824e7fdSDimitry Andric 
2472*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i1,   MVT::v2i8,   { 2, 1, 1, 1 } },
2473*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i1,   MVT::v16i8,  { 2, 1, 1, 1 } },
2474*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i1,   MVT::v2i16,  { 2, 1, 1, 1 } },
2475*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i1,   MVT::v8i16,  { 2, 1, 1, 1 } },
2476*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i1,   MVT::v4i8,   { 2, 1, 1, 1 } },
2477*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i1,   MVT::v16i8,  { 2, 1, 1, 1 } },
2478*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i1,   MVT::v4i16,  { 2, 1, 1, 1 } },
2479*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i1,   MVT::v8i16,  { 2, 1, 1, 1 } },
2480*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i1,   MVT::v8i8,   { 2, 1, 1, 1 } },
2481*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i1,   MVT::v16i8,  { 2, 1, 1, 1 } },
2482*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i1,   MVT::v8i16,  { 2, 1, 1, 1 } },
2483*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i1,  MVT::v16i8,  { 2, 1, 1, 1 } },
2484*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i1,  MVT::v16i16, { 2, 1, 1, 1 } },
2485*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v32i1,  MVT::v32i8,  { 2, 1, 1, 1 } },
2486*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v32i1,  MVT::v16i16, { 2, 1, 1, 1 } },
2487*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v64i1,  MVT::v32i8,  { 2, 1, 1, 1 } },
2488*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v64i1,  MVT::v16i16, { 2, 1, 1, 1 } },
24895ffd83dbSDimitry Andric 
2490*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v16i16, { 2, 1, 1, 1 } },
24915ffd83dbSDimitry Andric   };
24925ffd83dbSDimitry Andric 
2493*0fca6ea1SDimitry Andric   static const TypeConversionCostKindTblEntry AVX512DQVLConversionTbl[] = {
24944824e7fdSDimitry Andric     // Mask sign extend has an instruction.
2495*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i64,  MVT::v2i1,   { 1, 1, 1, 1 } },
2496*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i32,  MVT::v2i1,   { 1, 1, 1, 1 } },
2497*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i32,  MVT::v4i1,   { 1, 1, 1, 1 } },
2498*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v16i1,  { 1, 1, 1, 1 } },
2499*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i1,   { 1, 1, 1, 1 } },
2500*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v8i1,   { 1, 1, 1, 1 } },
2501*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v16i1,  { 1, 1, 1, 1 } },
2502*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i1,   { 1, 1, 1, 1 } },
25034824e7fdSDimitry Andric 
25044824e7fdSDimitry Andric     // Mask zero extend is a sext + shift.
2505*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i64,  MVT::v2i1,   { 2, 1, 1, 1 } },
2506*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i32,  MVT::v2i1,   { 2, 1, 1, 1 } },
2507*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i32,  MVT::v4i1,   { 2, 1, 1, 1 } },
2508*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v16i1,  { 2, 1, 1, 1 } },
2509*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i1,   { 2, 1, 1, 1 } },
2510*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v8i1,   { 2, 1, 1, 1 } },
2511*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v16i1,  { 2, 1, 1, 1 } },
2512*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i1,   { 2, 1, 1, 1 } },
25134824e7fdSDimitry Andric 
2514*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i1,  MVT::v4i64,  { 2, 1, 1, 1 } },
2515*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i1,  MVT::v8i32,  { 2, 1, 1, 1 } },
2516*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i1,   MVT::v2i64,  { 2, 1, 1, 1 } },
2517*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i1,   MVT::v4i32,  { 2, 1, 1, 1 } },
2518*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i1,   MVT::v4i32,  { 2, 1, 1, 1 } },
2519*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i1,   MVT::v4i64,  { 2, 1, 1, 1 } },
2520*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i1,   MVT::v4i64,  { 2, 1, 1, 1 } },
2521*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i1,   MVT::v8i32,  { 2, 1, 1, 1 } },
25224824e7fdSDimitry Andric 
2523*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v2f32,  MVT::v2i64,  { 1, 1, 1, 1 } },
2524*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v2f64,  MVT::v2i64,  { 1, 1, 1, 1 } },
2525*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v4f32,  MVT::v4i64,  { 1, 1, 1, 1 } },
2526*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v4f64,  MVT::v4i64,  { 1, 1, 1, 1 } },
25275ffd83dbSDimitry Andric 
2528*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f32,  MVT::v2i64,  { 1, 1, 1, 1 } },
2529*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v2i64,  { 1, 1, 1, 1 } },
2530*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f32,  MVT::v4i64,  { 1, 1, 1, 1 } },
2531*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f64,  MVT::v4i64,  { 1, 1, 1, 1 } },
25325ffd83dbSDimitry Andric 
2533*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v2i64,  MVT::v4f32,  { 1, 1, 1, 1 } },
2534*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v4i64,  MVT::v4f32,  { 1, 1, 1, 1 } },
2535*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v2i64,  MVT::v2f64,  { 1, 1, 1, 1 } },
2536*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v4i64,  MVT::v4f64,  { 1, 1, 1, 1 } },
25375ffd83dbSDimitry Andric 
2538*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v2i64,  MVT::v4f32,  { 1, 1, 1, 1 } },
2539*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v4i64,  MVT::v4f32,  { 1, 1, 1, 1 } },
2540*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v2i64,  MVT::v2f64,  { 1, 1, 1, 1 } },
2541*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v4i64,  MVT::v4f64,  { 1, 1, 1, 1 } },
25425ffd83dbSDimitry Andric   };
25435ffd83dbSDimitry Andric 
2544*0fca6ea1SDimitry Andric   static const TypeConversionCostKindTblEntry AVX512VLConversionTbl[] = {
2545*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v2i1,    MVT::v2i8,    { 3, 1, 1, 1 } }, // sext+vpslld+vptestmd
2546*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v4i1,    MVT::v4i8,    { 3, 1, 1, 1 } }, // sext+vpslld+vptestmd
2547*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v8i1,    MVT::v8i8,    { 3, 1, 1, 1 } }, // sext+vpslld+vptestmd
2548*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v16i1,   MVT::v16i8,   { 8, 1, 1, 1 } }, // split+2*v8i8
2549*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v2i1,    MVT::v2i16,   { 3, 1, 1, 1 } }, // sext+vpsllq+vptestmq
2550*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v4i1,    MVT::v4i16,   { 3, 1, 1, 1 } }, // sext+vpsllq+vptestmq
2551*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v8i1,    MVT::v8i16,   { 3, 1, 1, 1 } }, // sext+vpsllq+vptestmq
2552*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v16i1,   MVT::v16i16,  { 8, 1, 1, 1 } }, // split+2*v8i16
2553*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v2i1,    MVT::v2i32,   { 2, 1, 1, 1 } }, // vpslld+vptestmd
2554*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v4i1,    MVT::v4i32,   { 2, 1, 1, 1 } }, // vpslld+vptestmd
2555*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v8i1,    MVT::v8i32,   { 2, 1, 1, 1 } }, // vpslld+vptestmd
2556*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v16i1,   MVT::v8i32,   { 2, 1, 1, 1 } }, // vpslld+vptestmd
2557*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v2i1,    MVT::v2i64,   { 2, 1, 1, 1 } }, // vpsllq+vptestmq
2558*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v4i1,    MVT::v4i64,   { 2, 1, 1, 1 } }, // vpsllq+vptestmq
2559*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v4i32,   MVT::v4i64,   { 1, 1, 1, 1 } }, // vpmovqd
2560*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v4i8,    MVT::v4i64,   { 2, 1, 1, 1 } }, // vpmovqb
2561*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v4i16,   MVT::v4i64,   { 2, 1, 1, 1 } }, // vpmovqw
2562*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,  MVT::v8i8,    MVT::v8i32,   { 2, 1, 1, 1 } }, // vpmovwb
25635ffd83dbSDimitry Andric 
25645ffd83dbSDimitry Andric     // sign extend is vpcmpeq+maskedmove+vpmovdw+vpacksswb
25655ffd83dbSDimitry Andric     // zero extend is vpcmpeq+maskedmove+vpmovdw+vpsrlw+vpackuswb
2566*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i8,   MVT::v2i1,   { 5, 1, 1, 1 } },
2567*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i8,   MVT::v2i1,   { 6, 1, 1, 1 } },
2568*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i8,   MVT::v4i1,   { 5, 1, 1, 1 } },
2569*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i8,   MVT::v4i1,   { 6, 1, 1, 1 } },
2570*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i8,   MVT::v8i1,   { 5, 1, 1, 1 } },
2571*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i8,   MVT::v8i1,   { 6, 1, 1, 1 } },
2572*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i8,  MVT::v16i1,  {10, 1, 1, 1 } },
2573*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i8,  MVT::v16i1,  {12, 1, 1, 1 } },
25745ffd83dbSDimitry Andric 
25755ffd83dbSDimitry Andric     // sign extend is vpcmpeq+maskedmove+vpmovdw
25765ffd83dbSDimitry Andric     // zero extend is vpcmpeq+maskedmove+vpmovdw+vpsrlw
2577*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i16,  MVT::v2i1,   { 4, 1, 1, 1 } },
2578*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i16,  MVT::v2i1,   { 5, 1, 1, 1 } },
2579*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i16,  MVT::v4i1,   { 4, 1, 1, 1 } },
2580*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i16,  MVT::v4i1,   { 5, 1, 1, 1 } },
2581*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i16,  MVT::v8i1,   { 4, 1, 1, 1 } },
2582*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i16,  MVT::v8i1,   { 5, 1, 1, 1 } },
2583*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1,  {10, 1, 1, 1 } },
2584*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1,  {12, 1, 1, 1 } },
25855ffd83dbSDimitry Andric 
2586*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i32,  MVT::v2i1,   { 1, 1, 1, 1 } }, // vpternlogd
2587*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i32,  MVT::v2i1,   { 2, 1, 1, 1 } }, // vpternlogd+psrld
2588*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i32,  MVT::v4i1,   { 1, 1, 1, 1 } }, // vpternlogd
2589*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i32,  MVT::v4i1,   { 2, 1, 1, 1 } }, // vpternlogd+psrld
2590*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i1,   { 1, 1, 1, 1 } }, // vpternlogd
2591*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i1,   { 2, 1, 1, 1 } }, // vpternlogd+psrld
2592*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v16i1,  { 1, 1, 1, 1 } }, // vpternlogd
2593*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v16i1,  { 2, 1, 1, 1 } }, // vpternlogd+psrld
2594bdd1243dSDimitry Andric 
2595*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i64,  MVT::v2i1,   { 1, 1, 1, 1 } }, // vpternlogq
2596*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i64,  MVT::v2i1,   { 2, 1, 1, 1 } }, // vpternlogq+psrlq
2597*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i1,   { 1, 1, 1, 1 } }, // vpternlogq
2598*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i1,   { 2, 1, 1, 1 } }, // vpternlogq+psrlq
25995ffd83dbSDimitry Andric 
2600*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v16i8,  { 1, 1, 1, 1 } },
2601*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v16i8,  { 1, 1, 1, 1 } },
2602*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v16i8,  { 1, 1, 1, 1 } },
2603*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v16i8,  { 1, 1, 1, 1 } },
2604*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8,  { 1, 1, 1, 1 } },
2605*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8,  { 1, 1, 1, 1 } },
2606*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v8i16,  { 1, 1, 1, 1 } },
2607*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v8i16,  { 1, 1, 1, 1 } },
2608*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i16,  { 1, 1, 1, 1 } },
2609*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i16,  { 1, 1, 1, 1 } },
2610*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i32,  { 1, 1, 1, 1 } },
2611*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i32,  { 1, 1, 1, 1 } },
2612fe6060f1SDimitry Andric 
2613*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v2f64,  MVT::v16i8,  { 1, 1, 1, 1 } },
2614*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v8f32,  MVT::v16i8,  { 1, 1, 1, 1 } },
2615*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v2f64,  MVT::v8i16,  { 1, 1, 1, 1 } },
2616*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v8f32,  MVT::v8i16,  { 1, 1, 1, 1 } },
2617fe6060f1SDimitry Andric 
2618*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::f32,    MVT::i64,    { 1, 1, 1, 1 } },
2619*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::f64,    MVT::i64,    { 1, 1, 1, 1 } },
2620*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v16i8,  { 1, 1, 1, 1 } },
2621*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f32,  MVT::v16i8,  { 1, 1, 1, 1 } },
2622*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v8i16,  { 1, 1, 1, 1 } },
2623*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f32,  MVT::v8i16,  { 1, 1, 1, 1 } },
2624*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f32,  MVT::v2i32,  { 1, 1, 1, 1 } },
2625*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f32,  MVT::v4i32,  { 1, 1, 1, 1 } },
2626*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f64,  MVT::v4i32,  { 1, 1, 1, 1 } },
2627*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f32,  MVT::v8i32,  { 1, 1, 1, 1 } },
2628*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f32,  MVT::v2i64,  { 5, 1, 1, 1 } },
2629*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v2i64,  { 5, 1, 1, 1 } },
2630*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f64,  MVT::v4i64,  { 5, 1, 1, 1 } },
26310b57cec5SDimitry Andric 
2632*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v16i8,  MVT::v8f32,  { 2, 1, 1, 1 } },
2633*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v16i8,  MVT::v16f32, { 2, 1, 1, 1 } },
2634*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v32i8,  MVT::v32f32, { 5, 1, 1, 1 } },
26355ffd83dbSDimitry Andric 
2636*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::i64,    MVT::f32,    { 1, 1, 1, 1 } },
2637*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::i64,    MVT::f64,    { 1, 1, 1, 1 } },
2638*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v4i32,  MVT::v4f32,  { 1, 1, 1, 1 } },
2639*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v4i32,  MVT::v2f64,  { 1, 1, 1, 1 } },
2640*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v4i32,  MVT::v4f64,  { 1, 1, 1, 1 } },
2641*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v8i32,  MVT::v8f32,  { 1, 1, 1, 1 } },
2642*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v8i32,  MVT::v8f64,  { 1, 1, 1, 1 } },
26430b57cec5SDimitry Andric   };
26440b57cec5SDimitry Andric 
2645*0fca6ea1SDimitry Andric   static const TypeConversionCostKindTblEntry AVX2ConversionTbl[] = {
2646*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i1,   { 3, 1, 1, 1 } },
2647*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i1,   { 3, 1, 1, 1 } },
2648*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i1,   { 3, 1, 1, 1 } },
2649*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i1,   { 3, 1, 1, 1 } },
2650*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1,  { 1, 1, 1, 1 } },
2651*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1,  { 1, 1, 1, 1 } },
2652fe6060f1SDimitry Andric 
2653*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v16i8,  { 2, 1, 1, 1 } },
2654*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v16i8,  { 2, 1, 1, 1 } },
2655*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v16i8,  { 2, 1, 1, 1 } },
2656*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v16i8,  { 2, 1, 1, 1 } },
2657*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8,  { 2, 1, 1, 1 } },
2658*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8,  { 2, 1, 1, 1 } },
2659*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v8i16,  { 2, 1, 1, 1 } },
2660*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v8i16,  { 2, 1, 1, 1 } },
2661*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i16,  { 2, 1, 1, 1 } },
2662*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i16,  { 2, 1, 1, 1 } },
2663*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, { 3, 1, 1, 1 } },
2664*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, { 3, 1, 1, 1 } },
2665*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i32,  { 2, 1, 1, 1 } },
2666*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i32,  { 2, 1, 1, 1 } },
26675ffd83dbSDimitry Andric 
2668*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i1,   MVT::v8i32,  { 2, 1, 1, 1 } },
26690b57cec5SDimitry Andric 
2670*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i16, MVT::v16i32, { 4, 1, 1, 1 } },
2671*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v16i32, { 4, 1, 1, 1 } },
2672*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v8i16,  { 1, 1, 1, 1 } },
2673*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v4i32,  { 1, 1, 1, 1 } },
2674*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v2i64,  { 1, 1, 1, 1 } },
2675*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v8i32,  { 4, 1, 1, 1 } },
2676*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v4i64,  { 4, 1, 1, 1 } },
2677*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i16,  MVT::v4i32,  { 1, 1, 1, 1 } },
2678*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i16,  MVT::v2i64,  { 1, 1, 1, 1 } },
2679*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i16,  MVT::v4i64,  { 5, 1, 1, 1 } },
2680*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i32,  MVT::v4i64,  { 1, 1, 1, 1 } },
2681*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i16,  MVT::v8i32,  { 2, 1, 1, 1 } },
26820b57cec5SDimitry Andric 
2683*0fca6ea1SDimitry Andric     { ISD::FP_EXTEND,   MVT::v8f64,  MVT::v8f32,  { 3, 1, 1, 1 } },
2684*0fca6ea1SDimitry Andric     { ISD::FP_ROUND,    MVT::v8f32,  MVT::v8f64,  { 3, 1, 1, 1 } },
26850b57cec5SDimitry Andric 
2686*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v16i16, MVT::v8f32,  { 1, 1, 1, 1 } },
2687*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v4i32,  MVT::v4f64,  { 1, 1, 1, 1 } },
2688*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v8i32,  MVT::v8f32,  { 1, 1, 1, 1 } },
2689*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v8i32,  MVT::v8f64,  { 3, 1, 1, 1 } },
2690fe6060f1SDimitry Andric 
2691*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::i64,    MVT::f32,    { 3, 1, 1, 1 } },
2692*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::i64,    MVT::f64,    { 3, 1, 1, 1 } },
2693*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v16i16, MVT::v8f32,  { 1, 1, 1, 1 } },
2694*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v4i32,  MVT::v4f32,  { 3, 1, 1, 1 } },
2695*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v4i32,  MVT::v2f64,  { 4, 1, 1, 1 } },
2696*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v4i32,  MVT::v4f64,  { 4, 1, 1, 1 } },
2697*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v8i32,  MVT::v8f32,  { 3, 1, 1, 1 } },
2698*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v8i32,  MVT::v4f64,  { 4, 1, 1, 1 } },
2699fe6060f1SDimitry Andric 
2700*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v2f64,  MVT::v16i8,  { 2, 1, 1, 1 } },
2701*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v8f32,  MVT::v16i8,  { 2, 1, 1, 1 } },
2702*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v2f64,  MVT::v8i16,  { 2, 1, 1, 1 } },
2703*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v8f32,  MVT::v8i16,  { 2, 1, 1, 1 } },
2704*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v4f64,  MVT::v4i32,  { 1, 1, 1, 1 } },
2705*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v8f32,  MVT::v8i32,  { 1, 1, 1, 1 } },
2706*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v8f64,  MVT::v8i32,  { 3, 1, 1, 1 } },
2707fe6060f1SDimitry Andric 
2708*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v16i8,  { 2, 1, 1, 1 } },
2709*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f32,  MVT::v16i8,  { 2, 1, 1, 1 } },
2710*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v8i16,  { 2, 1, 1, 1 } },
2711*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f32,  MVT::v8i16,  { 2, 1, 1, 1 } },
2712*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f32,  MVT::v2i32,  { 2, 1, 1, 1 } },
2713*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v2i32,  { 1, 1, 1, 1 } },
2714*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f32,  MVT::v4i32,  { 2, 1, 1, 1 } },
2715*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f64,  MVT::v4i32,  { 2, 1, 1, 1 } },
2716*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f32,  MVT::v8i32,  { 2, 1, 1, 1 } },
2717*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v8i32,  { 4, 1, 1, 1 } },
27180b57cec5SDimitry Andric   };
27190b57cec5SDimitry Andric 
2720*0fca6ea1SDimitry Andric   static const TypeConversionCostKindTblEntry AVXConversionTbl[] = {
2721*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i1,   { 4, 1, 1, 1 } },
2722*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i1,   { 4, 1, 1, 1 } },
2723*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i1,   { 4, 1, 1, 1 } },
2724*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i1,   { 4, 1, 1, 1 } },
2725*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1,  { 4, 1, 1, 1 } },
2726*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1,  { 4, 1, 1, 1 } },
2727fe6060f1SDimitry Andric 
2728*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v16i8,  { 3, 1, 1, 1 } },
2729*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v16i8,  { 3, 1, 1, 1 } },
2730*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v16i8,  { 3, 1, 1, 1 } },
2731*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v16i8,  { 3, 1, 1, 1 } },
2732*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8,  { 3, 1, 1, 1 } },
2733*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8,  { 3, 1, 1, 1 } },
2734*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v8i16,  { 3, 1, 1, 1 } },
2735*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v8i16,  { 3, 1, 1, 1 } },
2736*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i16,  { 3, 1, 1, 1 } },
2737*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i16,  { 3, 1, 1, 1 } },
2738*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i32,  { 3, 1, 1, 1 } },
2739*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i32,  { 3, 1, 1, 1 } },
27400b57cec5SDimitry Andric 
2741*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i1,   MVT::v4i64,  { 4, 1, 1, 1 } },
2742*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i1,   MVT::v8i32,  { 5, 1, 1, 1 } },
2743*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i1,  MVT::v16i16, { 4, 1, 1, 1 } },
2744*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i1,   MVT::v8i64,  { 9, 1, 1, 1 } },
2745*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i1,  MVT::v16i64, {11, 1, 1, 1 } },
27465ffd83dbSDimitry Andric 
2747*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i16, MVT::v16i32, { 6, 1, 1, 1 } },
2748*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v16i32, { 6, 1, 1, 1 } },
2749*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v16i16, { 2, 1, 1, 1 } }, // and+extract+packuswb
2750*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v8i32,  { 5, 1, 1, 1 } },
2751*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i16,  MVT::v8i32,  { 5, 1, 1, 1 } },
2752*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v4i64,  { 5, 1, 1, 1 } },
2753*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i16,  MVT::v4i64,  { 3, 1, 1, 1 } }, // and+extract+2*packusdw
2754*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i32,  MVT::v4i64,  { 2, 1, 1, 1 } },
27550b57cec5SDimitry Andric 
2756*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v4f32,  MVT::v4i1,   { 3, 1, 1, 1 } },
2757*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v4f64,  MVT::v4i1,   { 3, 1, 1, 1 } },
2758*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v8f32,  MVT::v8i1,   { 8, 1, 1, 1 } },
2759*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v8f32,  MVT::v16i8,  { 4, 1, 1, 1 } },
2760*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v4f64,  MVT::v16i8,  { 2, 1, 1, 1 } },
2761*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v8f32,  MVT::v8i16,  { 4, 1, 1, 1 } },
2762*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v4f64,  MVT::v8i16,  { 2, 1, 1, 1 } },
2763*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v4f64,  MVT::v4i32,  { 2, 1, 1, 1 } },
2764*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v8f32,  MVT::v8i32,  { 2, 1, 1, 1 } },
2765*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v8f64,  MVT::v8i32,  { 4, 1, 1, 1 } },
2766*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v4f32,  MVT::v2i64,  { 5, 1, 1, 1 } },
2767*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v4f32,  MVT::v4i64,  { 8, 1, 1, 1 } },
27680b57cec5SDimitry Andric 
2769*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f32,  MVT::v4i1,   { 7, 1, 1, 1 } },
2770*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f64,  MVT::v4i1,   { 7, 1, 1, 1 } },
2771*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f32,  MVT::v8i1,   { 6, 1, 1, 1 } },
2772*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f32,  MVT::v16i8,  { 4, 1, 1, 1 } },
2773*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f64,  MVT::v16i8,  { 2, 1, 1, 1 } },
2774*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f32,  MVT::v8i16,  { 4, 1, 1, 1 } },
2775*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f64,  MVT::v8i16,  { 2, 1, 1, 1 } },
2776*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f32,  MVT::v2i32,  { 4, 1, 1, 1 } },
2777*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v2i32,  { 4, 1, 1, 1 } },
2778*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f32,  MVT::v4i32,  { 5, 1, 1, 1 } },
2779*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f64,  MVT::v4i32,  { 6, 1, 1, 1 } },
2780*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f32,  MVT::v8i32,  { 8, 1, 1, 1 } },
2781*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v8i32,  {10, 1, 1, 1 } },
2782*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f32,  MVT::v2i64,  {10, 1, 1, 1 } },
2783*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f32,  MVT::v4i64,  {18, 1, 1, 1 } },
2784*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v2i64,  { 5, 1, 1, 1 } },
2785*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f64,  MVT::v4i64,  {10, 1, 1, 1 } },
27860b57cec5SDimitry Andric 
2787*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v16i8,  MVT::v8f32,  { 2, 1, 1, 1 } },
2788*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v16i8,  MVT::v4f64,  { 2, 1, 1, 1 } },
2789*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v32i8,  MVT::v8f32,  { 2, 1, 1, 1 } },
2790*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v32i8,  MVT::v4f64,  { 2, 1, 1, 1 } },
2791*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v8i16,  MVT::v8f32,  { 2, 1, 1, 1 } },
2792*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v8i16,  MVT::v4f64,  { 2, 1, 1, 1 } },
2793*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v16i16, MVT::v8f32,  { 2, 1, 1, 1 } },
2794*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v16i16, MVT::v4f64,  { 2, 1, 1, 1 } },
2795*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v4i32,  MVT::v4f64,  { 2, 1, 1, 1 } },
2796*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v8i32,  MVT::v8f32,  { 2, 1, 1, 1 } },
2797*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v8i32,  MVT::v8f64,  { 5, 1, 1, 1 } },
27985ffd83dbSDimitry Andric 
2799*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v16i8,  MVT::v8f32,  { 2, 1, 1, 1 } },
2800*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v16i8,  MVT::v4f64,  { 2, 1, 1, 1 } },
2801*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v32i8,  MVT::v8f32,  { 2, 1, 1, 1 } },
2802*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v32i8,  MVT::v4f64,  { 2, 1, 1, 1 } },
2803*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v8i16,  MVT::v8f32,  { 2, 1, 1, 1 } },
2804*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v8i16,  MVT::v4f64,  { 2, 1, 1, 1 } },
2805*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v16i16, MVT::v8f32,  { 2, 1, 1, 1 } },
2806*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v16i16, MVT::v4f64,  { 2, 1, 1, 1 } },
2807*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v4i32,  MVT::v4f32,  { 3, 1, 1, 1 } },
2808*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v4i32,  MVT::v2f64,  { 4, 1, 1, 1 } },
2809*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v4i32,  MVT::v4f64,  { 6, 1, 1, 1 } },
2810*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v8i32,  MVT::v8f32,  { 7, 1, 1, 1 } },
2811*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v8i32,  MVT::v4f64,  { 7, 1, 1, 1 } },
28120b57cec5SDimitry Andric 
2813*0fca6ea1SDimitry Andric     { ISD::FP_EXTEND,   MVT::v4f64,  MVT::v4f32,  { 1, 1, 1, 1 } },
2814*0fca6ea1SDimitry Andric     { ISD::FP_ROUND,    MVT::v4f32,  MVT::v4f64,  { 1, 1, 1, 1 } },
28150b57cec5SDimitry Andric   };
28160b57cec5SDimitry Andric 
2817*0fca6ea1SDimitry Andric   static const TypeConversionCostKindTblEntry SSE41ConversionTbl[] = {
2818*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v16i8,   { 1, 1, 1, 1 } },
2819*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v16i8,   { 1, 1, 1, 1 } },
2820*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v16i8,   { 1, 1, 1, 1 } },
2821*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v16i8,   { 1, 1, 1, 1 } },
2822*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v16i8,   { 1, 1, 1, 1 } },
2823*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v16i8,   { 1, 1, 1, 1 } },
2824*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v8i16,   { 1, 1, 1, 1 } },
2825*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v8i16,   { 1, 1, 1, 1 } },
2826*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v8i16,   { 1, 1, 1, 1 } },
2827*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v8i16,   { 1, 1, 1, 1 } },
2828*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v4i32,   { 1, 1, 1, 1 } },
2829*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v4i32,   { 1, 1, 1, 1 } },
28300b57cec5SDimitry Andric 
28315ffd83dbSDimitry Andric     // These truncates end up widening elements.
2832*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i1,   MVT::v2i8,   { 1, 1, 1, 1 } }, // PMOVXZBQ
2833*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i1,   MVT::v2i16,  { 1, 1, 1, 1 } }, // PMOVXZWQ
2834*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i1,   MVT::v4i8,   { 1, 1, 1, 1 } }, // PMOVXZBD
28355ffd83dbSDimitry Andric 
2836*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v4i32,  { 2, 1, 1, 1 } },
2837*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i16,  MVT::v4i32,  { 2, 1, 1, 1 } },
2838*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v2i64,  { 2, 1, 1, 1 } },
28390b57cec5SDimitry Andric 
2840*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::f32,    MVT::i32,    { 1, 1, 1, 1 } },
2841*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::f64,    MVT::i32,    { 1, 1, 1, 1 } },
2842*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::f32,    MVT::i64,    { 1, 1, 1, 1 } },
2843*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::f64,    MVT::i64,    { 1, 1, 1, 1 } },
2844*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v4f32,  MVT::v16i8,  { 1, 1, 1, 1 } },
2845*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v2f64,  MVT::v16i8,  { 1, 1, 1, 1 } },
2846*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v4f32,  MVT::v8i16,  { 1, 1, 1, 1 } },
2847*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v2f64,  MVT::v8i16,  { 1, 1, 1, 1 } },
2848*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v4f32,  MVT::v4i32,  { 1, 1, 1, 1 } },
2849*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v2f64,  MVT::v4i32,  { 1, 1, 1, 1 } },
2850*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v4f64,  MVT::v4i32,  { 2, 1, 1, 1 } },
2851fe6060f1SDimitry Andric 
2852*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::f32,    MVT::i32,    { 1, 1, 1, 1 } },
2853*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::f64,    MVT::i32,    { 1, 1, 1, 1 } },
2854*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::f32,    MVT::i64,    { 4, 1, 1, 1 } },
2855*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::f64,    MVT::i64,    { 4, 1, 1, 1 } },
2856*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f32,  MVT::v16i8,  { 1, 1, 1, 1 } },
2857*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v16i8,  { 1, 1, 1, 1 } },
2858*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f32,  MVT::v8i16,  { 1, 1, 1, 1 } },
2859*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v8i16,  { 1, 1, 1, 1 } },
2860*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f32,  MVT::v2i32,  { 3, 1, 1, 1 } },
2861*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f32,  MVT::v4i32,  { 3, 1, 1, 1 } },
2862*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v4i32,  { 2, 1, 1, 1 } },
2863*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f32,  MVT::v2i64,  {12, 1, 1, 1 } },
2864*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f32,  MVT::v4i64,  {22, 1, 1, 1 } },
2865*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v2i64,  { 4, 1, 1, 1 } },
28665ffd83dbSDimitry Andric 
2867*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::i32,    MVT::f32,    { 1, 1, 1, 1 } },
2868*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::i64,    MVT::f32,    { 1, 1, 1, 1 } },
2869*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::i32,    MVT::f64,    { 1, 1, 1, 1 } },
2870*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::i64,    MVT::f64,    { 1, 1, 1, 1 } },
2871*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v16i8,  MVT::v4f32,  { 2, 1, 1, 1 } },
2872*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v16i8,  MVT::v2f64,  { 2, 1, 1, 1 } },
2873*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v8i16,  MVT::v4f32,  { 1, 1, 1, 1 } },
2874*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v8i16,  MVT::v2f64,  { 1, 1, 1, 1 } },
2875*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v4i32,  MVT::v4f32,  { 1, 1, 1, 1 } },
2876*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v4i32,  MVT::v2f64,  { 1, 1, 1, 1 } },
28775ffd83dbSDimitry Andric 
2878*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::i32,    MVT::f32,    { 1, 1, 1, 1 } },
2879*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::i64,    MVT::f32,    { 4, 1, 1, 1 } },
2880*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::i32,    MVT::f64,    { 1, 1, 1, 1 } },
2881*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::i64,    MVT::f64,    { 4, 1, 1, 1 } },
2882*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v16i8,  MVT::v4f32,  { 2, 1, 1, 1 } },
2883*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v16i8,  MVT::v2f64,  { 2, 1, 1, 1 } },
2884*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v8i16,  MVT::v4f32,  { 1, 1, 1, 1 } },
2885*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v8i16,  MVT::v2f64,  { 1, 1, 1, 1 } },
2886*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v4i32,  MVT::v4f32,  { 4, 1, 1, 1 } },
2887*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v4i32,  MVT::v2f64,  { 4, 1, 1, 1 } },
28880b57cec5SDimitry Andric   };
28890b57cec5SDimitry Andric 
2890*0fca6ea1SDimitry Andric   static const TypeConversionCostKindTblEntry SSE2ConversionTbl[] = {
2891fe6060f1SDimitry Andric     // These are somewhat magic numbers justified by comparing the
2892fe6060f1SDimitry Andric     // output of llvm-mca for our various supported scheduler models
2893fe6060f1SDimitry Andric     // and basing it off the worst case scenario.
2894*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::f32,    MVT::i32,    { 3, 1, 1, 1 } },
2895*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::f64,    MVT::i32,    { 3, 1, 1, 1 } },
2896*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::f32,    MVT::i64,    { 3, 1, 1, 1 } },
2897*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::f64,    MVT::i64,    { 3, 1, 1, 1 } },
2898*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v4f32,  MVT::v16i8,  { 3, 1, 1, 1 } },
2899*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v2f64,  MVT::v16i8,  { 4, 1, 1, 1 } },
2900*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v4f32,  MVT::v8i16,  { 3, 1, 1, 1 } },
2901*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v2f64,  MVT::v8i16,  { 4, 1, 1, 1 } },
2902*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v4f32,  MVT::v4i32,  { 3, 1, 1, 1 } },
2903*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v2f64,  MVT::v4i32,  { 4, 1, 1, 1 } },
2904*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v4f32,  MVT::v2i64,  { 8, 1, 1, 1 } },
2905*0fca6ea1SDimitry Andric     { ISD::SINT_TO_FP,  MVT::v2f64,  MVT::v2i64,  { 8, 1, 1, 1 } },
29060b57cec5SDimitry Andric 
2907*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::f32,    MVT::i32,    { 3, 1, 1, 1 } },
2908*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::f64,    MVT::i32,    { 3, 1, 1, 1 } },
2909*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::f32,    MVT::i64,    { 8, 1, 1, 1 } },
2910*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::f64,    MVT::i64,    { 9, 1, 1, 1 } },
2911*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v16i8,  { 4, 1, 1, 1 } },
2912*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f32,  MVT::v16i8,  { 4, 1, 1, 1 } },
2913*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f32,  MVT::v8i16,  { 4, 1, 1, 1 } },
2914*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v8i16,  { 4, 1, 1, 1 } },
2915*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f32,  MVT::v2i32,  { 7, 1, 1, 1 } },
2916*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v4i32,  { 7, 1, 1, 1 } },
2917*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f32,  MVT::v4i32,  { 5, 1, 1, 1 } },
2918*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v2i64,  {15, 1, 1, 1 } },
2919*0fca6ea1SDimitry Andric     { ISD::UINT_TO_FP,  MVT::v4f32,  MVT::v2i64,  {18, 1, 1, 1 } },
29200b57cec5SDimitry Andric 
2921*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::i32,    MVT::f32,    { 4, 1, 1, 1 } },
2922*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::i64,    MVT::f32,    { 4, 1, 1, 1 } },
2923*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::i32,    MVT::f64,    { 4, 1, 1, 1 } },
2924*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::i64,    MVT::f64,    { 4, 1, 1, 1 } },
2925*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v16i8,  MVT::v4f32,  { 6, 1, 1, 1 } },
2926*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v16i8,  MVT::v2f64,  { 6, 1, 1, 1 } },
2927*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v8i16,  MVT::v4f32,  { 5, 1, 1, 1 } },
2928*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v8i16,  MVT::v2f64,  { 5, 1, 1, 1 } },
2929*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v4i32,  MVT::v4f32,  { 4, 1, 1, 1 } },
2930*0fca6ea1SDimitry Andric     { ISD::FP_TO_SINT,  MVT::v4i32,  MVT::v2f64,  { 4, 1, 1, 1 } },
2931480093f4SDimitry Andric 
2932*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::i32,    MVT::f32,    { 4, 1, 1, 1 } },
2933*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::i64,    MVT::f32,    { 4, 1, 1, 1 } },
2934*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::i32,    MVT::f64,    { 4, 1, 1, 1 } },
2935*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::i64,    MVT::f64,    {15, 1, 1, 1 } },
2936*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v16i8,  MVT::v4f32,  { 6, 1, 1, 1 } },
2937*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v16i8,  MVT::v2f64,  { 6, 1, 1, 1 } },
2938*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v8i16,  MVT::v4f32,  { 5, 1, 1, 1 } },
2939*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v8i16,  MVT::v2f64,  { 5, 1, 1, 1 } },
2940*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v4i32,  MVT::v4f32,  { 8, 1, 1, 1 } },
2941*0fca6ea1SDimitry Andric     { ISD::FP_TO_UINT,  MVT::v4i32,  MVT::v2f64,  { 8, 1, 1, 1 } },
29420b57cec5SDimitry Andric 
2943*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i64,  MVT::v16i8,  { 4, 1, 1, 1 } },
2944*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i64,  MVT::v16i8,  { 4, 1, 1, 1 } },
2945*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i32,  MVT::v16i8,  { 2, 1, 1, 1 } },
2946*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i32,  MVT::v16i8,  { 3, 1, 1, 1 } },
2947*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v8i16,  MVT::v16i8,  { 1, 1, 1, 1 } },
2948*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v8i16,  MVT::v16i8,  { 2, 1, 1, 1 } },
2949*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i64,  MVT::v8i16,  { 2, 1, 1, 1 } },
2950*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i64,  MVT::v8i16,  { 3, 1, 1, 1 } },
2951*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v4i32,  MVT::v8i16,  { 1, 1, 1, 1 } },
2952*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v4i32,  MVT::v8i16,  { 2, 1, 1, 1 } },
2953*0fca6ea1SDimitry Andric     { ISD::ZERO_EXTEND, MVT::v2i64,  MVT::v4i32,  { 1, 1, 1, 1 } },
2954*0fca6ea1SDimitry Andric     { ISD::SIGN_EXTEND, MVT::v2i64,  MVT::v4i32,  { 2, 1, 1, 1 } },
29550b57cec5SDimitry Andric 
29565ffd83dbSDimitry Andric     // These truncates are really widening elements.
2957*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i1,   MVT::v2i32,  { 1, 1, 1, 1 } }, // PSHUFD
2958*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i1,   MVT::v2i16,  { 2, 1, 1, 1 } }, // PUNPCKLWD+DQ
2959*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i1,   MVT::v2i8,   { 3, 1, 1, 1 } }, // PUNPCKLBW+WD+PSHUFD
2960*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i1,   MVT::v4i16,  { 1, 1, 1, 1 } }, // PUNPCKLWD
2961*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i1,   MVT::v4i8,   { 2, 1, 1, 1 } }, // PUNPCKLBW+WD
2962*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i1,   MVT::v8i8,   { 1, 1, 1, 1 } }, // PUNPCKLBW
29635ffd83dbSDimitry Andric 
2964*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v8i16,  { 2, 1, 1, 1 } }, // PAND+PACKUSWB
2965*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v16i16, { 3, 1, 1, 1 } },
2966*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v4i32,  { 3, 1, 1, 1 } }, // PAND+2*PACKUSWB
2967*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v16i32, { 7, 1, 1, 1 } },
2968*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v2i16,  MVT::v2i32,  { 1, 1, 1, 1 } },
2969*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i16,  MVT::v4i32,  { 3, 1, 1, 1 } },
2970*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i16,  MVT::v8i32,  { 5, 1, 1, 1 } },
2971*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i16, MVT::v16i32, {10, 1, 1, 1 } },
2972*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v16i8,  MVT::v2i64,  { 4, 1, 1, 1 } }, // PAND+3*PACKUSWB
2973*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v8i16,  MVT::v2i64,  { 2, 1, 1, 1 } }, // PSHUFD+PSHUFLW
2974*0fca6ea1SDimitry Andric     { ISD::TRUNCATE,    MVT::v4i32,  MVT::v2i64,  { 1, 1, 1, 1 } }, // PSHUFD
29750b57cec5SDimitry Andric   };
29760b57cec5SDimitry Andric 
2977fe6060f1SDimitry Andric   // Attempt to map directly to (simple) MVT types to let us match custom entries.
29780b57cec5SDimitry Andric   EVT SrcTy = TLI->getValueType(DL, Src);
29790b57cec5SDimitry Andric   EVT DstTy = TLI->getValueType(DL, Dst);
29800b57cec5SDimitry Andric 
29810b57cec5SDimitry Andric   // The function getSimpleVT only handles simple value types.
2982fe6060f1SDimitry Andric   if (SrcTy.isSimple() && DstTy.isSimple()) {
29830b57cec5SDimitry Andric     MVT SimpleSrcTy = SrcTy.getSimpleVT();
29840b57cec5SDimitry Andric     MVT SimpleDstTy = DstTy.getSimpleVT();
29850b57cec5SDimitry Andric 
29865ffd83dbSDimitry Andric     if (ST->useAVX512Regs()) {
29870b57cec5SDimitry Andric       if (ST->hasBWI())
2988fe6060f1SDimitry Andric         if (const auto *Entry = ConvertCostTableLookup(
2989fe6060f1SDimitry Andric                 AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2990*0fca6ea1SDimitry Andric           if (auto KindCost = Entry->Cost[CostKind])
2991*0fca6ea1SDimitry Andric             return *KindCost;
29920b57cec5SDimitry Andric 
29930b57cec5SDimitry Andric       if (ST->hasDQI())
2994fe6060f1SDimitry Andric         if (const auto *Entry = ConvertCostTableLookup(
2995fe6060f1SDimitry Andric                 AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2996*0fca6ea1SDimitry Andric           if (auto KindCost = Entry->Cost[CostKind])
2997*0fca6ea1SDimitry Andric             return *KindCost;
29980b57cec5SDimitry Andric 
29990b57cec5SDimitry Andric       if (ST->hasAVX512())
3000fe6060f1SDimitry Andric         if (const auto *Entry = ConvertCostTableLookup(
3001fe6060f1SDimitry Andric                 AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3002*0fca6ea1SDimitry Andric           if (auto KindCost = Entry->Cost[CostKind])
3003*0fca6ea1SDimitry Andric             return *KindCost;
30040b57cec5SDimitry Andric     }
30050b57cec5SDimitry Andric 
30065ffd83dbSDimitry Andric     if (ST->hasBWI())
3007fe6060f1SDimitry Andric       if (const auto *Entry = ConvertCostTableLookup(
3008fe6060f1SDimitry Andric               AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3009*0fca6ea1SDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
3010*0fca6ea1SDimitry Andric           return *KindCost;
30115ffd83dbSDimitry Andric 
30125ffd83dbSDimitry Andric     if (ST->hasDQI())
3013fe6060f1SDimitry Andric       if (const auto *Entry = ConvertCostTableLookup(
3014fe6060f1SDimitry Andric               AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3015*0fca6ea1SDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
3016*0fca6ea1SDimitry Andric           return *KindCost;
30175ffd83dbSDimitry Andric 
30185ffd83dbSDimitry Andric     if (ST->hasAVX512())
30195ffd83dbSDimitry Andric       if (const auto *Entry = ConvertCostTableLookup(AVX512VLConversionTbl, ISD,
30205ffd83dbSDimitry Andric                                                      SimpleDstTy, SimpleSrcTy))
3021*0fca6ea1SDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
3022*0fca6ea1SDimitry Andric           return *KindCost;
30235ffd83dbSDimitry Andric 
30240b57cec5SDimitry Andric     if (ST->hasAVX2()) {
30250b57cec5SDimitry Andric       if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
30260b57cec5SDimitry Andric                                                      SimpleDstTy, SimpleSrcTy))
3027*0fca6ea1SDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
3028*0fca6ea1SDimitry Andric           return *KindCost;
30290b57cec5SDimitry Andric     }
30300b57cec5SDimitry Andric 
30310b57cec5SDimitry Andric     if (ST->hasAVX()) {
30320b57cec5SDimitry Andric       if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD,
30330b57cec5SDimitry Andric                                                      SimpleDstTy, SimpleSrcTy))
3034*0fca6ea1SDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
3035*0fca6ea1SDimitry Andric           return *KindCost;
30360b57cec5SDimitry Andric     }
30370b57cec5SDimitry Andric 
30380b57cec5SDimitry Andric     if (ST->hasSSE41()) {
30390b57cec5SDimitry Andric       if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
30400b57cec5SDimitry Andric                                                      SimpleDstTy, SimpleSrcTy))
3041*0fca6ea1SDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
3042*0fca6ea1SDimitry Andric           return *KindCost;
30430b57cec5SDimitry Andric     }
30440b57cec5SDimitry Andric 
30450b57cec5SDimitry Andric     if (ST->hasSSE2()) {
30460b57cec5SDimitry Andric       if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
30470b57cec5SDimitry Andric                                                      SimpleDstTy, SimpleSrcTy))
3048*0fca6ea1SDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
3049*0fca6ea1SDimitry Andric           return *KindCost;
30500b57cec5SDimitry Andric     }
3051fe6060f1SDimitry Andric   }
3052fe6060f1SDimitry Andric 
3053fe6060f1SDimitry Andric   // Fall back to legalized types.
3054bdd1243dSDimitry Andric   std::pair<InstructionCost, MVT> LTSrc = getTypeLegalizationCost(Src);
3055bdd1243dSDimitry Andric   std::pair<InstructionCost, MVT> LTDest = getTypeLegalizationCost(Dst);
3056fe6060f1SDimitry Andric 
305781ad6265SDimitry Andric   // If we're truncating to the same legalized type - just assume it's free.
305881ad6265SDimitry Andric   if (ISD == ISD::TRUNCATE && LTSrc.second == LTDest.second)
305981ad6265SDimitry Andric     return TTI::TCC_Free;
306081ad6265SDimitry Andric 
3061fe6060f1SDimitry Andric   if (ST->useAVX512Regs()) {
3062fe6060f1SDimitry Andric     if (ST->hasBWI())
3063fe6060f1SDimitry Andric       if (const auto *Entry = ConvertCostTableLookup(
3064fe6060f1SDimitry Andric               AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
3065*0fca6ea1SDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
3066*0fca6ea1SDimitry Andric           return std::max(LTSrc.first, LTDest.first) * *KindCost;
3067fe6060f1SDimitry Andric 
3068fe6060f1SDimitry Andric     if (ST->hasDQI())
3069fe6060f1SDimitry Andric       if (const auto *Entry = ConvertCostTableLookup(
3070fe6060f1SDimitry Andric               AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
3071*0fca6ea1SDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
3072*0fca6ea1SDimitry Andric           return std::max(LTSrc.first, LTDest.first) * *KindCost;
3073fe6060f1SDimitry Andric 
3074fe6060f1SDimitry Andric     if (ST->hasAVX512())
3075fe6060f1SDimitry Andric       if (const auto *Entry = ConvertCostTableLookup(
3076fe6060f1SDimitry Andric               AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
3077*0fca6ea1SDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
3078*0fca6ea1SDimitry Andric           return std::max(LTSrc.first, LTDest.first) * *KindCost;
3079fe6060f1SDimitry Andric   }
3080fe6060f1SDimitry Andric 
3081fe6060f1SDimitry Andric   if (ST->hasBWI())
3082fe6060f1SDimitry Andric     if (const auto *Entry = ConvertCostTableLookup(AVX512BWVLConversionTbl, ISD,
3083fe6060f1SDimitry Andric                                                    LTDest.second, LTSrc.second))
3084*0fca6ea1SDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
3085*0fca6ea1SDimitry Andric         return std::max(LTSrc.first, LTDest.first) * *KindCost;
3086fe6060f1SDimitry Andric 
3087fe6060f1SDimitry Andric   if (ST->hasDQI())
3088fe6060f1SDimitry Andric     if (const auto *Entry = ConvertCostTableLookup(AVX512DQVLConversionTbl, ISD,
3089fe6060f1SDimitry Andric                                                    LTDest.second, LTSrc.second))
3090*0fca6ea1SDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
3091*0fca6ea1SDimitry Andric         return std::max(LTSrc.first, LTDest.first) * *KindCost;
3092fe6060f1SDimitry Andric 
3093fe6060f1SDimitry Andric   if (ST->hasAVX512())
3094fe6060f1SDimitry Andric     if (const auto *Entry = ConvertCostTableLookup(AVX512VLConversionTbl, ISD,
3095fe6060f1SDimitry Andric                                                    LTDest.second, LTSrc.second))
3096*0fca6ea1SDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
3097*0fca6ea1SDimitry Andric         return std::max(LTSrc.first, LTDest.first) * *KindCost;
3098fe6060f1SDimitry Andric 
3099fe6060f1SDimitry Andric   if (ST->hasAVX2())
3100fe6060f1SDimitry Andric     if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
3101fe6060f1SDimitry Andric                                                    LTDest.second, LTSrc.second))
3102*0fca6ea1SDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
3103*0fca6ea1SDimitry Andric         return std::max(LTSrc.first, LTDest.first) * *KindCost;
3104fe6060f1SDimitry Andric 
3105fe6060f1SDimitry Andric   if (ST->hasAVX())
3106fe6060f1SDimitry Andric     if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD,
3107fe6060f1SDimitry Andric                                                    LTDest.second, LTSrc.second))
3108*0fca6ea1SDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
3109*0fca6ea1SDimitry Andric         return std::max(LTSrc.first, LTDest.first) * *KindCost;
3110fe6060f1SDimitry Andric 
3111fe6060f1SDimitry Andric   if (ST->hasSSE41())
3112fe6060f1SDimitry Andric     if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
3113fe6060f1SDimitry Andric                                                    LTDest.second, LTSrc.second))
3114*0fca6ea1SDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
3115*0fca6ea1SDimitry Andric         return std::max(LTSrc.first, LTDest.first) * *KindCost;
3116fe6060f1SDimitry Andric 
3117fe6060f1SDimitry Andric   if (ST->hasSSE2())
3118fe6060f1SDimitry Andric     if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
3119fe6060f1SDimitry Andric                                                    LTDest.second, LTSrc.second))
3120*0fca6ea1SDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
3121*0fca6ea1SDimitry Andric         return std::max(LTSrc.first, LTDest.first) * *KindCost;
3122fe6060f1SDimitry Andric 
3123fe6060f1SDimitry Andric   // Fallback: for i8/i16 sitofp/uitofp cases, sign/zero-extend the source to
3124fe6060f1SDimitry Andric   // i32 and cost the conversion as an i32 sitofp.
3125fe6060f1SDimitry Andric   if ((ISD == ISD::SINT_TO_FP || ISD == ISD::UINT_TO_FP) &&
3126fe6060f1SDimitry Andric       1 < Src->getScalarSizeInBits() && Src->getScalarSizeInBits() < 32) {
3127fe6060f1SDimitry Andric     Type *ExtSrc = Src->getWithNewBitWidth(32);
3128fe6060f1SDimitry Andric     unsigned ExtOpc =
3129fe6060f1SDimitry Andric         (ISD == ISD::SINT_TO_FP) ? Instruction::SExt : Instruction::ZExt;
3130fe6060f1SDimitry Andric 
3131fe6060f1SDimitry Andric     // For scalar loads the extend would be free.
3132fe6060f1SDimitry Andric     InstructionCost ExtCost = 0;
3133fe6060f1SDimitry Andric     if (!(Src->isIntegerTy() && I && isa<LoadInst>(I->getOperand(0))))
3134fe6060f1SDimitry Andric       ExtCost = getCastInstrCost(ExtOpc, ExtSrc, Src, CCH, CostKind);
3135fe6060f1SDimitry Andric 
3136fe6060f1SDimitry Andric     return ExtCost + getCastInstrCost(Instruction::SIToFP, Dst, ExtSrc,
3137fe6060f1SDimitry Andric                                       TTI::CastContextHint::None, CostKind);
3138fe6060f1SDimitry Andric   }
3139fe6060f1SDimitry Andric 
3140fe6060f1SDimitry Andric   // Fallback: for fptosi/fptoui to i8/i16, cost the conversion as an fptosi to
3141fe6060f1SDimitry Andric   // i32 followed by a truncate to the destination type.
3142fe6060f1SDimitry Andric   if ((ISD == ISD::FP_TO_SINT || ISD == ISD::FP_TO_UINT) &&
3143fe6060f1SDimitry Andric       1 < Dst->getScalarSizeInBits() && Dst->getScalarSizeInBits() < 32) {
3144fe6060f1SDimitry Andric     Type *TruncDst = Dst->getWithNewBitWidth(32);
3145fe6060f1SDimitry Andric     return getCastInstrCost(Instruction::FPToSI, TruncDst, Src, CCH, CostKind) +
3146fe6060f1SDimitry Andric            getCastInstrCost(Instruction::Trunc, Dst, TruncDst,
3147fe6060f1SDimitry Andric                             TTI::CastContextHint::None, CostKind);
3148fe6060f1SDimitry Andric   }
31490b57cec5SDimitry Andric 
3150*0fca6ea1SDimitry Andric   // TODO: Allow non-throughput costs that aren't binary.
3151*0fca6ea1SDimitry Andric   auto AdjustCost = [&CostKind](InstructionCost Cost,
3152*0fca6ea1SDimitry Andric                                 InstructionCost N = 1) -> InstructionCost {
3153*0fca6ea1SDimitry Andric     if (CostKind != TTI::TCK_RecipThroughput)
3154*0fca6ea1SDimitry Andric       return Cost == 0 ? 0 : N;
3155*0fca6ea1SDimitry Andric     return Cost * N;
3156*0fca6ea1SDimitry Andric   };
3157e8d8bef9SDimitry Andric   return AdjustCost(
3158e8d8bef9SDimitry Andric       BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
31590b57cec5SDimitry Andric }
31600b57cec5SDimitry Andric 
3161fe6060f1SDimitry Andric InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
3162fe6060f1SDimitry Andric                                                Type *CondTy,
3163e8d8bef9SDimitry Andric                                                CmpInst::Predicate VecPred,
31645ffd83dbSDimitry Andric                                                TTI::TargetCostKind CostKind,
31650b57cec5SDimitry Andric                                                const Instruction *I) {
3166bdd1243dSDimitry Andric   // Early out if this type isn't scalar/vector integer/float.
3167bdd1243dSDimitry Andric   if (!(ValTy->isIntOrIntVectorTy() || ValTy->isFPOrFPVectorTy()))
3168e8d8bef9SDimitry Andric     return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
3169e8d8bef9SDimitry Andric                                      I);
31705ffd83dbSDimitry Andric 
31710b57cec5SDimitry Andric   // Legalize the type.
3172bdd1243dSDimitry Andric   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
31730b57cec5SDimitry Andric 
31740b57cec5SDimitry Andric   MVT MTy = LT.second;
31750b57cec5SDimitry Andric 
31760b57cec5SDimitry Andric   int ISD = TLI->InstructionOpcodeToISD(Opcode);
31770b57cec5SDimitry Andric   assert(ISD && "Invalid opcode");
31780b57cec5SDimitry Andric 
317981ad6265SDimitry Andric   InstructionCost ExtraCost = 0;
3180349cc55cSDimitry Andric   if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
31810b57cec5SDimitry Andric     // Some vector comparison predicates cost extra instructions.
3182*0fca6ea1SDimitry Andric     // TODO: Adjust ExtraCost based on CostKind?
3183349cc55cSDimitry Andric     // TODO: Should we invert this and assume worst case cmp costs
3184349cc55cSDimitry Andric     // and reduce for particular predicates?
31850b57cec5SDimitry Andric     if (MTy.isVector() &&
31860b57cec5SDimitry Andric         !((ST->hasXOP() && (!ST->hasAVX2() || MTy.is128BitVector())) ||
31870b57cec5SDimitry Andric           (ST->hasAVX512() && 32 <= MTy.getScalarSizeInBits()) ||
31880b57cec5SDimitry Andric           ST->hasBWI())) {
3189349cc55cSDimitry Andric       // Fall back to I's predicate if a specific predicate wasn't specified.
3190349cc55cSDimitry Andric       CmpInst::Predicate Pred = VecPred;
3191349cc55cSDimitry Andric       if (I && (Pred == CmpInst::BAD_ICMP_PREDICATE ||
3192349cc55cSDimitry Andric                 Pred == CmpInst::BAD_FCMP_PREDICATE))
3193349cc55cSDimitry Andric         Pred = cast<CmpInst>(I)->getPredicate();
3194349cc55cSDimitry Andric 
3195*0fca6ea1SDimitry Andric       bool CmpWithConstant = false;
3196*0fca6ea1SDimitry Andric       if (auto *CmpInstr = dyn_cast_or_null<CmpInst>(I))
3197*0fca6ea1SDimitry Andric         CmpWithConstant = isa<Constant>(CmpInstr->getOperand(1));
3198*0fca6ea1SDimitry Andric 
3199349cc55cSDimitry Andric       switch (Pred) {
32000b57cec5SDimitry Andric       case CmpInst::Predicate::ICMP_NE:
32010b57cec5SDimitry Andric         // xor(cmpeq(x,y),-1)
3202*0fca6ea1SDimitry Andric         ExtraCost = CmpWithConstant ? 0 : 1;
32030b57cec5SDimitry Andric         break;
32040b57cec5SDimitry Andric       case CmpInst::Predicate::ICMP_SGE:
32050b57cec5SDimitry Andric       case CmpInst::Predicate::ICMP_SLE:
32060b57cec5SDimitry Andric         // xor(cmpgt(x,y),-1)
3207*0fca6ea1SDimitry Andric         ExtraCost = CmpWithConstant ? 0 : 1;
32080b57cec5SDimitry Andric         break;
32090b57cec5SDimitry Andric       case CmpInst::Predicate::ICMP_ULT:
32100b57cec5SDimitry Andric       case CmpInst::Predicate::ICMP_UGT:
32110b57cec5SDimitry Andric         // cmpgt(xor(x,signbit),xor(y,signbit))
32120b57cec5SDimitry Andric         // xor(cmpeq(pmaxu(x,y),x),-1)
3213*0fca6ea1SDimitry Andric         ExtraCost = CmpWithConstant ? 1 : 2;
32140b57cec5SDimitry Andric         break;
32150b57cec5SDimitry Andric       case CmpInst::Predicate::ICMP_ULE:
32160b57cec5SDimitry Andric       case CmpInst::Predicate::ICMP_UGE:
32170b57cec5SDimitry Andric         if ((ST->hasSSE41() && MTy.getScalarSizeInBits() == 32) ||
32180b57cec5SDimitry Andric             (ST->hasSSE2() && MTy.getScalarSizeInBits() < 32)) {
32190b57cec5SDimitry Andric           // cmpeq(psubus(x,y),0)
32200b57cec5SDimitry Andric           // cmpeq(pminu(x,y),x)
32210b57cec5SDimitry Andric           ExtraCost = 1;
32220b57cec5SDimitry Andric         } else {
32230b57cec5SDimitry Andric           // xor(cmpgt(xor(x,signbit),xor(y,signbit)),-1)
3224*0fca6ea1SDimitry Andric           ExtraCost = CmpWithConstant ? 2 : 3;
32250b57cec5SDimitry Andric         }
32260b57cec5SDimitry Andric         break;
3227bdd1243dSDimitry Andric       case CmpInst::Predicate::FCMP_ONE:
3228bdd1243dSDimitry Andric       case CmpInst::Predicate::FCMP_UEQ:
3229bdd1243dSDimitry Andric         // Without AVX we need to expand FCMP_ONE/FCMP_UEQ cases.
3230bdd1243dSDimitry Andric         // Use FCMP_UEQ expansion - FCMP_ONE should be the same.
3231bdd1243dSDimitry Andric         if (CondTy && !ST->hasAVX())
3232bdd1243dSDimitry Andric           return getCmpSelInstrCost(Opcode, ValTy, CondTy,
3233bdd1243dSDimitry Andric                                     CmpInst::Predicate::FCMP_UNO, CostKind) +
3234bdd1243dSDimitry Andric                  getCmpSelInstrCost(Opcode, ValTy, CondTy,
3235bdd1243dSDimitry Andric                                     CmpInst::Predicate::FCMP_OEQ, CostKind) +
3236bdd1243dSDimitry Andric                  getArithmeticInstrCost(Instruction::Or, CondTy, CostKind);
3237bdd1243dSDimitry Andric 
3238bdd1243dSDimitry Andric         break;
3239349cc55cSDimitry Andric       case CmpInst::Predicate::BAD_ICMP_PREDICATE:
3240349cc55cSDimitry Andric       case CmpInst::Predicate::BAD_FCMP_PREDICATE:
3241349cc55cSDimitry Andric         // Assume worst case scenario and add the maximum extra cost.
3242349cc55cSDimitry Andric         ExtraCost = 3;
3243349cc55cSDimitry Andric         break;
32440b57cec5SDimitry Andric       default:
32450b57cec5SDimitry Andric         break;
32460b57cec5SDimitry Andric       }
32470b57cec5SDimitry Andric     }
32480b57cec5SDimitry Andric   }
32490b57cec5SDimitry Andric 
3250bdd1243dSDimitry Andric   static const CostKindTblEntry SLMCostTbl[] = {
32518bcb0991SDimitry Andric     // slm pcmpeq/pcmpgt throughput is 2
3252bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v2i64,   { 2, 5, 1, 2 } },
3253bdd1243dSDimitry Andric     // slm pblendvb/blendvpd/blendvps throughput is 4
3254bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v2f64,   { 4, 4, 1, 3 } }, // vblendvpd
3255bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v4f32,   { 4, 4, 1, 3 } }, // vblendvps
3256bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v2i64,   { 4, 4, 1, 3 } }, // pblendvb
3257bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v8i32,   { 4, 4, 1, 3 } }, // pblendvb
3258bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v8i16,   { 4, 4, 1, 3 } }, // pblendvb
3259bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v16i8,   { 4, 4, 1, 3 } }, // pblendvb
32608bcb0991SDimitry Andric   };
32618bcb0991SDimitry Andric 
3262bdd1243dSDimitry Andric   static const CostKindTblEntry AVX512BWCostTbl[] = {
3263bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v32i16,  { 1, 1, 1, 1 } },
3264bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v16i16,  { 1, 1, 1, 1 } },
3265bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v64i8,   { 1, 1, 1, 1 } },
3266bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v32i8,   { 1, 1, 1, 1 } },
32670b57cec5SDimitry Andric 
3268bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v32i16,  { 1, 1, 1, 1 } },
3269bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v64i8,   { 1, 1, 1, 1 } },
32700b57cec5SDimitry Andric   };
32710b57cec5SDimitry Andric 
3272bdd1243dSDimitry Andric   static const CostKindTblEntry AVX512CostTbl[] = {
3273bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v8f64,   { 1, 4, 1, 1 } },
3274bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v4f64,   { 1, 4, 1, 1 } },
3275bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v16f32,  { 1, 4, 1, 1 } },
3276bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v8f32,   { 1, 4, 1, 1 } },
32770b57cec5SDimitry Andric 
3278bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v8i64,   { 1, 1, 1, 1 } },
3279bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v4i64,   { 1, 1, 1, 1 } },
3280bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v2i64,   { 1, 1, 1, 1 } },
3281bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v16i32,  { 1, 1, 1, 1 } },
3282bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v8i32,   { 1, 1, 1, 1 } },
3283bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v32i16,  { 3, 7, 5, 5 } },
3284bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v64i8,   { 3, 7, 5, 5 } },
32855ffd83dbSDimitry Andric 
3286bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v8i64,   { 1, 1, 1, 1 } },
3287bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v4i64,   { 1, 1, 1, 1 } },
3288bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v2i64,   { 1, 1, 1, 1 } },
3289bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v16i32,  { 1, 1, 1, 1 } },
3290bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v8i32,   { 1, 1, 1, 1 } },
3291bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v4i32,   { 1, 1, 1, 1 } },
3292bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v8f64,   { 1, 1, 1, 1 } },
3293bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v4f64,   { 1, 1, 1, 1 } },
3294bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v2f64,   { 1, 1, 1, 1 } },
3295bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::f64,     { 1, 1, 1, 1 } },
3296bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v16f32,  { 1, 1, 1, 1 } },
3297bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v8f32 ,  { 1, 1, 1, 1 } },
3298bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v4f32,   { 1, 1, 1, 1 } },
3299bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::f32  ,   { 1, 1, 1, 1 } },
33005ffd83dbSDimitry Andric 
3301bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v32i16,  { 2, 2, 4, 4 } },
3302bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v16i16,  { 1, 1, 1, 1 } },
3303bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v8i16,   { 1, 1, 1, 1 } },
3304bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v64i8,   { 2, 2, 4, 4 } },
3305bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v32i8,   { 1, 1, 1, 1 } },
3306bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v16i8,   { 1, 1, 1, 1 } },
33070b57cec5SDimitry Andric   };
33080b57cec5SDimitry Andric 
3309bdd1243dSDimitry Andric   static const CostKindTblEntry AVX2CostTbl[] = {
3310bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v4f64,   { 1, 4, 1, 2 } },
3311bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v2f64,   { 1, 4, 1, 1 } },
3312bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::f64,     { 1, 4, 1, 1 } },
3313bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v8f32,   { 1, 4, 1, 2 } },
3314bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v4f32,   { 1, 4, 1, 1 } },
3315bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::f32,     { 1, 4, 1, 1 } },
33160b57cec5SDimitry Andric 
3317bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v4i64,   { 1, 1, 1, 2 } },
3318bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v8i32,   { 1, 1, 1, 2 } },
3319bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v16i16,  { 1, 1, 1, 2 } },
3320bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v32i8,   { 1, 1, 1, 2 } },
3321bdd1243dSDimitry Andric 
3322bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v4f64,   { 2, 2, 1, 2 } }, // vblendvpd
3323bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v8f32,   { 2, 2, 1, 2 } }, // vblendvps
3324bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v4i64,   { 2, 2, 1, 2 } }, // pblendvb
3325bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v8i32,   { 2, 2, 1, 2 } }, // pblendvb
3326bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v16i16,  { 2, 2, 1, 2 } }, // pblendvb
3327bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v32i8,   { 2, 2, 1, 2 } }, // pblendvb
33280b57cec5SDimitry Andric   };
33290b57cec5SDimitry Andric 
3330bdd1243dSDimitry Andric   static const CostKindTblEntry XOPCostTbl[] = {
3331bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v4i64,   { 4, 2, 5, 6 } },
3332bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v2i64,   { 1, 1, 1, 1 } },
3333bdd1243dSDimitry Andric   };
3334bdd1243dSDimitry Andric 
3335bdd1243dSDimitry Andric   static const CostKindTblEntry AVX1CostTbl[] = {
3336bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v4f64,   { 2, 3, 1, 2 } },
3337bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v2f64,   { 1, 3, 1, 1 } },
3338bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::f64,     { 1, 3, 1, 1 } },
3339bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v8f32,   { 2, 3, 1, 2 } },
3340bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v4f32,   { 1, 3, 1, 1 } },
3341bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::f32,     { 1, 3, 1, 1 } },
3342bdd1243dSDimitry Andric 
33430b57cec5SDimitry Andric     // AVX1 does not support 8-wide integer compare.
3344bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v4i64,   { 4, 2, 5, 6 } },
3345bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v8i32,   { 4, 2, 5, 6 } },
3346bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v16i16,  { 4, 2, 5, 6 } },
3347bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v32i8,   { 4, 2, 5, 6 } },
33480b57cec5SDimitry Andric 
3349bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v4f64,   { 3, 3, 1, 2 } }, // vblendvpd
3350bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v8f32,   { 3, 3, 1, 2 } }, // vblendvps
3351bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v4i64,   { 3, 3, 1, 2 } }, // vblendvpd
3352bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v8i32,   { 3, 3, 1, 2 } }, // vblendvps
3353bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v16i16,  { 3, 3, 3, 3 } }, // vandps + vandnps + vorps
3354bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v32i8,   { 3, 3, 3, 3 } }, // vandps + vandnps + vorps
33550b57cec5SDimitry Andric   };
33560b57cec5SDimitry Andric 
3357bdd1243dSDimitry Andric   static const CostKindTblEntry SSE42CostTbl[] = {
3358bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v2i64,   { 1, 2, 1, 2 } },
33590b57cec5SDimitry Andric   };
33600b57cec5SDimitry Andric 
3361bdd1243dSDimitry Andric   static const CostKindTblEntry SSE41CostTbl[] = {
3362bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v2f64,   { 1, 5, 1, 1 } },
3363bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v4f32,   { 1, 5, 1, 1 } },
336481ad6265SDimitry Andric 
3365bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v2f64,   { 2, 2, 1, 2 } }, // blendvpd
3366bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::f64,     { 2, 2, 1, 2 } }, // blendvpd
3367bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v4f32,   { 2, 2, 1, 2 } }, // blendvps
3368bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::f32  ,   { 2, 2, 1, 2 } }, // blendvps
3369bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v2i64,   { 2, 2, 1, 2 } }, // pblendvb
3370bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v4i32,   { 2, 2, 1, 2 } }, // pblendvb
3371bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v8i16,   { 2, 2, 1, 2 } }, // pblendvb
3372bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v16i8,   { 2, 2, 1, 2 } }, // pblendvb
33730b57cec5SDimitry Andric   };
33740b57cec5SDimitry Andric 
3375bdd1243dSDimitry Andric   static const CostKindTblEntry SSE2CostTbl[] = {
3376bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v2f64,   { 2, 5, 1, 1 } },
3377bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::f64,     { 1, 5, 1, 1 } },
33780b57cec5SDimitry Andric 
3379bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v2i64,   { 5, 4, 5, 5 } }, // pcmpeqd/pcmpgtd expansion
3380bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v4i32,   { 1, 1, 1, 1 } },
3381bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v8i16,   { 1, 1, 1, 1 } },
3382bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v16i8,   { 1, 1, 1, 1 } },
3383bdd1243dSDimitry Andric 
3384bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v2f64,   { 2, 2, 3, 3 } }, // andpd + andnpd + orpd
3385bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::f64,     { 2, 2, 3, 3 } }, // andpd + andnpd + orpd
3386bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v2i64,   { 2, 2, 3, 3 } }, // pand + pandn + por
3387bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v4i32,   { 2, 2, 3, 3 } }, // pand + pandn + por
3388bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v8i16,   { 2, 2, 3, 3 } }, // pand + pandn + por
3389bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v16i8,   { 2, 2, 3, 3 } }, // pand + pandn + por
33900b57cec5SDimitry Andric   };
33910b57cec5SDimitry Andric 
3392bdd1243dSDimitry Andric   static const CostKindTblEntry SSE1CostTbl[] = {
3393bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::v4f32,   { 2, 5, 1, 1 } },
3394bdd1243dSDimitry Andric     { ISD::SETCC,   MVT::f32,     { 1, 5, 1, 1 } },
33950b57cec5SDimitry Andric 
3396bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::v4f32,   { 2, 2, 3, 3 } }, // andps + andnps + orps
3397bdd1243dSDimitry Andric     { ISD::SELECT,  MVT::f32,     { 2, 2, 3, 3 } }, // andps + andnps + orps
33980b57cec5SDimitry Andric   };
33990b57cec5SDimitry Andric 
3400349cc55cSDimitry Andric   if (ST->useSLMArithCosts())
34018bcb0991SDimitry Andric     if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
3402bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
3403bdd1243dSDimitry Andric         return LT.first * (ExtraCost + *KindCost);
34048bcb0991SDimitry Andric 
34050b57cec5SDimitry Andric   if (ST->hasBWI())
34060b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
3407bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
3408bdd1243dSDimitry Andric         return LT.first * (ExtraCost + *KindCost);
34090b57cec5SDimitry Andric 
34100b57cec5SDimitry Andric   if (ST->hasAVX512())
34110b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
3412bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
3413bdd1243dSDimitry Andric         return LT.first * (ExtraCost + *KindCost);
34140b57cec5SDimitry Andric 
34150b57cec5SDimitry Andric   if (ST->hasAVX2())
34160b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
3417bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
3418bdd1243dSDimitry Andric         return LT.first * (ExtraCost + *KindCost);
3419bdd1243dSDimitry Andric 
3420bdd1243dSDimitry Andric   if (ST->hasXOP())
3421bdd1243dSDimitry Andric     if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
3422bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
3423bdd1243dSDimitry Andric         return LT.first * (ExtraCost + *KindCost);
34240b57cec5SDimitry Andric 
34250b57cec5SDimitry Andric   if (ST->hasAVX())
34260b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
3427bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
3428bdd1243dSDimitry Andric         return LT.first * (ExtraCost + *KindCost);
34290b57cec5SDimitry Andric 
34300b57cec5SDimitry Andric   if (ST->hasSSE42())
34310b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
3432bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
3433bdd1243dSDimitry Andric         return LT.first * (ExtraCost + *KindCost);
34340b57cec5SDimitry Andric 
34350b57cec5SDimitry Andric   if (ST->hasSSE41())
34360b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
3437bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
3438bdd1243dSDimitry Andric         return LT.first * (ExtraCost + *KindCost);
34390b57cec5SDimitry Andric 
34400b57cec5SDimitry Andric   if (ST->hasSSE2())
34410b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
3442bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
3443bdd1243dSDimitry Andric         return LT.first * (ExtraCost + *KindCost);
34440b57cec5SDimitry Andric 
34450b57cec5SDimitry Andric   if (ST->hasSSE1())
34460b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
3447bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
3448bdd1243dSDimitry Andric         return LT.first * (ExtraCost + *KindCost);
3449bdd1243dSDimitry Andric 
3450bdd1243dSDimitry Andric   // Assume a 3cy latency for fp select ops.
3451bdd1243dSDimitry Andric   if (CostKind == TTI::TCK_Latency && Opcode == Instruction::Select)
3452bdd1243dSDimitry Andric     if (ValTy->getScalarType()->isFloatingPointTy())
3453bdd1243dSDimitry Andric       return 3;
34540b57cec5SDimitry Andric 
3455e8d8bef9SDimitry Andric   return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
34560b57cec5SDimitry Andric }
34570b57cec5SDimitry Andric 
34580b57cec5SDimitry Andric unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; }
34590b57cec5SDimitry Andric 
3460fe6060f1SDimitry Andric InstructionCost
3461bdd1243dSDimitry Andric X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
3462fe6060f1SDimitry Andric                                   TTI::TargetCostKind CostKind) {
34630b57cec5SDimitry Andric   // Costs should match the codegen from:
34640b57cec5SDimitry Andric   // BITREVERSE: llvm\test\CodeGen\X86\vector-bitreverse.ll
34650b57cec5SDimitry Andric   // BSWAP: llvm\test\CodeGen\X86\bswap-vector.ll
34660b57cec5SDimitry Andric   // CTLZ: llvm\test\CodeGen\X86\vector-lzcnt-*.ll
34670b57cec5SDimitry Andric   // CTPOP: llvm\test\CodeGen\X86\vector-popcnt-*.ll
34680b57cec5SDimitry Andric   // CTTZ: llvm\test\CodeGen\X86\vector-tzcnt-*.ll
3469e8d8bef9SDimitry Andric 
3470e8d8bef9SDimitry Andric   // TODO: Overflow intrinsics (*ADDO, *SUBO, *MULO) with vector types are not
3471e8d8bef9SDimitry Andric   //       specialized in these tables yet.
3472bdd1243dSDimitry Andric   static const CostKindTblEntry AVX512VBMI2CostTbl[] = {
3473bdd1243dSDimitry Andric     { ISD::FSHL,       MVT::v8i64,   {  1,  1,  1,  1 } },
3474bdd1243dSDimitry Andric     { ISD::FSHL,       MVT::v4i64,   {  1,  1,  1,  1 } },
3475bdd1243dSDimitry Andric     { ISD::FSHL,       MVT::v2i64,   {  1,  1,  1,  1 } },
3476bdd1243dSDimitry Andric     { ISD::FSHL,       MVT::v16i32,  {  1,  1,  1,  1 } },
3477bdd1243dSDimitry Andric     { ISD::FSHL,       MVT::v8i32,   {  1,  1,  1,  1 } },
3478bdd1243dSDimitry Andric     { ISD::FSHL,       MVT::v4i32,   {  1,  1,  1,  1 } },
3479bdd1243dSDimitry Andric     { ISD::FSHL,       MVT::v32i16,  {  1,  1,  1,  1 } },
3480bdd1243dSDimitry Andric     { ISD::FSHL,       MVT::v16i16,  {  1,  1,  1,  1 } },
3481bdd1243dSDimitry Andric     { ISD::FSHL,       MVT::v8i16,   {  1,  1,  1,  1 } },
3482bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v32i16,  {  1,  1,  1,  1 } },
3483bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v16i16,  {  1,  1,  1,  1 } },
3484bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v8i16,   {  1,  1,  1,  1 } },
3485bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v32i16,  {  1,  1,  1,  1 } },
3486bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v16i16,  {  1,  1,  1,  1 } },
3487bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v8i16,   {  1,  1,  1,  1 } },
3488*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v32i16,  {  1,  1,  1,  1 } },
3489*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v16i16,  {  1,  1,  1,  1 } },
3490*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v8i16,   {  1,  1,  1,  1 } },
3491349cc55cSDimitry Andric   };
3492bdd1243dSDimitry Andric   static const CostKindTblEntry AVX512BITALGCostTbl[] = {
3493bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v32i16,  {  1,  1,  1,  1 } },
3494bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v64i8,   {  1,  1,  1,  1 } },
3495bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v16i16,  {  1,  1,  1,  1 } },
3496bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v32i8,   {  1,  1,  1,  1 } },
3497bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v8i16,   {  1,  1,  1,  1 } },
3498bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v16i8,   {  1,  1,  1,  1 } },
3499349cc55cSDimitry Andric   };
3500bdd1243dSDimitry Andric   static const CostKindTblEntry AVX512VPOPCNTDQCostTbl[] = {
3501bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v8i64,   {  1,  1,  1,  1 } },
3502bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v16i32,  {  1,  1,  1,  1 } },
3503bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v4i64,   {  1,  1,  1,  1 } },
3504bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v8i32,   {  1,  1,  1,  1 } },
3505bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v2i64,   {  1,  1,  1,  1 } },
3506bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v4i32,   {  1,  1,  1,  1 } },
35070b57cec5SDimitry Andric   };
3508bdd1243dSDimitry Andric   static const CostKindTblEntry AVX512CDCostTbl[] = {
3509bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v8i64,   {  1,  5,  1,  1 } },
3510bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v16i32,  {  1,  5,  1,  1 } },
3511bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v32i16,  { 18, 27, 23, 27 } },
3512bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v64i8,   {  3, 16,  9, 11 } },
3513bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v4i64,   {  1,  5,  1,  1 } },
3514bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v8i32,   {  1,  5,  1,  1 } },
3515bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v16i16,  {  8, 19, 11, 13 } },
3516bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v32i8,   {  2, 11,  9, 10 } },
3517bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v2i64,   {  1,  5,  1,  1 } },
3518bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v4i32,   {  1,  5,  1,  1 } },
3519bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v8i16,   {  3, 15,  4,  6 } },
3520bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v16i8,   {  2, 10,  9, 10 } },
3521bdd1243dSDimitry Andric 
3522bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v8i64,   {  2,  8,  6,  7 } },
3523bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v16i32,  {  2,  8,  6,  7 } },
3524bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v4i64,   {  1,  8,  6,  6 } },
3525bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v8i32,   {  1,  8,  6,  6 } },
3526bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v2i64,   {  1,  8,  6,  6 } },
3527bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v4i32,   {  1,  8,  6,  6 } },
35280b57cec5SDimitry Andric   };
3529bdd1243dSDimitry Andric   static const CostKindTblEntry AVX512BWCostTbl[] = {
3530bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v32i16,  {  1,  1,  1,  1 } },
3531bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v64i8,   {  1,  1,  1,  1 } },
353206c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v2i64,   {  3, 10, 10, 11 } },
353306c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v4i64,   {  3, 11, 10, 11 } },
353406c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v8i64,   {  3, 12, 10, 14 } },
353506c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v4i32,   {  3, 10, 10, 11 } },
353606c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v8i32,   {  3, 11, 10, 11 } },
353706c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v16i32,  {  3, 12, 10, 14 } },
353806c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v8i16,   {  3, 10, 10, 11 } },
353906c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v16i16,  {  3, 11, 10, 11 } },
354006c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v32i16,  {  3, 12, 10, 14 } },
354106c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v16i8,   {  2,  5,  9,  9 } },
354206c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v32i8,   {  2,  5,  9,  9 } },
354306c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v64i8,   {  2,  5,  9, 12 } },
354406c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v2i64,   {  1,  1,  1,  2 } },
354506c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v4i64,   {  1,  1,  1,  2 } },
354606c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v8i64,   {  1,  1,  1,  2 } },
354706c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v4i32,   {  1,  1,  1,  2 } },
354806c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v8i32,   {  1,  1,  1,  2 } },
354906c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v16i32,  {  1,  1,  1,  2 } },
355006c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v8i16,   {  1,  1,  1,  2 } },
355106c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v16i16,  {  1,  1,  1,  2 } },
355206c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v32i16,  {  1,  1,  1,  2 } },
3553bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v8i64,   {  8, 22, 23, 23 } },
3554bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v16i32,  {  8, 23, 25, 25 } },
3555bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v32i16,  {  4, 15, 15, 16 } },
3556bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v64i8,   {  3, 12, 10,  9 } },
3557bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v2i64,   {  3,  7, 10, 10 } },
3558bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v4i64,   {  3,  7, 10, 10 } },
3559bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v8i64,   {  3,  8, 10, 12 } },
3560bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v4i32,   {  7, 11, 14, 14 } },
3561bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v8i32,   {  7, 11, 14, 14 } },
3562bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v16i32,  {  7, 12, 14, 16 } },
3563bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v8i16,   {  2,  7, 11, 11 } },
3564bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v16i16,  {  2,  7, 11, 11 } },
3565bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v32i16,  {  3,  7, 11, 13 } },
3566bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v16i8,   {  2,  4,  8,  8 } },
3567bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v32i8,   {  2,  4,  8,  8 } },
3568bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v64i8,   {  2,  5,  8, 10 } },
3569bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v8i16,   {  3,  9, 14, 14 } },
3570bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v16i16,  {  3,  9, 14, 14 } },
3571bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v32i16,  {  3, 10, 14, 16 } },
3572bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v16i8,   {  2,  6, 11, 11 } },
3573bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v32i8,   {  2,  6, 11, 11 } },
3574bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v64i8,   {  3,  7, 11, 13 } },
3575bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v32i16,  {  2,  8,  6,  8 } },
3576bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v16i16,  {  2,  8,  6,  7 } },
3577bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v8i16,   {  2,  7,  6,  7 } },
3578bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v64i8,   {  5,  6, 11, 12 } },
3579bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v32i8,   {  5, 15,  7, 10 } },
3580bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v16i8,   {  5, 15,  7, 10 } },
3581bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v32i16,  {  2,  8,  6,  8 } },
3582bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v16i16,  {  2,  8,  6,  7 } },
3583bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v8i16,   {  2,  7,  6,  7 } },
3584bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v64i8,   {  5,  6, 12, 14 } },
3585bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v32i8,   {  5, 14,  6,  9 } },
3586bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v16i8,   {  5, 14,  6,  9 } },
3587*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v32i16,  {  2,  5,  3,  3 } },
3588*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v16i16,  {  1,  5,  3,  3 } },
3589*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v8i16,   {  1,  5,  3,  3 } },
3590*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v64i8,   {  2,  9,  3,  4 } },
3591*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v32i8,   {  1,  9,  3,  4 } },
3592*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v16i8,   {  1,  8,  3,  4 } },
3593bdd1243dSDimitry Andric     { ISD::SADDSAT,    MVT::v32i16,  {  1 } },
3594bdd1243dSDimitry Andric     { ISD::SADDSAT,    MVT::v64i8,   {  1 } },
3595bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v32i16,  {  1,  1,  1,  1 } },
3596bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v64i8,   {  1,  1,  1,  1 } },
3597bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v32i16,  {  1,  1,  1,  1 } },
3598bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v64i8,   {  1,  1,  1,  1 } },
3599bdd1243dSDimitry Andric     { ISD::SSUBSAT,    MVT::v32i16,  {  1 } },
3600bdd1243dSDimitry Andric     { ISD::SSUBSAT,    MVT::v64i8,   {  1 } },
3601bdd1243dSDimitry Andric     { ISD::UADDSAT,    MVT::v32i16,  {  1 } },
3602bdd1243dSDimitry Andric     { ISD::UADDSAT,    MVT::v64i8,   {  1 } },
3603bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v32i16,  {  1,  1,  1,  1 } },
3604bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v64i8,   {  1,  1,  1,  1 } },
3605bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v32i16,  {  1,  1,  1,  1 } },
3606bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v64i8,   {  1,  1,  1,  1 } },
3607bdd1243dSDimitry Andric     { ISD::USUBSAT,    MVT::v32i16,  {  1 } },
3608bdd1243dSDimitry Andric     { ISD::USUBSAT,    MVT::v64i8,   {  1 } },
36090b57cec5SDimitry Andric   };
3610bdd1243dSDimitry Andric   static const CostKindTblEntry AVX512CostTbl[] = {
    // AVX512CostTbl: cost rows keyed by (opcode, MVT). Each row has the form
    // { opcode, type, { costs... } }; most rows list four cost values, while
    // the saturating add/sub rows list a single value.
    // NOTE(review): the four values presumably map to the cost kinds
    // (reciprocal throughput, latency, code size, size-and-latency) in that
    // order - confirm against the CostKindTblEntry definition.
    // NOTE(review): the name suggests this table applies to AVX512 subtargets;
    // the lookup code that selects it is outside this excerpt.
3611bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v8i64,   {  1,  1,  1,  1 } },
3612bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v4i64,   {  1,  1,  1,  1 } },
3613bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v2i64,   {  1,  1,  1,  1 } },
3614bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v16i32,  {  1,  1,  1,  1 } },
3615bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v8i32,   {  1,  1,  1,  1 } },
3616bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v32i16,  {  2,  7,  4,  4 } },
3617bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v16i16,  {  1,  1,  1,  1 } },
3618bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v64i8,   {  2,  7,  4,  4 } },
3619bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v32i8,   {  1,  1,  1,  1 } },
362006c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v8i64,   {  9, 13, 20, 20 } },
362106c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v16i32,  {  9, 13, 20, 20 } },
362206c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v32i16,  {  9, 13, 20, 20 } },
362306c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v64i8,   {  6, 11, 17, 17 } },
362406c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v8i64,   {  4,  7,  5,  5 } },
362506c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v16i32,  {  4,  7,  5,  5 } },
362606c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v32i16,  {  4,  7,  5,  5 } },
3627bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v8i64,   { 10, 28, 32, 32 } },
3628bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v16i32,  { 12, 30, 38, 38 } },
3629bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v32i16,  {  8, 15, 29, 29 } },
3630bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v64i8,   {  6, 11, 19, 19 } },
3631bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v8i64,   { 16, 16, 19, 19 } },
3632bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v16i32,  { 24, 19, 27, 27 } },
3633bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v32i16,  { 18, 15, 22, 22 } },
3634bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v64i8,   { 12, 11, 16, 16 } },
3635bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v8i64,   {  2,  8,  6,  7 } },
3636bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v16i32,  {  2,  8,  6,  7 } },
3637bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v32i16,  {  7, 17, 27, 27 } },
3638bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v64i8,   {  6, 13, 21, 21 } },
    // Rotates (ISD::ROTL, ISD::ROTR, X86ISD::VROTLI) on 32/64-bit elements:
    // every listed width has a flat cost of 1 in all four slots.
3639bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v8i64,   {  1,  1,  1,  1 } },
3640bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v4i64,   {  1,  1,  1,  1 } },
3641bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v2i64,   {  1,  1,  1,  1 } },
3642bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v16i32,  {  1,  1,  1,  1 } },
3643bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v8i32,   {  1,  1,  1,  1 } },
3644bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v4i32,   {  1,  1,  1,  1 } },
3645bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v8i64,   {  1,  1,  1,  1 } },
3646bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v4i64,   {  1,  1,  1,  1 } },
3647bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v2i64,   {  1,  1,  1,  1 } },
3648bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v16i32,  {  1,  1,  1,  1 } },
3649bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v8i32,   {  1,  1,  1,  1 } },
3650bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v4i32,   {  1,  1,  1,  1 } },
3651*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v8i64,   {  1,  1,  1,  1 } },
3652*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v4i64,   {  1,  1,  1,  1 } },
3653*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v2i64,   {  1,  1,  1,  1 } },
3654*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v16i32,  {  1,  1,  1,  1 } },
3655*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v8i32,   {  1,  1,  1,  1 } },
3656*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v4i32,   {  1,  1,  1,  1 } },
    // Integer min/max: the 32/64-bit element rows are cheap, while the
    // v32i16/v64i8 rows carry higher costs in this table.
3657bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v8i64,   {  1,  3,  1,  1 } },
3658bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v16i32,  {  1,  1,  1,  1 } },
3659bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v32i16,  {  3,  7,  5,  5 } },
3660bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v64i8,   {  3,  7,  5,  5 } },
3661bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v4i64,   {  1,  3,  1,  1 } },
3662bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v2i64,   {  1,  3,  1,  1 } },
3663bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v8i64,   {  1,  3,  1,  1 } },
3664bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v16i32,  {  1,  1,  1,  1 } },
3665bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v32i16,  {  3,  7,  5,  5 } },
3666bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v64i8,   {  3,  7,  5,  5 } },
3667bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v4i64,   {  1,  3,  1,  1 } },
3668bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v2i64,   {  1,  3,  1,  1 } },
3669bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v8i64,   {  1,  3,  1,  1 } },
3670bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v16i32,  {  1,  1,  1,  1 } },
3671bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v32i16,  {  3,  7,  5,  5 } },
3672bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v64i8,   {  3,  7,  5,  5 } },
3673bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v4i64,   {  1,  3,  1,  1 } },
3674bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v2i64,   {  1,  3,  1,  1 } },
3675bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v8i64,   {  1,  3,  1,  1 } },
3676bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v16i32,  {  1,  1,  1,  1 } },
3677bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v32i16,  {  3,  7,  5,  5 } },
3678bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v64i8,   {  3,  7,  5,  5 } },
3679bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v4i64,   {  1,  3,  1,  1 } },
3680bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v2i64,   {  1,  3,  1,  1 } },
    // Saturating add/sub: single-value rows. Where present, the trailing
    // comment names the instruction sequence the number counts.
3681bdd1243dSDimitry Andric     { ISD::USUBSAT,    MVT::v16i32,  {  2 } }, // pmaxud + psubd
3682bdd1243dSDimitry Andric     { ISD::USUBSAT,    MVT::v2i64,   {  2 } }, // pmaxuq + psubq
3683bdd1243dSDimitry Andric     { ISD::USUBSAT,    MVT::v4i64,   {  2 } }, // pmaxuq + psubq
3684bdd1243dSDimitry Andric     { ISD::USUBSAT,    MVT::v8i64,   {  2 } }, // pmaxuq + psubq
3685bdd1243dSDimitry Andric     { ISD::UADDSAT,    MVT::v16i32,  {  3 } }, // not + pminud + paddd
3686bdd1243dSDimitry Andric     { ISD::UADDSAT,    MVT::v2i64,   {  3 } }, // not + pminuq + paddq
3687bdd1243dSDimitry Andric     { ISD::UADDSAT,    MVT::v4i64,   {  3 } }, // not + pminuq + paddq
3688bdd1243dSDimitry Andric     { ISD::UADDSAT,    MVT::v8i64,   {  3 } }, // not + pminuq + paddq
3689bdd1243dSDimitry Andric     { ISD::SADDSAT,    MVT::v32i16,  {  2 } },
3690bdd1243dSDimitry Andric     { ISD::SADDSAT,    MVT::v64i8,   {  2 } },
3691bdd1243dSDimitry Andric     { ISD::SSUBSAT,    MVT::v32i16,  {  2 } },
3692bdd1243dSDimitry Andric     { ISD::SSUBSAT,    MVT::v64i8,   {  2 } },
3693bdd1243dSDimitry Andric     { ISD::UADDSAT,    MVT::v32i16,  {  2 } },
3694bdd1243dSDimitry Andric     { ISD::UADDSAT,    MVT::v64i8,   {  2 } },
3695bdd1243dSDimitry Andric     { ISD::USUBSAT,    MVT::v32i16,  {  2 } },
3696bdd1243dSDimitry Andric     { ISD::USUBSAT,    MVT::v64i8,   {  2 } },
    // Floating-point max and square root, scalar through 512-bit vectors.
369706c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::f32,     {  2,  2,  3,  3 } },
369806c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::v4f32,   {  1,  1,  3,  3 } },
369906c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::v8f32,   {  2,  2,  3,  3 } },
370006c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::v16f32,  {  4,  4,  3,  3 } },
370106c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::f64,     {  2,  2,  3,  3 } },
370206c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::v2f64,   {  1,  1,  3,  3 } },
370306c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::v4f64,   {  2,  2,  3,  3 } },
370406c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::v8f64,   {  3,  3,  3,  3 } },
3705bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::f32,     {  3, 12,  1,  1 } }, // Skylake from http://www.agner.org/
3706bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v4f32,   {  3, 12,  1,  1 } }, // Skylake from http://www.agner.org/
3707bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v8f32,   {  6, 12,  1,  1 } }, // Skylake from http://www.agner.org/
3708bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v16f32,  { 12, 20,  1,  3 } }, // Skylake from http://www.agner.org/
3709bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::f64,     {  6, 18,  1,  1 } }, // Skylake from http://www.agner.org/
3710bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v2f64,   {  6, 18,  1,  1 } }, // Skylake from http://www.agner.org/
3711bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v4f64,   { 12, 18,  1,  1 } }, // Skylake from http://www.agner.org/
3712bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v8f64,   { 24, 32,  1,  3 } }, // Skylake from http://www.agner.org/
37130b57cec5SDimitry Andric   };
3714bdd1243dSDimitry Andric   static const CostKindTblEntry XOPCostTbl[] = {
    // XOPCostTbl: cost rows keyed by (opcode, MVT) covering BITREVERSE
    // (vector and scalar integer types) and rotates (ROTL, ROTR, VROTLI).
    // Each row lists four cost values.
    // NOTE(review): the four values presumably map to the cost kinds
    // (reciprocal throughput, latency, code size, size-and-latency) in that
    // order - confirm against the CostKindTblEntry definition.
    // In every group the 256-bit rows are costlier than the 128-bit rows.
    // NOTE(review): presumably because 256-bit types are split into two
    // 128-bit operations under XOP - confirm.
371506c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v4i64,   {  3,  6,  5,  6 } },
371606c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v8i32,   {  3,  6,  5,  6 } },
371706c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v16i16,  {  3,  6,  5,  6 } },
371806c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v32i8,   {  3,  6,  5,  6 } },
371906c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v2i64,   {  2,  7,  1,  1 } },
372006c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v4i32,   {  2,  7,  1,  1 } },
372106c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v8i16,   {  2,  7,  1,  1 } },
372206c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v16i8,   {  2,  7,  1,  1 } },
372306c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::i64,     {  2,  2,  3,  4 } },
372406c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::i32,     {  2,  2,  3,  4 } },
372506c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::i16,     {  2,  2,  3,  4 } },
372606c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::i8,      {  2,  2,  3,  4 } },
3727bdd1243dSDimitry Andric     // XOP: ROTL = VPROT(X,Y), ROTR = VPROT(X,SUB(0,Y))
    // The ROTR rows are never cheaper than the matching ROTL rows, in line
    // with the extra SUB in the expansion above.
3728bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v4i64,   {  4,  7,  5,  6 } },
3729bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v8i32,   {  4,  7,  5,  6 } },
3730bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v16i16,  {  4,  7,  5,  6 } },
3731bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v32i8,   {  4,  7,  5,  6 } },
3732bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v2i64,   {  1,  3,  1,  1 } },
3733bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v4i32,   {  1,  3,  1,  1 } },
3734bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v8i16,   {  1,  3,  1,  1 } },
3735bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::v16i8,   {  1,  3,  1,  1 } },
3736bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v4i64,   {  4,  7,  8,  9 } },
3737bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v8i32,   {  4,  7,  8,  9 } },
3738bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v16i16,  {  4,  7,  8,  9 } },
3739bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v32i8,   {  4,  7,  8,  9 } },
3740bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v2i64,   {  1,  3,  3,  3 } },
3741bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v4i32,   {  1,  3,  3,  3 } },
3742bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::v8i16,   {  1,  3,  3,  3 } },
3743*0fca6ea1SDimitry Andric     { ISD::ROTR,       MVT::v16i8,   {  1,  3,  3,  3 } },
    // VROTLI rows carry the same costs as the ROTL rows of the same type.
3744*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v4i64,   {  4,  7,  5,  6 } },
3745*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v8i32,   {  4,  7,  5,  6 } },
3746*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v16i16,  {  4,  7,  5,  6 } },
3747*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v32i8,   {  4,  7,  5,  6 } },
3748*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v2i64,   {  1,  3,  1,  1 } },
3749*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v4i32,   {  1,  3,  1,  1 } },
3750*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v8i16,   {  1,  3,  1,  1 } },
3751*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v16i8,   {  1,  3,  1,  1 } },
37520b57cec5SDimitry Andric   };
3753bdd1243dSDimitry Andric   static const CostKindTblEntry AVX2CostTbl[] = {
    // AVX2CostTbl: cost rows keyed by (opcode, MVT) for 128-bit and 256-bit
    // integer vectors plus scalar/vector floating-point FMAXNUM and FSQRT.
    // Most rows list four cost values; the saturating add/sub rows list one.
    // NOTE(review): the four values presumably map to the cost kinds
    // (reciprocal throughput, latency, code size, size-and-latency) in that
    // order - confirm against the CostKindTblEntry definition.
    // Trailing comments, where present, name the instruction sequence that
    // the row's numbers describe.
3754bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v2i64,   {  2,  4,  3,  5 } }, // VBLENDVPD(X,VPSUBQ(0,X),X)
3755bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v4i64,   {  2,  4,  3,  5 } }, // VBLENDVPD(X,VPSUBQ(0,X),X)
3756bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v4i32,   {  1,  1,  1,  1 } },
3757bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v8i32,   {  1,  1,  1,  2 } },
3758bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v8i16,   {  1,  1,  1,  1 } },
3759bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v16i16,  {  1,  1,  1,  2 } },
3760bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v16i8,   {  1,  1,  1,  1 } },
3761bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v32i8,   {  1,  1,  1,  2 } },
376206c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v2i64,   {  3, 11, 10, 11 } },
376306c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v4i64,   {  5, 11, 10, 17 } },
376406c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v4i32,   {  3, 11, 10, 11 } },
376506c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v8i32,   {  5, 11, 10, 17 } },
376606c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v8i16,   {  3, 11, 10, 11 } },
376706c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v16i16,  {  5, 11, 10, 17 } },
376806c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v16i8,   {  3,  6,  9,  9 } },
376906c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v32i8,   {  4,  5,  9, 15 } },
377006c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v2i64,   {  1,  2,  1,  2 } },
377106c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v4i64,   {  1,  3,  1,  2 } },
377206c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v4i32,   {  1,  2,  1,  2 } },
377306c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v8i32,   {  1,  3,  1,  2 } },
377406c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v8i16,   {  1,  2,  1,  2 } },
377506c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v16i16,  {  1,  3,  1,  2 } },
3776bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v2i64,   {  7, 18, 24, 25 } },
3777bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v4i64,   { 14, 18, 24, 44 } },
3778bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v4i32,   {  5, 16, 19, 20 } },
3779bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v8i32,   { 10, 16, 19, 34 } },
3780bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v8i16,   {  4, 13, 14, 15 } },
3781bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v16i16,  {  6, 14, 14, 24 } },
3782bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v16i8,   {  3, 12,  9, 10 } },
3783bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v32i8,   {  4, 12,  9, 14 } },
3784bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v2i64,   {  3,  9, 10, 10 } },
3785bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v4i64,   {  4,  9, 10, 14 } },
3786bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v4i32,   {  7, 12, 14, 14 } },
3787bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v8i32,   {  7, 12, 14, 18 } },
3788bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v8i16,   {  3,  7, 11, 11 } },
3789bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v16i16,  {  6,  8, 11, 18 } },
3790bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v16i8,   {  2,  5,  8,  8 } },
3791bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v32i8,   {  3,  5,  8, 12 } },
3792bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v2i64,   {  4, 11, 13, 13 } },
3793bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v4i64,   {  5, 11, 13, 20 } },
3794bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v4i32,   {  7, 14, 17, 17 } },
3795bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v8i32,   {  7, 15, 17, 24 } },
3796bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v8i16,   {  4,  9, 14, 14 } },
3797bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v16i16,  {  6,  9, 14, 24 } },
3798bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v16i8,   {  3,  7, 11, 11 } },
3799bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v32i8,   {  5,  7, 11, 18 } },
    // Saturating add/sub rows (single cost value) are interleaved with the
    // four-value integer min/max rows below.
3800bdd1243dSDimitry Andric     { ISD::SADDSAT,    MVT::v16i16,  {  1 } },
3801bdd1243dSDimitry Andric     { ISD::SADDSAT,    MVT::v32i8,   {  1 } },
3802bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v2i64,   {  2,  7,  2,  3 } },
3803bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v4i64,   {  2,  7,  2,  3 } },
3804bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v8i32,   {  1,  1,  1,  2 } },
3805bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v16i16,  {  1,  1,  1,  2 } },
3806bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v32i8,   {  1,  1,  1,  2 } },
3807bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v2i64,   {  2,  7,  2,  3 } },
3808bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v4i64,   {  2,  7,  2,  3 } },
3809bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v8i32,   {  1,  1,  1,  2 } },
3810bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v16i16,  {  1,  1,  1,  2 } },
3811bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v32i8,   {  1,  1,  1,  2 } },
3812bdd1243dSDimitry Andric     { ISD::SSUBSAT,    MVT::v16i16,  {  1 } },
3813bdd1243dSDimitry Andric     { ISD::SSUBSAT,    MVT::v32i8,   {  1 } },
3814bdd1243dSDimitry Andric     { ISD::UADDSAT,    MVT::v16i16,  {  1 } },
3815bdd1243dSDimitry Andric     { ISD::UADDSAT,    MVT::v32i8,   {  1 } },
3816bdd1243dSDimitry Andric     { ISD::UADDSAT,    MVT::v8i32,   {  3 } }, // not + pminud + paddd
3817bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v2i64,   {  2,  8,  5,  6 } },
3818bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v4i64,   {  2,  8,  5,  8 } },
3819bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v8i32,   {  1,  1,  1,  2 } },
3820bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v16i16,  {  1,  1,  1,  2 } },
3821bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v32i8,   {  1,  1,  1,  2 } },
3822bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v2i64,   {  2,  8,  5,  6 } },
3823bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v4i64,   {  2,  8,  5,  8 } },
3824bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v8i32,   {  1,  1,  1,  2 } },
3825bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v16i16,  {  1,  1,  1,  2 } },
3826bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v32i8,   {  1,  1,  1,  2 } },
3827bdd1243dSDimitry Andric     { ISD::USUBSAT,    MVT::v16i16,  {  1 } },
3828bdd1243dSDimitry Andric     { ISD::USUBSAT,    MVT::v32i8,   {  1 } },
3829bdd1243dSDimitry Andric     { ISD::USUBSAT,    MVT::v8i32,   {  2 } }, // pmaxud + psubd
    // Floating-point max (three-instruction sequences) and square root.
383006c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::f32,     {  2,  7,  3,  5 } }, // MAXSS + CMPUNORDSS + BLENDVPS
383106c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::v4f32,   {  2,  7,  3,  5 } }, // MAXPS + CMPUNORDPS + BLENDVPS
383206c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::v8f32,   {  3,  7,  3,  6 } }, // MAXPS + CMPUNORDPS + BLENDVPS
383306c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::f64,     {  2,  7,  3,  5 } }, // MAXSD + CMPUNORDSD + BLENDVPD
383406c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::v2f64,   {  2,  7,  3,  5 } }, // MAXPD + CMPUNORDPD + BLENDVPD
383506c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::v4f64,   {  3,  7,  3,  6 } }, // MAXPD + CMPUNORDPD + BLENDVPD
3836bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::f32,     {  7, 15,  1,  1 } }, // vsqrtss
3837bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v4f32,   {  7, 15,  1,  1 } }, // vsqrtps
3838bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v8f32,   { 14, 21,  1,  3 } }, // vsqrtps
3839bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::f64,     { 14, 21,  1,  1 } }, // vsqrtsd
3840bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v2f64,   { 14, 21,  1,  1 } }, // vsqrtpd
3841bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v4f64,   { 28, 35,  1,  3 } }, // vsqrtpd
38420b57cec5SDimitry Andric   };
3843bdd1243dSDimitry Andric   static const CostKindTblEntry AVX1CostTbl[] = {
    // AVX1CostTbl: cost rows keyed by (opcode, MVT) for 128-bit and 256-bit
    // integer vectors plus scalar/vector floating-point FMAXNUM and FSQRT.
    // Most rows list four cost values; the saturating add/sub rows list one.
    // NOTE(review): the four values presumably map to the cost kinds
    // (reciprocal throughput, latency, code size, size-and-latency) in that
    // order - confirm against the CostKindTblEntry definition.
    // Many 256-bit integer rows are annotated "2 x 128-bit Op +
    // extract/insert": their cost models two 128-bit operations plus the
    // lane extract/insert overhead, so they exceed twice the 128-bit row.
3844bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v4i64,   {  6,  8,  6, 12 } }, // VBLENDVPD(X,VPSUBQ(0,X),X)
3845bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v8i32,   {  3,  6,  4,  5 } },
3846bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v16i16,  {  3,  6,  4,  5 } },
3847bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v32i8,   {  3,  6,  4,  5 } },
384806c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v4i64,   { 17, 20, 20, 33 } }, // 2 x 128-bit Op + extract/insert
384906c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v2i64,   {  8, 13, 10, 16 } },
385006c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v8i32,   { 17, 20, 20, 33 } }, // 2 x 128-bit Op + extract/insert
385106c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v4i32,   {  8, 13, 10, 16 } },
385206c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v16i16,  { 17, 20, 20, 33 } }, // 2 x 128-bit Op + extract/insert
385306c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v8i16,   {  8, 13, 10, 16 } },
385406c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v32i8,   { 13, 15, 17, 26 } }, // 2 x 128-bit Op + extract/insert
385506c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v16i8,   {  7,  7,  9, 13 } },
3856647cbc5dSDimitry Andric     { ISD::BSWAP,      MVT::v4i64,   {  5,  6,  5, 10 } },
3857647cbc5dSDimitry Andric     { ISD::BSWAP,      MVT::v2i64,   {  2,  2,  1,  3 } },
3858647cbc5dSDimitry Andric     { ISD::BSWAP,      MVT::v8i32,   {  5,  6,  5, 10 } },
3859647cbc5dSDimitry Andric     { ISD::BSWAP,      MVT::v4i32,   {  2,  2,  1,  3 } },
386006c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v16i16,  {  5,  6,  5, 10 } },
386106c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v8i16,   {  2,  2,  1,  3 } },
3862bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v4i64,   { 29, 33, 49, 58 } }, // 2 x 128-bit Op + extract/insert
3863bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v2i64,   { 14, 24, 24, 28 } },
3864bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v8i32,   { 24, 28, 39, 48 } }, // 2 x 128-bit Op + extract/insert
3865bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v4i32,   { 12, 20, 19, 23 } },
3866bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v16i16,  { 19, 22, 29, 38 } }, // 2 x 128-bit Op + extract/insert
3867bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v8i16,   {  9, 16, 14, 18 } },
3868bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v32i8,   { 14, 15, 19, 28 } }, // 2 x 128-bit Op + extract/insert
3869bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v16i8,   {  7, 12,  9, 13 } },
3870bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v4i64,   { 14, 18, 19, 28 } }, // 2 x 128-bit Op + extract/insert
3871bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v2i64,   {  7, 14, 10, 14 } },
3872bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v8i32,   { 18, 24, 27, 36 } }, // 2 x 128-bit Op + extract/insert
3873bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v4i32,   {  9, 20, 14, 18 } },
3874bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v16i16,  { 16, 21, 22, 31 } }, // 2 x 128-bit Op + extract/insert
3875bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v8i16,   {  8, 18, 11, 15 } },
3876bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v32i8,   { 13, 15, 16, 25 } }, // 2 x 128-bit Op + extract/insert
3877bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v16i8,   {  6, 12,  8, 12 } },
3878bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v4i64,   { 17, 22, 24, 33 } }, // 2 x 128-bit Op + extract/insert
3879bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v2i64,   {  9, 19, 13, 17 } },
3880bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v8i32,   { 21, 27, 32, 41 } }, // 2 x 128-bit Op + extract/insert
3881bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v4i32,   { 11, 24, 17, 21 } },
3882bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v16i16,  { 18, 24, 27, 36 } }, // 2 x 128-bit Op + extract/insert
3883bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v8i16,   {  9, 21, 14, 18 } },
3884bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v32i8,   { 15, 18, 21, 30 } }, // 2 x 128-bit Op + extract/insert
3885bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v16i8,   {  8, 16, 11, 15 } },
    // Saturating add/sub rows (single cost value) are interleaved with the
    // four-value integer min/max rows below.
3886bdd1243dSDimitry Andric     { ISD::SADDSAT,    MVT::v16i16,  {  4 } }, // 2 x 128-bit Op + extract/insert
3887bdd1243dSDimitry Andric     { ISD::SADDSAT,    MVT::v32i8,   {  4 } }, // 2 x 128-bit Op + extract/insert
3888bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v4i64,   {  6,  9,  6, 12 } }, // 2 x 128-bit Op + extract/insert
3889bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v2i64,   {  3,  7,  2,  4 } },
3890bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v8i32,   {  4,  6,  5,  6 } }, // 2 x 128-bit Op + extract/insert
3891bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v16i16,  {  4,  6,  5,  6 } }, // 2 x 128-bit Op + extract/insert
3892bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v32i8,   {  4,  6,  5,  6 } }, // 2 x 128-bit Op + extract/insert
3893bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v4i64,   {  6,  9,  6, 12 } }, // 2 x 128-bit Op + extract/insert
    // NOTE(review): the last value of the SMIN v2i64 row (3) differs from
    // the SMAX v2i64 row (4) - confirm this asymmetry is intended.
3894bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v2i64,   {  3,  7,  2,  3 } },
3895bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v8i32,   {  4,  6,  5,  6 } }, // 2 x 128-bit Op + extract/insert
3896bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v16i16,  {  4,  6,  5,  6 } }, // 2 x 128-bit Op + extract/insert
3897bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v32i8,   {  4,  6,  5,  6 } }, // 2 x 128-bit Op + extract/insert
3898bdd1243dSDimitry Andric     { ISD::SSUBSAT,    MVT::v16i16,  {  4 } }, // 2 x 128-bit Op + extract/insert
3899bdd1243dSDimitry Andric     { ISD::SSUBSAT,    MVT::v32i8,   {  4 } }, // 2 x 128-bit Op + extract/insert
3900bdd1243dSDimitry Andric     { ISD::UADDSAT,    MVT::v16i16,  {  4 } }, // 2 x 128-bit Op + extract/insert
3901bdd1243dSDimitry Andric     { ISD::UADDSAT,    MVT::v32i8,   {  4 } }, // 2 x 128-bit Op + extract/insert
3902bdd1243dSDimitry Andric     { ISD::UADDSAT,    MVT::v8i32,   {  8 } }, // 2 x 128-bit Op + extract/insert
3903bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v4i64,   {  9, 10, 11, 17 } }, // 2 x 128-bit Op + extract/insert
3904bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v2i64,   {  4,  8,  5,  7 } },
3905bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v8i32,   {  4,  6,  5,  6 } }, // 2 x 128-bit Op + extract/insert
3906bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v16i16,  {  4,  6,  5,  6 } }, // 2 x 128-bit Op + extract/insert
3907bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v32i8,   {  4,  6,  5,  6 } }, // 2 x 128-bit Op + extract/insert
3908bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v4i64,   {  9, 10, 11, 17 } }, // 2 x 128-bit Op + extract/insert
3909bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v2i64,   {  4,  8,  5,  7 } },
3910bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v8i32,   {  4,  6,  5,  6 } }, // 2 x 128-bit Op + extract/insert
3911bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v16i16,  {  4,  6,  5,  6 } }, // 2 x 128-bit Op + extract/insert
3912bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v32i8,   {  4,  6,  5,  6 } }, // 2 x 128-bit Op + extract/insert
3913bdd1243dSDimitry Andric     { ISD::USUBSAT,    MVT::v16i16,  {  4 } }, // 2 x 128-bit Op + extract/insert
3914bdd1243dSDimitry Andric     { ISD::USUBSAT,    MVT::v32i8,   {  4 } }, // 2 x 128-bit Op + extract/insert
3915bdd1243dSDimitry Andric     { ISD::USUBSAT,    MVT::v8i32,   {  6 } }, // 2 x 128-bit Op + extract/insert
    // Floating-point max (three-instruction sequences) and square root.
391606c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::f32,     {  3,  6,  3,  5 } }, // MAXSS + CMPUNORDSS + BLENDVPS
391706c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::v4f32,   {  3,  6,  3,  5 } }, // MAXPS + CMPUNORDPS + BLENDVPS
391806c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::v8f32,   {  5,  7,  3, 10 } }, // MAXPS + CMPUNORDPS + BLENDVPS
391906c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::f64,     {  3,  6,  3,  5 } }, // MAXSD + CMPUNORDSD + BLENDVPD
392006c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::v2f64,   {  3,  6,  3,  5 } }, // MAXPD + CMPUNORDPD + BLENDVPD
392106c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::v4f64,   {  5,  7,  3, 10 } }, // MAXPD + CMPUNORDPD + BLENDVPD
3922bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::f32,     { 21, 21,  1,  1 } }, // vsqrtss
3923bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v4f32,   { 21, 21,  1,  1 } }, // vsqrtps
3924bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v8f32,   { 42, 42,  1,  3 } }, // vsqrtps
3925bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::f64,     { 27, 27,  1,  1 } }, // vsqrtsd
3926bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v2f64,   { 27, 27,  1,  1 } }, // vsqrtpd
3927bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v4f64,   { 54, 54,  1,  3 } }, // vsqrtpd
39280b57cec5SDimitry Andric   };
3929*0fca6ea1SDimitry Andric   static const CostKindTblEntry GFNICostTbl[] = {
    // GFNICostTbl: cost rows keyed by (opcode, MVT) for operations whose
    // rows are all annotated with the gf2p8affineqb instruction: BITREVERSE
    // on scalar i8..i64 and on 128/256/512-bit integer vectors, plus
    // X86ISD::VROTLI on byte vectors. Each row lists four cost values.
    // NOTE(review): the four values presumably map to the cost kinds
    // (reciprocal throughput, latency, code size, size-and-latency) in that
    // order - confirm against the CostKindTblEntry definition.
3930*0fca6ea1SDimitry Andric     { ISD::BITREVERSE, MVT::i8,      {  3,  3,  3,  4 } }, // gf2p8affineqb
3931*0fca6ea1SDimitry Andric     { ISD::BITREVERSE, MVT::i16,     {  3,  3,  4,  6 } }, // gf2p8affineqb
3932*0fca6ea1SDimitry Andric     { ISD::BITREVERSE, MVT::i32,     {  3,  3,  4,  5 } }, // gf2p8affineqb
3933*0fca6ea1SDimitry Andric     { ISD::BITREVERSE, MVT::i64,     {  3,  3,  4,  6 } }, // gf2p8affineqb
3934*0fca6ea1SDimitry Andric     { ISD::BITREVERSE, MVT::v16i8,   {  1,  6,  1,  2 } }, // gf2p8affineqb
3935*0fca6ea1SDimitry Andric     { ISD::BITREVERSE, MVT::v32i8,   {  1,  6,  1,  2 } }, // gf2p8affineqb
3936*0fca6ea1SDimitry Andric     { ISD::BITREVERSE, MVT::v64i8,   {  1,  6,  1,  2 } }, // gf2p8affineqb
3937*0fca6ea1SDimitry Andric     { ISD::BITREVERSE, MVT::v8i16,   {  1,  8,  2,  4 } }, // gf2p8affineqb
3938*0fca6ea1SDimitry Andric     { ISD::BITREVERSE, MVT::v16i16,  {  1,  9,  2,  4 } }, // gf2p8affineqb
3939*0fca6ea1SDimitry Andric     { ISD::BITREVERSE, MVT::v32i16,  {  1,  9,  2,  4 } }, // gf2p8affineqb
3940*0fca6ea1SDimitry Andric     { ISD::BITREVERSE, MVT::v4i32,   {  1,  8,  2,  4 } }, // gf2p8affineqb
3941*0fca6ea1SDimitry Andric     { ISD::BITREVERSE, MVT::v8i32,   {  1,  9,  2,  4 } }, // gf2p8affineqb
3942*0fca6ea1SDimitry Andric     { ISD::BITREVERSE, MVT::v16i32,  {  1,  9,  2,  4 } }, // gf2p8affineqb
3943*0fca6ea1SDimitry Andric     { ISD::BITREVERSE, MVT::v2i64,   {  1,  8,  2,  4 } }, // gf2p8affineqb
3944*0fca6ea1SDimitry Andric     { ISD::BITREVERSE, MVT::v4i64,   {  1,  9,  2,  4 } }, // gf2p8affineqb
3945*0fca6ea1SDimitry Andric     { ISD::BITREVERSE, MVT::v8i64,   {  1,  9,  2,  4 } }, // gf2p8affineqb
    // Byte-vector rotate-by-immediate shares the byte-vector BITREVERSE cost.
3946*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v16i8,   {  1,  6,  1,  2 } }, // gf2p8affineqb
3947*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v32i8,   {  1,  6,  1,  2 } }, // gf2p8affineqb
3948*0fca6ea1SDimitry Andric     { X86ISD::VROTLI,  MVT::v64i8,   {  1,  6,  1,  2 } }, // gf2p8affineqb
3949*0fca6ea1SDimitry Andric   };
3950bdd1243dSDimitry Andric   static const CostKindTblEntry GLMCostTbl[] = {
    // GLMCostTbl: FSQRT-only cost rows for scalar and 128-bit f32/f64; the
    // trailing comment names the instruction each row models.
    // NOTE(review): "GLM" presumably refers to the Goldmont microarchitecture,
    // and the four values presumably map to (reciprocal throughput, latency,
    // code size, size-and-latency) - confirm both.
3951bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::f32,     { 19, 20, 1, 1 } }, // sqrtss
3952bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v4f32,   { 37, 41, 1, 5 } }, // sqrtps
3953bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::f64,     { 34, 35, 1, 1 } }, // sqrtsd
3954bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v2f64,   { 67, 71, 1, 5 } }, // sqrtpd
39550b57cec5SDimitry Andric   };
3956bdd1243dSDimitry Andric   static const CostKindTblEntry SLMCostTbl[] = {
    // SLMCostTbl: cost rows for 128-bit vector BSWAP and for FSQRT on scalar
    // and 128-bit f32/f64; FSQRT rows name the modeled instruction.
    // NOTE(review): "SLM" presumably refers to the Silvermont
    // microarchitecture, and the four values presumably map to (reciprocal
    // throughput, latency, code size, size-and-latency) - confirm both.
3957647cbc5dSDimitry Andric     { ISD::BSWAP,      MVT::v2i64,   {  5,  5, 1, 5 } },
3958647cbc5dSDimitry Andric     { ISD::BSWAP,      MVT::v4i32,   {  5,  5, 1, 5 } },
3959647cbc5dSDimitry Andric     { ISD::BSWAP,      MVT::v8i16,   {  5,  5, 1, 5 } },
3960bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::f32,     { 20, 20, 1, 1 } }, // sqrtss
3961bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v4f32,   { 40, 41, 1, 5 } }, // sqrtps
3962bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::f64,     { 35, 35, 1, 1 } }, // sqrtsd
3963bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v2f64,   { 70, 71, 1, 5 } }, // sqrtpd
39640b57cec5SDimitry Andric   };
3965bdd1243dSDimitry Andric   static const CostKindTblEntry SSE42CostTbl[] = {
    // SSE42CostTbl: cost rows for v4i32 unsigned saturating add/sub (single
    // cost value), FMAXNUM on scalar and 128-bit f32/f64, and f32 FSQRT.
    // Trailing comments give the instruction sequence or the data source;
    // the FSQRT numbers are taken from Nehalem, matching the file header's
    // note that SSE4.2 costs are based on that CPU.
    // NOTE(review): the four values presumably map to the cost kinds
    // (reciprocal throughput, latency, code size, size-and-latency) in that
    // order - confirm against the CostKindTblEntry definition.
3966bdd1243dSDimitry Andric     { ISD::USUBSAT,    MVT::v4i32,   {  2 } }, // pmaxud + psubd
3967bdd1243dSDimitry Andric     { ISD::UADDSAT,    MVT::v4i32,   {  3 } }, // not + pminud + paddd
396806c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::f32,     {  5,  5,  7,  7 } }, // MAXSS + CMPUNORDSS + BLENDVPS
396906c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::v4f32,   {  4,  4,  4,  5 } }, // MAXPS + CMPUNORDPS + BLENDVPS
397006c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::f64,     {  5,  5,  7,  7 } }, // MAXSD + CMPUNORDSD + BLENDVPD
397106c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::v2f64,   {  4,  4,  4,  5 } }, // MAXPD + CMPUNORDPD + BLENDVPD
3972bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::f32,     { 18, 18,  1,  1 } }, // Nehalem from http://www.agner.org/
3973bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v4f32,   { 18, 18,  1,  1 } }, // Nehalem from http://www.agner.org/
3974e8d8bef9SDimitry Andric   };
3975bdd1243dSDimitry Andric   static const CostKindTblEntry SSE41CostTbl[] = {
3976bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v2i64,   {  3,  4,  3,  5 } }, // BLENDVPD(X,PSUBQ(0,X),X)
3977bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v2i64,   {  3,  7,  2,  3 } },
3978bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v4i32,   {  1,  1,  1,  1 } },
3979bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v16i8,   {  1,  1,  1,  1 } },
3980bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v2i64,   {  3,  7,  2,  3 } },
3981bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v4i32,   {  1,  1,  1,  1 } },
3982bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v16i8,   {  1,  1,  1,  1 } },
3983bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v2i64,   {  2, 11,  6,  7 } },
3984bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v4i32,   {  1,  1,  1,  1 } },
3985bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v8i16,   {  1,  1,  1,  1 } },
3986bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v2i64,   {  2, 11,  6,  7 } },
3987bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v4i32,   {  1,  1,  1,  1 } },
3988bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v8i16,   {  1,  1,  1,  1 } },
39890b57cec5SDimitry Andric   };
3990bdd1243dSDimitry Andric   static const CostKindTblEntry SSSE3CostTbl[] = {
3991bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v4i32,   {  1,  2,  1,  1 } },
3992bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v8i16,   {  1,  2,  1,  1 } },
3993bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v16i8,   {  1,  2,  1,  1 } },
399406c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v2i64,   { 16, 20, 11, 21 } },
399506c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v4i32,   { 16, 20, 11, 21 } },
399606c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v8i16,   { 16, 20, 11, 21 } },
399706c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v16i8,   { 11, 12, 10, 16 } },
3998647cbc5dSDimitry Andric     { ISD::BSWAP,      MVT::v2i64,   {  2,  3,  1,  5 } },
3999647cbc5dSDimitry Andric     { ISD::BSWAP,      MVT::v4i32,   {  2,  3,  1,  5 } },
4000647cbc5dSDimitry Andric     { ISD::BSWAP,      MVT::v8i16,   {  2,  3,  1,  5 } },
4001bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v2i64,   { 18, 28, 28, 35 } },
4002bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v4i32,   { 15, 20, 22, 28 } },
4003bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v8i16,   { 13, 17, 16, 22 } },
4004bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v16i8,   { 11, 15, 10, 16 } },
4005bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v2i64,   { 13, 19, 12, 18 } },
4006bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v4i32,   { 18, 24, 16, 22 } },
4007bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v8i16,   { 13, 18, 14, 20 } },
4008bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v16i8,   { 11, 12, 10, 16 } },
4009bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v2i64,   { 13, 25, 15, 22 } },
4010bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v4i32,   { 18, 26, 19, 25 } },
4011bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v8i16,   { 13, 20, 17, 23 } },
4012bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v16i8,   { 11, 16, 13, 19 } }
40130b57cec5SDimitry Andric   };
4014bdd1243dSDimitry Andric   static const CostKindTblEntry SSE2CostTbl[] = {
4015bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v2i64,   {  3,  6,  5,  5 } },
4016bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v4i32,   {  1,  4,  4,  4 } },
4017bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v8i16,   {  1,  2,  3,  3 } },
4018bdd1243dSDimitry Andric     { ISD::ABS,        MVT::v16i8,   {  1,  2,  3,  3 } },
401906c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v2i64,   { 16, 20, 32, 32 } },
402006c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v4i32,   { 16, 20, 30, 30 } },
402106c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v8i16,   { 16, 20, 25, 25 } },
402206c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::v16i8,   { 11, 12, 21, 21 } },
402306c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v2i64,   {  5,  6, 11, 11 } },
402406c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v4i32,   {  5,  5,  9,  9 } },
402506c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::v8i16,   {  5,  5,  4,  5 } },
4026bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v2i64,   { 10, 45, 36, 38 } },
4027bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v4i32,   { 10, 45, 38, 40 } },
4028bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v8i16,   {  9, 38, 32, 34 } },
4029bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::v16i8,   {  8, 39, 29, 32 } },
4030bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v2i64,   { 12, 26, 16, 18 } },
4031bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v4i32,   { 15, 29, 21, 23 } },
4032bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v8i16,   { 13, 25, 18, 20 } },
4033bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::v16i8,   { 10, 21, 14, 16 } },
4034bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v2i64,   { 14, 28, 19, 21 } },
4035bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v4i32,   { 18, 31, 24, 26 } },
4036bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v8i16,   { 16, 27, 21, 23 } },
4037bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::v16i8,   { 13, 23, 17, 19 } },
4038bdd1243dSDimitry Andric     { ISD::SADDSAT,    MVT::v8i16,   {  1 } },
4039bdd1243dSDimitry Andric     { ISD::SADDSAT,    MVT::v16i8,   {  1 } },
4040bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v2i64,   {  4,  8, 15, 15 } },
4041bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v4i32,   {  2,  4,  5,  5 } },
4042bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v8i16,   {  1,  1,  1,  1 } },
4043bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::v16i8,   {  2,  4,  5,  5 } },
4044bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v2i64,   {  4,  8, 15, 15 } },
4045bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v4i32,   {  2,  4,  5,  5 } },
4046bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v8i16,   {  1,  1,  1,  1 } },
4047bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::v16i8,   {  2,  4,  5,  5 } },
4048bdd1243dSDimitry Andric     { ISD::SSUBSAT,    MVT::v8i16,   {  1 } },
4049bdd1243dSDimitry Andric     { ISD::SSUBSAT,    MVT::v16i8,   {  1 } },
4050bdd1243dSDimitry Andric     { ISD::UADDSAT,    MVT::v8i16,   {  1 } },
4051bdd1243dSDimitry Andric     { ISD::UADDSAT,    MVT::v16i8,   {  1 } },
4052bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v2i64,   {  4,  8, 15, 15 } },
4053bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v4i32,   {  2,  5,  8,  8 } },
4054bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v8i16,   {  1,  3,  3,  3 } },
4055bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::v16i8,   {  1,  1,  1,  1 } },
4056bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v2i64,   {  4,  8, 15, 15 } },
4057bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v4i32,   {  2,  5,  8,  8 } },
4058bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v8i16,   {  1,  3,  3,  3 } },
4059bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::v16i8,   {  1,  1,  1,  1 } },
4060bdd1243dSDimitry Andric     { ISD::USUBSAT,    MVT::v8i16,   {  1 } },
4061bdd1243dSDimitry Andric     { ISD::USUBSAT,    MVT::v16i8,   {  1 } },
406206c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::f64,     {  5,  5,  7,  7 } },
406306c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::v2f64,   {  4,  6,  6,  6 } },
4064bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::f64,     { 32, 32,  1,  1 } }, // Nehalem from http://www.agner.org/
4065bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v2f64,   { 32, 32,  1,  1 } }, // Nehalem from http://www.agner.org/
40660b57cec5SDimitry Andric   };
4067bdd1243dSDimitry Andric   static const CostKindTblEntry SSE1CostTbl[] = {
406806c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::f32,     {  5,  5,  7,  7 } },
406906c3fb27SDimitry Andric     { ISD::FMAXNUM,    MVT::v4f32,   {  4,  6,  6,  6 } },
4070bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::f32,     { 28, 30,  1,  2 } }, // Pentium III from http://www.agner.org/
4071bdd1243dSDimitry Andric     { ISD::FSQRT,      MVT::v4f32,   { 56, 56,  1,  2 } }, // Pentium III from http://www.agner.org/
40725ffd83dbSDimitry Andric   };
4073bdd1243dSDimitry Andric   static const CostKindTblEntry BMI64CostTbl[] = { // 64-bit targets
4074bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::i64,     {  1 } },
40755ffd83dbSDimitry Andric   };
4076bdd1243dSDimitry Andric   static const CostKindTblEntry BMI32CostTbl[] = { // 32 or 64-bit targets
4077bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::i32,     {  1 } },
4078bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::i16,     {  1 } },
4079bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::i8,      {  1 } },
40808bcb0991SDimitry Andric   };
4081bdd1243dSDimitry Andric   static const CostKindTblEntry LZCNT64CostTbl[] = { // 64-bit targets
4082bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::i64,     {  1 } },
40838bcb0991SDimitry Andric   };
4084bdd1243dSDimitry Andric   static const CostKindTblEntry LZCNT32CostTbl[] = { // 32 or 64-bit targets
4085bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::i32,     {  1 } },
4086bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::i16,     {  2 } },
4087bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::i8,      {  2 } },
40888bcb0991SDimitry Andric   };
4089bdd1243dSDimitry Andric   static const CostKindTblEntry POPCNT64CostTbl[] = { // 64-bit targets
4090bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::i64,     {  1, 1, 1, 1 } }, // popcnt
40918bcb0991SDimitry Andric   };
4092bdd1243dSDimitry Andric   static const CostKindTblEntry POPCNT32CostTbl[] = { // 32 or 64-bit targets
4093bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::i32,     {  1, 1, 1, 1 } }, // popcnt
4094bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::i16,     {  1, 1, 2, 2 } }, // popcnt(zext())
4095bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::i8,      {  1, 1, 2, 2 } }, // popcnt(zext())
40960b57cec5SDimitry Andric   };
4097bdd1243dSDimitry Andric   static const CostKindTblEntry X64CostTbl[] = { // 64-bit targets
    // Baseline costs for scalar i64 operations, keyed by (ISD opcode, MVT).
    // The lookup code further down reads a row as Entry->Cost[CostKind].
    // NOTE(review): the four-value rows presumably hold one cost per
    // TTI cost kind and the single-value rows a cost without a per-kind
    // breakdown -- confirm against the CostKindTblEntry definition.
4098*0fca6ea1SDimitry Andric     { ISD::ABS,        MVT::i64,     {  1,  2,  3,  3 } }, // SUB+CMOV
409906c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::i64,     { 10, 12, 20, 22 } },
410006c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::i64,     {  1,  2,  1,  2 } },
4101bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::i64,     {  4 } }, // BSR+XOR or BSR+XOR+CMOV
4102bdd1243dSDimitry Andric     { ISD::CTLZ_ZERO_UNDEF, MVT::i64,{  1,  1,  1,  1 } }, // BSR+XOR
4103bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::i64,     {  3 } }, // TEST+BSF+CMOV/BRANCH
4104bdd1243dSDimitry Andric     { ISD::CTTZ_ZERO_UNDEF, MVT::i64,{  1,  1,  1,  1 } }, // BSF
4105bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::i64,     { 10,  6, 19, 19 } },
4106bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::i64,     {  2, 3, 1, 3 } },
4107bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::i64,     {  2, 3, 1, 3 } },
41085f757f3fSDimitry Andric     { X86ISD::VROTLI,  MVT::i64,     {  1, 1, 1, 1 } },
4109bdd1243dSDimitry Andric     { ISD::FSHL,       MVT::i64,     {  4, 4, 1, 4 } },
4110bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::i64,     {  1,  3,  2,  3 } },
4111bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::i64,     {  1,  3,  2,  3 } },
4112bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::i64,     {  1,  3,  2,  3 } },
4113bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::i64,     {  1,  3,  2,  3 } },
4114bdd1243dSDimitry Andric     { ISD::SADDO,      MVT::i64,     {  1 } },
4115bdd1243dSDimitry Andric     { ISD::UADDO,      MVT::i64,     {  1 } },
4116bdd1243dSDimitry Andric     { ISD::UMULO,      MVT::i64,     {  2 } }, // mulq + seto
4117bdd1243dSDimitry Andric   };
4118bdd1243dSDimitry Andric   static const CostKindTblEntry X86CostTbl[] = { // 32 or 64-bit targets
4119*0fca6ea1SDimitry Andric     { ISD::ABS,        MVT::i32,     {  1,  2,  3,  3 } }, // SUB+XOR+SRA or SUB+CMOV
4120*0fca6ea1SDimitry Andric     { ISD::ABS,        MVT::i16,     {  2,  2,  3,  3 } }, // SUB+XOR+SRA or SUB+CMOV
4121*0fca6ea1SDimitry Andric     { ISD::ABS,        MVT::i8,      {  2,  4,  4,  3 } }, // SUB+XOR+SRA
412206c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::i32,     {  9, 12, 17, 19 } },
412306c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::i16,     {  9, 12, 17, 19 } },
412406c3fb27SDimitry Andric     { ISD::BITREVERSE, MVT::i8,      {  7,  9, 13, 14 } },
412506c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::i32,     {  1,  1,  1,  1 } },
412606c3fb27SDimitry Andric     { ISD::BSWAP,      MVT::i16,     {  1,  2,  1,  2 } }, // ROL
4127bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::i32,     {  4 } }, // BSR+XOR or BSR+XOR+CMOV
4128bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::i16,     {  4 } }, // BSR+XOR or BSR+XOR+CMOV
4129bdd1243dSDimitry Andric     { ISD::CTLZ,       MVT::i8,      {  4 } }, // BSR+XOR or BSR+XOR+CMOV
4130bdd1243dSDimitry Andric     { ISD::CTLZ_ZERO_UNDEF, MVT::i32,{  1,  1,  1,  1 } }, // BSR+XOR
4131bdd1243dSDimitry Andric     { ISD::CTLZ_ZERO_UNDEF, MVT::i16,{  2,  2,  3,  3 } }, // BSR+XOR
4132bdd1243dSDimitry Andric     { ISD::CTLZ_ZERO_UNDEF, MVT::i8, {  2,  2,  3,  3 } }, // BSR+XOR
4133bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::i32,     {  3 } }, // TEST+BSF+CMOV/BRANCH
4134bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::i16,     {  3 } }, // TEST+BSF+CMOV/BRANCH
4135bdd1243dSDimitry Andric     { ISD::CTTZ,       MVT::i8,      {  3 } }, // TEST+BSF+CMOV/BRANCH
4136bdd1243dSDimitry Andric     { ISD::CTTZ_ZERO_UNDEF, MVT::i32,{  1,  1,  1,  1 } }, // BSF
4137bdd1243dSDimitry Andric     { ISD::CTTZ_ZERO_UNDEF, MVT::i16,{  2,  2,  1,  1 } }, // BSF
4138bdd1243dSDimitry Andric     { ISD::CTTZ_ZERO_UNDEF, MVT::i8, {  2,  2,  1,  1 } }, // BSF
4139bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::i32,     {  8,  7, 15, 15 } },
4140bdd1243dSDimitry Andric     { ISD::CTPOP,      MVT::i16,     {  9,  8, 17, 17 } },
4141*0fca6ea1SDimitry Andric     { ISD::CTPOP,      MVT::i8,      {  7,  6,  6,  6 } },
4142bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::i32,     {  2,  3,  1,  3 } },
4143bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::i16,     {  2,  3,  1,  3 } },
4144bdd1243dSDimitry Andric     { ISD::ROTL,       MVT::i8,      {  2,  3,  1,  3 } },
4145bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::i32,     {  2,  3,  1,  3 } },
4146bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::i16,     {  2,  3,  1,  3 } },
4147bdd1243dSDimitry Andric     { ISD::ROTR,       MVT::i8,      {  2,  3,  1,  3 } },
41485f757f3fSDimitry Andric     { X86ISD::VROTLI,  MVT::i32,     {  1,  1,  1,  1 } },
41495f757f3fSDimitry Andric     { X86ISD::VROTLI,  MVT::i16,     {  1,  1,  1,  1 } },
41505f757f3fSDimitry Andric     { X86ISD::VROTLI,  MVT::i8,      {  1,  1,  1,  1 } },
4151bdd1243dSDimitry Andric     { ISD::FSHL,       MVT::i32,     {  4,  4,  1,  4 } },
4152bdd1243dSDimitry Andric     { ISD::FSHL,       MVT::i16,     {  4,  4,  2,  5 } },
4153bdd1243dSDimitry Andric     { ISD::FSHL,       MVT::i8,      {  4,  4,  2,  5 } },
4154bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::i32,     {  1,  2,  2,  3 } },
4155bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::i16,     {  1,  4,  2,  4 } },
4156bdd1243dSDimitry Andric     { ISD::SMAX,       MVT::i8,      {  1,  4,  2,  4 } },
4157bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::i32,     {  1,  2,  2,  3 } },
4158bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::i16,     {  1,  4,  2,  4 } },
4159bdd1243dSDimitry Andric     { ISD::SMIN,       MVT::i8,      {  1,  4,  2,  4 } },
4160bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::i32,     {  1,  2,  2,  3 } },
4161bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::i16,     {  1,  4,  2,  4 } },
4162bdd1243dSDimitry Andric     { ISD::UMAX,       MVT::i8,      {  1,  4,  2,  4 } },
4163bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::i32,     {  1,  2,  2,  3 } },
4164bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::i16,     {  1,  4,  2,  4 } },
4165bdd1243dSDimitry Andric     { ISD::UMIN,       MVT::i8,      {  1,  4,  2,  4 } },
4166bdd1243dSDimitry Andric     { ISD::SADDO,      MVT::i32,     {  1 } },
4167bdd1243dSDimitry Andric     { ISD::SADDO,      MVT::i16,     {  1 } },
4168bdd1243dSDimitry Andric     { ISD::SADDO,      MVT::i8,      {  1 } },
4169bdd1243dSDimitry Andric     { ISD::UADDO,      MVT::i32,     {  1 } },
4170bdd1243dSDimitry Andric     { ISD::UADDO,      MVT::i16,     {  1 } },
4171bdd1243dSDimitry Andric     { ISD::UADDO,      MVT::i8,      {  1 } },
4172bdd1243dSDimitry Andric     { ISD::UMULO,      MVT::i32,     {  2 } }, // mul + seto
4173bdd1243dSDimitry Andric     { ISD::UMULO,      MVT::i16,     {  2 } },
4174bdd1243dSDimitry Andric     { ISD::UMULO,      MVT::i8,      {  2 } },
41750b57cec5SDimitry Andric   };
41760b57cec5SDimitry Andric 
41775ffd83dbSDimitry Andric   Type *RetTy = ICA.getReturnType();
41780b57cec5SDimitry Andric   Type *OpTy = RetTy;
41795ffd83dbSDimitry Andric   Intrinsic::ID IID = ICA.getID();
41800b57cec5SDimitry Andric   unsigned ISD = ISD::DELETED_NODE;
41810b57cec5SDimitry Andric   switch (IID) {
41820b57cec5SDimitry Andric   default:
41830b57cec5SDimitry Andric     break;
4184e8d8bef9SDimitry Andric   case Intrinsic::abs:
4185e8d8bef9SDimitry Andric     ISD = ISD::ABS;
4186e8d8bef9SDimitry Andric     break;
41870b57cec5SDimitry Andric   case Intrinsic::bitreverse:
41880b57cec5SDimitry Andric     ISD = ISD::BITREVERSE;
41890b57cec5SDimitry Andric     break;
41900b57cec5SDimitry Andric   case Intrinsic::bswap:
41910b57cec5SDimitry Andric     ISD = ISD::BSWAP;
41920b57cec5SDimitry Andric     break;
41930b57cec5SDimitry Andric   case Intrinsic::ctlz:
41940b57cec5SDimitry Andric     ISD = ISD::CTLZ;
41950b57cec5SDimitry Andric     break;
41960b57cec5SDimitry Andric   case Intrinsic::ctpop:
41970b57cec5SDimitry Andric     ISD = ISD::CTPOP;
41980b57cec5SDimitry Andric     break;
41990b57cec5SDimitry Andric   case Intrinsic::cttz:
42000b57cec5SDimitry Andric     ISD = ISD::CTTZ;
42010b57cec5SDimitry Andric     break;
4202bdd1243dSDimitry Andric   case Intrinsic::fshl:
4203bdd1243dSDimitry Andric     ISD = ISD::FSHL;
4204bdd1243dSDimitry Andric     if (!ICA.isTypeBasedOnly()) {
4205bdd1243dSDimitry Andric       const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
42065f757f3fSDimitry Andric       if (Args[0] == Args[1]) {
4207bdd1243dSDimitry Andric         ISD = ISD::ROTL;
4208*0fca6ea1SDimitry Andric         // Handle uniform constant rotation amounts.
4209*0fca6ea1SDimitry Andric         // TODO: Handle funnel-shift cases.
4210*0fca6ea1SDimitry Andric         const APInt *Amt;
4211*0fca6ea1SDimitry Andric         if (Args[2] &&
4212*0fca6ea1SDimitry Andric             PatternMatch::match(Args[2], PatternMatch::m_APIntAllowPoison(Amt)))
42135f757f3fSDimitry Andric           ISD = X86ISD::VROTLI;
42145f757f3fSDimitry Andric       }
4215bdd1243dSDimitry Andric     }
4216bdd1243dSDimitry Andric     break;
4217bdd1243dSDimitry Andric   case Intrinsic::fshr:
4218bdd1243dSDimitry Andric     // FSHR has same costs so don't duplicate.
4219bdd1243dSDimitry Andric     ISD = ISD::FSHL;
4220bdd1243dSDimitry Andric     if (!ICA.isTypeBasedOnly()) {
4221bdd1243dSDimitry Andric       const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
42225f757f3fSDimitry Andric       if (Args[0] == Args[1]) {
4223bdd1243dSDimitry Andric         ISD = ISD::ROTR;
4224*0fca6ea1SDimitry Andric         // Handle uniform constant rotation amount.
4225*0fca6ea1SDimitry Andric         // TODO: Handle funnel-shift cases.
4226*0fca6ea1SDimitry Andric         const APInt *Amt;
4227*0fca6ea1SDimitry Andric         if (Args[2] &&
4228*0fca6ea1SDimitry Andric             PatternMatch::match(Args[2], PatternMatch::m_APIntAllowPoison(Amt)))
42295f757f3fSDimitry Andric           ISD = X86ISD::VROTLI;
42305f757f3fSDimitry Andric       }
4231bdd1243dSDimitry Andric     }
4232bdd1243dSDimitry Andric     break;
4233*0fca6ea1SDimitry Andric   case Intrinsic::lrint:
4234*0fca6ea1SDimitry Andric   case Intrinsic::llrint:
4235*0fca6ea1SDimitry Andric     // X86 can use the CVTP2SI instructions to lower lrint/llrint calls, which
4236*0fca6ea1SDimitry Andric     // have the same costs as the CVTTP2SI (fptosi) instructions
4237*0fca6ea1SDimitry Andric     if (!ICA.isTypeBasedOnly()) {
4238*0fca6ea1SDimitry Andric       const SmallVectorImpl<Type *> &ArgTys = ICA.getArgTypes();
4239*0fca6ea1SDimitry Andric       return getCastInstrCost(Instruction::FPToSI, RetTy, ArgTys[0],
4240*0fca6ea1SDimitry Andric                               TTI::CastContextHint::None, CostKind);
4241*0fca6ea1SDimitry Andric     }
4242*0fca6ea1SDimitry Andric     break;
42435ffd83dbSDimitry Andric   case Intrinsic::maxnum:
42445ffd83dbSDimitry Andric   case Intrinsic::minnum:
42455ffd83dbSDimitry Andric     // FMINNUM has same costs so don't duplicate.
42465ffd83dbSDimitry Andric     ISD = ISD::FMAXNUM;
42475ffd83dbSDimitry Andric     break;
42480b57cec5SDimitry Andric   case Intrinsic::sadd_sat:
42490b57cec5SDimitry Andric     ISD = ISD::SADDSAT;
42500b57cec5SDimitry Andric     break;
4251e8d8bef9SDimitry Andric   case Intrinsic::smax:
4252e8d8bef9SDimitry Andric     ISD = ISD::SMAX;
4253e8d8bef9SDimitry Andric     break;
4254e8d8bef9SDimitry Andric   case Intrinsic::smin:
4255e8d8bef9SDimitry Andric     ISD = ISD::SMIN;
4256e8d8bef9SDimitry Andric     break;
42570b57cec5SDimitry Andric   case Intrinsic::ssub_sat:
42580b57cec5SDimitry Andric     ISD = ISD::SSUBSAT;
42590b57cec5SDimitry Andric     break;
42600b57cec5SDimitry Andric   case Intrinsic::uadd_sat:
42610b57cec5SDimitry Andric     ISD = ISD::UADDSAT;
42620b57cec5SDimitry Andric     break;
4263e8d8bef9SDimitry Andric   case Intrinsic::umax:
4264e8d8bef9SDimitry Andric     ISD = ISD::UMAX;
4265e8d8bef9SDimitry Andric     break;
4266e8d8bef9SDimitry Andric   case Intrinsic::umin:
4267e8d8bef9SDimitry Andric     ISD = ISD::UMIN;
4268e8d8bef9SDimitry Andric     break;
42690b57cec5SDimitry Andric   case Intrinsic::usub_sat:
42700b57cec5SDimitry Andric     ISD = ISD::USUBSAT;
42710b57cec5SDimitry Andric     break;
42720b57cec5SDimitry Andric   case Intrinsic::sqrt:
42730b57cec5SDimitry Andric     ISD = ISD::FSQRT;
42740b57cec5SDimitry Andric     break;
42750b57cec5SDimitry Andric   case Intrinsic::sadd_with_overflow:
42760b57cec5SDimitry Andric   case Intrinsic::ssub_with_overflow:
42770b57cec5SDimitry Andric     // SSUBO has same costs so don't duplicate.
42780b57cec5SDimitry Andric     ISD = ISD::SADDO;
42790b57cec5SDimitry Andric     OpTy = RetTy->getContainedType(0);
42800b57cec5SDimitry Andric     break;
42810b57cec5SDimitry Andric   case Intrinsic::uadd_with_overflow:
42820b57cec5SDimitry Andric   case Intrinsic::usub_with_overflow:
42830b57cec5SDimitry Andric     // USUBO has same costs so don't duplicate.
42840b57cec5SDimitry Andric     ISD = ISD::UADDO;
42850b57cec5SDimitry Andric     OpTy = RetTy->getContainedType(0);
42860b57cec5SDimitry Andric     break;
4287e8d8bef9SDimitry Andric   case Intrinsic::umul_with_overflow:
4288e8d8bef9SDimitry Andric   case Intrinsic::smul_with_overflow:
4289e8d8bef9SDimitry Andric     // SMULO has same costs so don't duplicate.
4290e8d8bef9SDimitry Andric     ISD = ISD::UMULO;
4291e8d8bef9SDimitry Andric     OpTy = RetTy->getContainedType(0);
4292e8d8bef9SDimitry Andric     break;
42930b57cec5SDimitry Andric   }
42940b57cec5SDimitry Andric 
42950b57cec5SDimitry Andric   if (ISD != ISD::DELETED_NODE) {
4296*0fca6ea1SDimitry Andric     auto adjustTableCost = [&](int ISD, unsigned Cost,
4297*0fca6ea1SDimitry Andric                                std::pair<InstructionCost, MVT> LT,
4298*0fca6ea1SDimitry Andric                                FastMathFlags FMF) -> InstructionCost {
      // Turns a raw cost-table value into the cost returned for the
      // intrinsic. Captures the enclosing scope by reference and reads ST
      // and ICA from it.
      //   ISD  - opcode of the matched table entry. This parameter shadows
      //          the enclosing function's `unsigned ISD`; the lookup code
      //          passes Entry->ISD here.
      //   Cost - table value selected for the requested cost kind.
      //   LT   - type legalization result: LT.first is the cost multiplier,
      //          LT.second is the legalized MVT.
      //   FMF  - fast-math flags of the intrinsic being costed.
      // Returns LT.first * Cost unless one of the special cases below applies.
4299*0fca6ea1SDimitry Andric       InstructionCost LegalizationCost = LT.first;
4300*0fca6ea1SDimitry Andric       MVT MTy = LT.second;
4301*0fca6ea1SDimitry Andric 
4302*0fca6ea1SDimitry Andric       // If there are no NANs to deal with, then these are reduced to a
4303*0fca6ea1SDimitry Andric       // single MIN** or MAX** instruction instead of the MIN/CMP/SELECT that we
4304*0fca6ea1SDimitry Andric       // assume is used in the non-fast case.
4305*0fca6ea1SDimitry Andric       if (ISD == ISD::FMAXNUM || ISD == ISD::FMINNUM) {
4306*0fca6ea1SDimitry Andric         if (FMF.noNaNs())
          // The table value is ignored here: one instruction per legalized
          // part, whichever cost kind was requested.
4307*0fca6ea1SDimitry Andric           return LegalizationCost * 1;
4308*0fca6ea1SDimitry Andric       }
4309*0fca6ea1SDimitry Andric 
4310*0fca6ea1SDimitry Andric       // For cases where some ops can be folded into a load/store, assume free.
4311*0fca6ea1SDimitry Andric       if (MTy.isScalarInteger()) {
        // Only applies to scalar BSWAP on subtargets reporting both MOVBE
        // and fast MOVBE, and only when the query carries an instruction
        // (ICA.getInst() is non-null) whose users/operands can be inspected.
4312*0fca6ea1SDimitry Andric         if (ISD == ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) {
4313*0fca6ea1SDimitry Andric           if (const Instruction *II = ICA.getInst()) {
            // The byte swap's single user is a store.
4314*0fca6ea1SDimitry Andric             if (II->hasOneUse() && isa<StoreInst>(II->user_back()))
4315*0fca6ea1SDimitry Andric               return TTI::TCC_Free;
            // The swapped value comes from a load that has no other user.
4316*0fca6ea1SDimitry Andric             if (auto *LI = dyn_cast<LoadInst>(II->getOperand(0))) {
4317*0fca6ea1SDimitry Andric               if (LI->hasOneUse())
4318*0fca6ea1SDimitry Andric                 return TTI::TCC_Free;
4319*0fca6ea1SDimitry Andric             }
4320*0fca6ea1SDimitry Andric           }
4321*0fca6ea1SDimitry Andric         }
4322*0fca6ea1SDimitry Andric       }
4323*0fca6ea1SDimitry Andric 
      // Default: scale the table cost by the legalization multiplier.
4324*0fca6ea1SDimitry Andric       return LegalizationCost * (int)Cost;
4325*0fca6ea1SDimitry Andric     };
4326*0fca6ea1SDimitry Andric 
43270b57cec5SDimitry Andric     // Legalize the type.
4328bdd1243dSDimitry Andric     std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(OpTy);
43290b57cec5SDimitry Andric     MVT MTy = LT.second;
43300b57cec5SDimitry Andric 
4331bdd1243dSDimitry Andric     // Without BMI/LZCNT see if we're only looking for a *_ZERO_UNDEF cost.
4332bdd1243dSDimitry Andric     if (((ISD == ISD::CTTZ && !ST->hasBMI()) ||
4333bdd1243dSDimitry Andric          (ISD == ISD::CTLZ && !ST->hasLZCNT())) &&
4334bdd1243dSDimitry Andric         !MTy.isVector() && !ICA.isTypeBasedOnly()) {
4335bdd1243dSDimitry Andric       const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
4336bdd1243dSDimitry Andric       if (auto *Cst = dyn_cast<ConstantInt>(Args[1]))
4337bdd1243dSDimitry Andric         if (Cst->isAllOnesValue())
4338bdd1243dSDimitry Andric           ISD = ISD == ISD::CTTZ ? ISD::CTTZ_ZERO_UNDEF : ISD::CTLZ_ZERO_UNDEF;
4339bdd1243dSDimitry Andric     }
4340bdd1243dSDimitry Andric 
4341bdd1243dSDimitry Andric     // FSQRT is a single instruction.
4342bdd1243dSDimitry Andric     if (ISD == ISD::FSQRT && CostKind == TTI::TCK_CodeSize)
4343bdd1243dSDimitry Andric       return LT.first;
4344bdd1243dSDimitry Andric 
4345480093f4SDimitry Andric     if (ST->useGLMDivSqrtCosts())
43460b57cec5SDimitry Andric       if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy))
4347bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4348*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
43490b57cec5SDimitry Andric 
4350349cc55cSDimitry Andric     if (ST->useSLMArithCosts())
43510b57cec5SDimitry Andric       if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
4352bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4353*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
4354bdd1243dSDimitry Andric 
4355bdd1243dSDimitry Andric     if (ST->hasVBMI2())
4356bdd1243dSDimitry Andric       if (const auto *Entry = CostTableLookup(AVX512VBMI2CostTbl, ISD, MTy))
4357bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4358*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
43590b57cec5SDimitry Andric 
4360349cc55cSDimitry Andric     if (ST->hasBITALG())
4361349cc55cSDimitry Andric       if (const auto *Entry = CostTableLookup(AVX512BITALGCostTbl, ISD, MTy))
4362bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4363*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
4364349cc55cSDimitry Andric 
4365349cc55cSDimitry Andric     if (ST->hasVPOPCNTDQ())
4366349cc55cSDimitry Andric       if (const auto *Entry = CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy))
4367bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4368*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
4369*0fca6ea1SDimitry Andric 
4370*0fca6ea1SDimitry Andric     if (ST->hasGFNI())
4371*0fca6ea1SDimitry Andric       if (const auto *Entry = CostTableLookup(GFNICostTbl, ISD, MTy))
4372*0fca6ea1SDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4373*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
4374349cc55cSDimitry Andric 
43750b57cec5SDimitry Andric     if (ST->hasCDI())
43760b57cec5SDimitry Andric       if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy))
4377bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4378*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
43790b57cec5SDimitry Andric 
43800b57cec5SDimitry Andric     if (ST->hasBWI())
43810b57cec5SDimitry Andric       if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
4382bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4383*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
43840b57cec5SDimitry Andric 
43850b57cec5SDimitry Andric     if (ST->hasAVX512())
43860b57cec5SDimitry Andric       if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
4387bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4388*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
43890b57cec5SDimitry Andric 
43900b57cec5SDimitry Andric     if (ST->hasXOP())
43910b57cec5SDimitry Andric       if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
4392bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4393*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
43940b57cec5SDimitry Andric 
43950b57cec5SDimitry Andric     if (ST->hasAVX2())
43960b57cec5SDimitry Andric       if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
4397bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4398*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
43990b57cec5SDimitry Andric 
44000b57cec5SDimitry Andric     if (ST->hasAVX())
44010b57cec5SDimitry Andric       if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
4402bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4403*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
44040b57cec5SDimitry Andric 
44050b57cec5SDimitry Andric     if (ST->hasSSE42())
44060b57cec5SDimitry Andric       if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
4407bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4408*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
4409e8d8bef9SDimitry Andric 
4410e8d8bef9SDimitry Andric     if (ST->hasSSE41())
4411e8d8bef9SDimitry Andric       if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
4412bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4413*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
44140b57cec5SDimitry Andric 
44150b57cec5SDimitry Andric     if (ST->hasSSSE3())
44160b57cec5SDimitry Andric       if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy))
4417bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4418*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
44190b57cec5SDimitry Andric 
44200b57cec5SDimitry Andric     if (ST->hasSSE2())
44210b57cec5SDimitry Andric       if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
4422bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4423*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
44240b57cec5SDimitry Andric 
44250b57cec5SDimitry Andric     if (ST->hasSSE1())
44260b57cec5SDimitry Andric       if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
4427bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4428*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
44290b57cec5SDimitry Andric 
44305ffd83dbSDimitry Andric     if (ST->hasBMI()) {
44315ffd83dbSDimitry Andric       if (ST->is64Bit())
44325ffd83dbSDimitry Andric         if (const auto *Entry = CostTableLookup(BMI64CostTbl, ISD, MTy))
4433bdd1243dSDimitry Andric           if (auto KindCost = Entry->Cost[CostKind])
4434*0fca6ea1SDimitry Andric             return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
44355ffd83dbSDimitry Andric 
44365ffd83dbSDimitry Andric       if (const auto *Entry = CostTableLookup(BMI32CostTbl, ISD, MTy))
4437bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4438*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
44395ffd83dbSDimitry Andric     }
44405ffd83dbSDimitry Andric 
44418bcb0991SDimitry Andric     if (ST->hasLZCNT()) {
44428bcb0991SDimitry Andric       if (ST->is64Bit())
44438bcb0991SDimitry Andric         if (const auto *Entry = CostTableLookup(LZCNT64CostTbl, ISD, MTy))
4444bdd1243dSDimitry Andric           if (auto KindCost = Entry->Cost[CostKind])
4445*0fca6ea1SDimitry Andric             return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
44468bcb0991SDimitry Andric 
44478bcb0991SDimitry Andric       if (const auto *Entry = CostTableLookup(LZCNT32CostTbl, ISD, MTy))
4448bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4449*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
44508bcb0991SDimitry Andric     }
44518bcb0991SDimitry Andric 
44528bcb0991SDimitry Andric     if (ST->hasPOPCNT()) {
44538bcb0991SDimitry Andric       if (ST->is64Bit())
44548bcb0991SDimitry Andric         if (const auto *Entry = CostTableLookup(POPCNT64CostTbl, ISD, MTy))
4455bdd1243dSDimitry Andric           if (auto KindCost = Entry->Cost[CostKind])
4456*0fca6ea1SDimitry Andric             return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
44578bcb0991SDimitry Andric 
44588bcb0991SDimitry Andric       if (const auto *Entry = CostTableLookup(POPCNT32CostTbl, ISD, MTy))
4459bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4460*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
4461fe6060f1SDimitry Andric     }
4462fe6060f1SDimitry Andric 
44630b57cec5SDimitry Andric     if (ST->is64Bit())
44640b57cec5SDimitry Andric       if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
4465bdd1243dSDimitry Andric         if (auto KindCost = Entry->Cost[CostKind])
4466*0fca6ea1SDimitry Andric           return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
44670b57cec5SDimitry Andric 
44680b57cec5SDimitry Andric     if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy))
4469bdd1243dSDimitry Andric       if (auto KindCost = Entry->Cost[CostKind])
4470*0fca6ea1SDimitry Andric         return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.getFlags());
44710b57cec5SDimitry Andric   }
44720b57cec5SDimitry Andric 
44735ffd83dbSDimitry Andric   return BaseT::getIntrinsicInstrCost(ICA, CostKind);
44740b57cec5SDimitry Andric }
44750b57cec5SDimitry Andric 
4476fe6060f1SDimitry Andric InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
4477bdd1243dSDimitry Andric                                                TTI::TargetCostKind CostKind,
4478bdd1243dSDimitry Andric                                                unsigned Index, Value *Op0,
4479bdd1243dSDimitry Andric                                                Value *Op1) {
4480480093f4SDimitry Andric   static const CostTblEntry SLMCostTbl[] = {
4481480093f4SDimitry Andric      { ISD::EXTRACT_VECTOR_ELT,       MVT::i8,      4 },
4482480093f4SDimitry Andric      { ISD::EXTRACT_VECTOR_ELT,       MVT::i16,     4 },
4483480093f4SDimitry Andric      { ISD::EXTRACT_VECTOR_ELT,       MVT::i32,     4 },
4484480093f4SDimitry Andric      { ISD::EXTRACT_VECTOR_ELT,       MVT::i64,     7 }
4485480093f4SDimitry Andric    };
4486480093f4SDimitry Andric 
44870b57cec5SDimitry Andric   assert(Val->isVectorTy() && "This must be a vector type");
44880b57cec5SDimitry Andric   Type *ScalarType = Val->getScalarType();
448981ad6265SDimitry Andric   InstructionCost RegisterFileMoveCost = 0;
44900b57cec5SDimitry Andric 
4491fe6060f1SDimitry Andric   // Non-immediate extraction/insertion can be handled as a sequence of
4492fe6060f1SDimitry Andric   // aliased loads+stores via the stack.
4493fe6060f1SDimitry Andric   if (Index == -1U && (Opcode == Instruction::ExtractElement ||
4494fe6060f1SDimitry Andric                        Opcode == Instruction::InsertElement)) {
4495fe6060f1SDimitry Andric     // TODO: On some SSE41+ targets, we expand to cmp+splat+select patterns:
4496fe6060f1SDimitry Andric     // inselt N0, N1, N2 --> select (SplatN2 == {0,1,2...}) ? SplatN1 : N0.
4497fe6060f1SDimitry Andric 
4498fe6060f1SDimitry Andric     // TODO: Move this to BasicTTIImpl.h? We'd need better gep + index handling.
4499fe6060f1SDimitry Andric     assert(isa<FixedVectorType>(Val) && "Fixed vector type expected");
4500fe6060f1SDimitry Andric     Align VecAlign = DL.getPrefTypeAlign(Val);
4501fe6060f1SDimitry Andric     Align SclAlign = DL.getPrefTypeAlign(ScalarType);
4502fe6060f1SDimitry Andric 
4503fe6060f1SDimitry Andric     // Extract - store vector to stack, load scalar.
4504fe6060f1SDimitry Andric     if (Opcode == Instruction::ExtractElement) {
4505bdd1243dSDimitry Andric       return getMemoryOpCost(Instruction::Store, Val, VecAlign, 0, CostKind) +
4506fe6060f1SDimitry Andric              getMemoryOpCost(Instruction::Load, ScalarType, SclAlign, 0,
4507bdd1243dSDimitry Andric                              CostKind);
4508fe6060f1SDimitry Andric     }
4509fe6060f1SDimitry Andric     // Insert - store vector to stack, store scalar, load vector.
4510fe6060f1SDimitry Andric     if (Opcode == Instruction::InsertElement) {
4511bdd1243dSDimitry Andric       return getMemoryOpCost(Instruction::Store, Val, VecAlign, 0, CostKind) +
4512fe6060f1SDimitry Andric              getMemoryOpCost(Instruction::Store, ScalarType, SclAlign, 0,
4513bdd1243dSDimitry Andric                              CostKind) +
4514bdd1243dSDimitry Andric              getMemoryOpCost(Instruction::Load, Val, VecAlign, 0, CostKind);
4515fe6060f1SDimitry Andric     }
4516fe6060f1SDimitry Andric   }
4517fe6060f1SDimitry Andric 
45185ffd83dbSDimitry Andric   if (Index != -1U && (Opcode == Instruction::ExtractElement ||
45195ffd83dbSDimitry Andric                        Opcode == Instruction::InsertElement)) {
452081ad6265SDimitry Andric     // Extraction of vXi1 elements are now efficiently handled by MOVMSK.
452181ad6265SDimitry Andric     if (Opcode == Instruction::ExtractElement &&
452281ad6265SDimitry Andric         ScalarType->getScalarSizeInBits() == 1 &&
452381ad6265SDimitry Andric         cast<FixedVectorType>(Val)->getNumElements() > 1)
452481ad6265SDimitry Andric       return 1;
452581ad6265SDimitry Andric 
45260b57cec5SDimitry Andric     // Legalize the type.
4527bdd1243dSDimitry Andric     std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Val);
45280b57cec5SDimitry Andric 
45290b57cec5SDimitry Andric     // This type is legalized to a scalar type.
45300b57cec5SDimitry Andric     if (!LT.second.isVector())
45310b57cec5SDimitry Andric       return 0;
45320b57cec5SDimitry Andric 
45330b57cec5SDimitry Andric     // The type may be split. Normalize the index to the new type.
453481ad6265SDimitry Andric     unsigned SizeInBits = LT.second.getSizeInBits();
45355ffd83dbSDimitry Andric     unsigned NumElts = LT.second.getVectorNumElements();
45365ffd83dbSDimitry Andric     unsigned SubNumElts = NumElts;
45375ffd83dbSDimitry Andric     Index = Index % NumElts;
45385ffd83dbSDimitry Andric 
45395ffd83dbSDimitry Andric     // For >128-bit vectors, we need to extract higher 128-bit subvectors.
45405ffd83dbSDimitry Andric     // For inserts, we also need to insert the subvector back.
454181ad6265SDimitry Andric     if (SizeInBits > 128) {
454281ad6265SDimitry Andric       assert((SizeInBits % 128) == 0 && "Illegal vector");
454381ad6265SDimitry Andric       unsigned NumSubVecs = SizeInBits / 128;
45445ffd83dbSDimitry Andric       SubNumElts = NumElts / NumSubVecs;
45455ffd83dbSDimitry Andric       if (SubNumElts <= Index) {
45465ffd83dbSDimitry Andric         RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1);
45475ffd83dbSDimitry Andric         Index %= SubNumElts;
45485ffd83dbSDimitry Andric       }
45495ffd83dbSDimitry Andric     }
45500b57cec5SDimitry Andric 
4551bdd1243dSDimitry Andric     MVT MScalarTy = LT.second.getScalarType();
4552bdd1243dSDimitry Andric     auto IsCheapPInsrPExtrInsertPS = [&]() {
4553bdd1243dSDimitry Andric       // Assume pinsr/pextr XMM <-> GPR is relatively cheap on all targets.
4554bdd1243dSDimitry Andric       // Also, assume insertps is relatively cheap on all >= SSE41 targets.
4555bdd1243dSDimitry Andric       return (MScalarTy == MVT::i16 && ST->hasSSE2()) ||
4556bdd1243dSDimitry Andric              (MScalarTy.isInteger() && ST->hasSSE41()) ||
4557bdd1243dSDimitry Andric              (MScalarTy == MVT::f32 && ST->hasSSE41() &&
4558bdd1243dSDimitry Andric               Opcode == Instruction::InsertElement);
4559bdd1243dSDimitry Andric     };
4560bdd1243dSDimitry Andric 
4561480093f4SDimitry Andric     if (Index == 0) {
45620b57cec5SDimitry Andric       // Floating point scalars are already located in index #0.
45635ffd83dbSDimitry Andric       // Many insertions to #0 can fold away for scalar fp-ops, so let's assume
45645ffd83dbSDimitry Andric       // true for all.
456506c3fb27SDimitry Andric       if (ScalarType->isFloatingPointTy() &&
456606c3fb27SDimitry Andric           (Opcode != Instruction::InsertElement || !Op0 ||
456706c3fb27SDimitry Andric            isa<UndefValue>(Op0)))
45685ffd83dbSDimitry Andric         return RegisterFileMoveCost;
4569480093f4SDimitry Andric 
4570bdd1243dSDimitry Andric       if (Opcode == Instruction::InsertElement &&
4571bdd1243dSDimitry Andric           isa_and_nonnull<UndefValue>(Op0)) {
4572bdd1243dSDimitry Andric         // Consider the gather cost to be cheap.
4573bdd1243dSDimitry Andric         if (isa_and_nonnull<LoadInst>(Op1))
4574bdd1243dSDimitry Andric           return RegisterFileMoveCost;
4575bdd1243dSDimitry Andric         if (!IsCheapPInsrPExtrInsertPS()) {
4576bdd1243dSDimitry Andric           // mov constant-to-GPR + movd/movq GPR -> XMM.
4577bdd1243dSDimitry Andric           if (isa_and_nonnull<Constant>(Op1) && Op1->getType()->isIntegerTy())
4578bdd1243dSDimitry Andric             return 2 + RegisterFileMoveCost;
4579bdd1243dSDimitry Andric           // Assume movd/movq GPR -> XMM is relatively cheap on all targets.
4580bdd1243dSDimitry Andric           return 1 + RegisterFileMoveCost;
4581bdd1243dSDimitry Andric         }
4582bdd1243dSDimitry Andric       }
4583bdd1243dSDimitry Andric 
45845ffd83dbSDimitry Andric       // Assume movd/movq XMM -> GPR is relatively cheap on all targets.
45855ffd83dbSDimitry Andric       if (ScalarType->isIntegerTy() && Opcode == Instruction::ExtractElement)
45865ffd83dbSDimitry Andric         return 1 + RegisterFileMoveCost;
4587480093f4SDimitry Andric     }
4588480093f4SDimitry Andric 
4589480093f4SDimitry Andric     int ISD = TLI->InstructionOpcodeToISD(Opcode);
4590480093f4SDimitry Andric     assert(ISD && "Unexpected vector opcode");
4591349cc55cSDimitry Andric     if (ST->useSLMArithCosts())
4592480093f4SDimitry Andric       if (auto *Entry = CostTableLookup(SLMCostTbl, ISD, MScalarTy))
45935ffd83dbSDimitry Andric         return Entry->Cost + RegisterFileMoveCost;
45945ffd83dbSDimitry Andric 
4595bdd1243dSDimitry Andric     // Consider cheap cases.
4596bdd1243dSDimitry Andric     if (IsCheapPInsrPExtrInsertPS())
45975ffd83dbSDimitry Andric       return 1 + RegisterFileMoveCost;
45985ffd83dbSDimitry Andric 
45995ffd83dbSDimitry Andric     // For extractions we just need to shuffle the element to index 0, which
46005ffd83dbSDimitry Andric     // should be very cheap (assume cost = 1). For insertions we need to shuffle
46015ffd83dbSDimitry Andric     // the elements to its destination. In both cases we must handle the
46025ffd83dbSDimitry Andric     // subvector move(s).
46035ffd83dbSDimitry Andric     // If the vector type is already less than 128-bits then don't reduce it.
46045ffd83dbSDimitry Andric     // TODO: Under what circumstances should we shuffle using the full width?
4605fe6060f1SDimitry Andric     InstructionCost ShuffleCost = 1;
46065ffd83dbSDimitry Andric     if (Opcode == Instruction::InsertElement) {
46075ffd83dbSDimitry Andric       auto *SubTy = cast<VectorType>(Val);
46085ffd83dbSDimitry Andric       EVT VT = TLI->getValueType(DL, Val);
46095ffd83dbSDimitry Andric       if (VT.getScalarType() != MScalarTy || VT.getSizeInBits() >= 128)
46105ffd83dbSDimitry Andric         SubTy = FixedVectorType::get(ScalarType, SubNumElts);
4611bdd1243dSDimitry Andric       ShuffleCost = getShuffleCost(TTI::SK_PermuteTwoSrc, SubTy, std::nullopt,
4612bdd1243dSDimitry Andric                                    CostKind, 0, SubTy);
46135ffd83dbSDimitry Andric     }
46145ffd83dbSDimitry Andric     int IntOrFpCost = ScalarType->isFloatingPointTy() ? 0 : 1;
46155ffd83dbSDimitry Andric     return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
46160b57cec5SDimitry Andric   }
46170b57cec5SDimitry Andric 
4618bdd1243dSDimitry Andric   return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1) +
4619bdd1243dSDimitry Andric          RegisterFileMoveCost;
46200b57cec5SDimitry Andric }
46210b57cec5SDimitry Andric 
4622bdd1243dSDimitry Andric InstructionCost
4623bdd1243dSDimitry Andric X86TTIImpl::getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
4624bdd1243dSDimitry Andric                                      bool Insert, bool Extract,
4625bdd1243dSDimitry Andric                                      TTI::TargetCostKind CostKind) {
462681ad6265SDimitry Andric   assert(DemandedElts.getBitWidth() ==
462781ad6265SDimitry Andric              cast<FixedVectorType>(Ty)->getNumElements() &&
462881ad6265SDimitry Andric          "Vector size mismatch");
462981ad6265SDimitry Andric 
4630bdd1243dSDimitry Andric   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
463181ad6265SDimitry Andric   MVT MScalarTy = LT.second.getScalarType();
4632bdd1243dSDimitry Andric   unsigned LegalVectorBitWidth = LT.second.getSizeInBits();
4633fe6060f1SDimitry Andric   InstructionCost Cost = 0;
46345ffd83dbSDimitry Andric 
4635bdd1243dSDimitry Andric   constexpr unsigned LaneBitWidth = 128;
4636bdd1243dSDimitry Andric   assert((LegalVectorBitWidth < LaneBitWidth ||
4637bdd1243dSDimitry Andric           (LegalVectorBitWidth % LaneBitWidth) == 0) &&
4638bdd1243dSDimitry Andric          "Illegal vector");
4639bdd1243dSDimitry Andric 
4640bdd1243dSDimitry Andric   const int NumLegalVectors = *LT.first.getValue();
4641bdd1243dSDimitry Andric   assert(NumLegalVectors >= 0 && "Negative cost!");
4642bdd1243dSDimitry Andric 
46435ffd83dbSDimitry Andric   // For insertions, a ISD::BUILD_VECTOR style vector initialization can be much
46445ffd83dbSDimitry Andric   // cheaper than an accumulation of ISD::INSERT_VECTOR_ELT.
46455ffd83dbSDimitry Andric   if (Insert) {
46465ffd83dbSDimitry Andric     if ((MScalarTy == MVT::i16 && ST->hasSSE2()) ||
46475ffd83dbSDimitry Andric         (MScalarTy.isInteger() && ST->hasSSE41()) ||
46485ffd83dbSDimitry Andric         (MScalarTy == MVT::f32 && ST->hasSSE41())) {
46495ffd83dbSDimitry Andric       // For types we can insert directly, insertion into 128-bit sub vectors is
46505ffd83dbSDimitry Andric       // cheap, followed by a cheap chain of concatenations.
4651bdd1243dSDimitry Andric       if (LegalVectorBitWidth <= LaneBitWidth) {
4652bdd1243dSDimitry Andric         Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert,
4653bdd1243dSDimitry Andric                                                 /*Extract*/ false, CostKind);
46545ffd83dbSDimitry Andric       } else {
4655e8d8bef9SDimitry Andric         // In each 128-lane, if at least one index is demanded but not all
4656e8d8bef9SDimitry Andric         // indices are demanded and this 128-lane is not the first 128-lane of
4657e8d8bef9SDimitry Andric         // the legalized-vector, then this 128-lane needs a extracti128; If in
4658e8d8bef9SDimitry Andric         // each 128-lane, there is at least one demanded index, this 128-lane
4659e8d8bef9SDimitry Andric         // needs a inserti128.
4660e8d8bef9SDimitry Andric 
4661e8d8bef9SDimitry Andric         // The following cases will help you build a better understanding:
4662e8d8bef9SDimitry Andric         // Assume we insert several elements into a v8i32 vector in avx2,
4663e8d8bef9SDimitry Andric         // Case#1: inserting into 1th index needs vpinsrd + inserti128.
4664e8d8bef9SDimitry Andric         // Case#2: inserting into 5th index needs extracti128 + vpinsrd +
4665e8d8bef9SDimitry Andric         // inserti128.
4666e8d8bef9SDimitry Andric         // Case#3: inserting into 4,5,6,7 index needs 4*vpinsrd + inserti128.
4667bdd1243dSDimitry Andric         assert((LegalVectorBitWidth % LaneBitWidth) == 0 && "Illegal vector");
4668bdd1243dSDimitry Andric         unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4669bdd1243dSDimitry Andric         unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4670bdd1243dSDimitry Andric         unsigned NumLegalElts =
4671bdd1243dSDimitry Andric             LT.second.getVectorNumElements() * NumLegalVectors;
4672bdd1243dSDimitry Andric         assert(NumLegalElts >= DemandedElts.getBitWidth() &&
4673bdd1243dSDimitry Andric                "Vector has been legalized to smaller element count");
4674bdd1243dSDimitry Andric         assert((NumLegalElts % NumLanesTotal) == 0 &&
4675bdd1243dSDimitry Andric                "Unexpected elts per lane");
4676bdd1243dSDimitry Andric         unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
46775ffd83dbSDimitry Andric 
4678bdd1243dSDimitry Andric         APInt WidenedDemandedElts = DemandedElts.zext(NumLegalElts);
4679bdd1243dSDimitry Andric         auto *LaneTy =
4680bdd1243dSDimitry Andric             FixedVectorType::get(Ty->getElementType(), NumEltsPerLane);
4681bdd1243dSDimitry Andric 
4682bdd1243dSDimitry Andric         for (unsigned I = 0; I != NumLanesTotal; ++I) {
4683bdd1243dSDimitry Andric           APInt LaneEltMask = WidenedDemandedElts.extractBits(
4684bdd1243dSDimitry Andric               NumEltsPerLane, NumEltsPerLane * I);
468506c3fb27SDimitry Andric           if (LaneEltMask.isZero())
4686bdd1243dSDimitry Andric             continue;
4687bdd1243dSDimitry Andric           // FIXME: we don't need to extract if all non-demanded elements
4688bdd1243dSDimitry Andric           //        are legalization-inserted padding.
4689bdd1243dSDimitry Andric           if (!LaneEltMask.isAllOnes())
4690bdd1243dSDimitry Andric             Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt,
4691bdd1243dSDimitry Andric                                    CostKind, I * NumEltsPerLane, LaneTy);
4692bdd1243dSDimitry Andric           Cost += BaseT::getScalarizationOverhead(LaneTy, LaneEltMask, Insert,
4693bdd1243dSDimitry Andric                                                   /*Extract*/ false, CostKind);
4694bdd1243dSDimitry Andric         }
4695bdd1243dSDimitry Andric 
4696bdd1243dSDimitry Andric         APInt AffectedLanes =
4697bdd1243dSDimitry Andric             APIntOps::ScaleBitMask(WidenedDemandedElts, NumLanesTotal);
4698bdd1243dSDimitry Andric         APInt FullyAffectedLegalVectors = APIntOps::ScaleBitMask(
4699bdd1243dSDimitry Andric             AffectedLanes, NumLegalVectors, /*MatchAllBits=*/true);
4700bdd1243dSDimitry Andric         for (int LegalVec = 0; LegalVec != NumLegalVectors; ++LegalVec) {
4701bdd1243dSDimitry Andric           for (unsigned Lane = 0; Lane != NumLegalLanes; ++Lane) {
4702bdd1243dSDimitry Andric             unsigned I = NumLegalLanes * LegalVec + Lane;
4703bdd1243dSDimitry Andric             // No need to insert unaffected lane; or lane 0 of each legal vector
4704bdd1243dSDimitry Andric             // iff ALL lanes of that vector were affected and will be inserted.
4705bdd1243dSDimitry Andric             if (!AffectedLanes[I] ||
4706bdd1243dSDimitry Andric                 (Lane == 0 && FullyAffectedLegalVectors[LegalVec]))
4707bdd1243dSDimitry Andric               continue;
4708bdd1243dSDimitry Andric             Cost += getShuffleCost(TTI::SK_InsertSubvector, Ty, std::nullopt,
4709bdd1243dSDimitry Andric                                    CostKind, I * NumEltsPerLane, LaneTy);
4710bdd1243dSDimitry Andric           }
4711bdd1243dSDimitry Andric         }
47125ffd83dbSDimitry Andric       }
47135ffd83dbSDimitry Andric     } else if (LT.second.isVector()) {
47145ffd83dbSDimitry Andric       // Without fast insertion, we need to use MOVD/MOVQ to pass each demanded
47155ffd83dbSDimitry Andric       // integer element as a SCALAR_TO_VECTOR, then we build the vector as a
47165ffd83dbSDimitry Andric       // series of UNPCK followed by CONCAT_VECTORS - all of these can be
47175ffd83dbSDimitry Andric       // considered cheap.
47185ffd83dbSDimitry Andric       if (Ty->isIntOrIntVectorTy())
471906c3fb27SDimitry Andric         Cost += DemandedElts.popcount();
47205ffd83dbSDimitry Andric 
47215ffd83dbSDimitry Andric       // Get the smaller of the legalized or original pow2-extended number of
47225ffd83dbSDimitry Andric       // vector elements, which represents the number of unpacks we'll end up
47235ffd83dbSDimitry Andric       // performing.
47245ffd83dbSDimitry Andric       unsigned NumElts = LT.second.getVectorNumElements();
47255ffd83dbSDimitry Andric       unsigned Pow2Elts =
47265ffd83dbSDimitry Andric           PowerOf2Ceil(cast<FixedVectorType>(Ty)->getNumElements());
47275ffd83dbSDimitry Andric       Cost += (std::min<unsigned>(NumElts, Pow2Elts) - 1) * LT.first;
47285ffd83dbSDimitry Andric     }
47295ffd83dbSDimitry Andric   }
47305ffd83dbSDimitry Andric 
473181ad6265SDimitry Andric   if (Extract) {
473281ad6265SDimitry Andric     // vXi1 can be efficiently extracted with MOVMSK.
473381ad6265SDimitry Andric     // TODO: AVX512 predicate mask handling.
473481ad6265SDimitry Andric     // NOTE: This doesn't work well for roundtrip scalarization.
473581ad6265SDimitry Andric     if (!Insert && Ty->getScalarSizeInBits() == 1 && !ST->hasAVX512()) {
473681ad6265SDimitry Andric       unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
473781ad6265SDimitry Andric       unsigned MaxElts = ST->hasAVX2() ? 32 : 16;
473881ad6265SDimitry Andric       unsigned MOVMSKCost = (NumElts + MaxElts - 1) / MaxElts;
473981ad6265SDimitry Andric       return MOVMSKCost;
474081ad6265SDimitry Andric     }
474181ad6265SDimitry Andric 
474281ad6265SDimitry Andric     if (LT.second.isVector()) {
4743bdd1243dSDimitry Andric       unsigned NumLegalElts =
4744bdd1243dSDimitry Andric           LT.second.getVectorNumElements() * NumLegalVectors;
4745bdd1243dSDimitry Andric       assert(NumLegalElts >= DemandedElts.getBitWidth() &&
474681ad6265SDimitry Andric              "Vector has been legalized to smaller element count");
474781ad6265SDimitry Andric 
4748bdd1243dSDimitry Andric       // If we're extracting elements from a 128-bit subvector lane,
4749bdd1243dSDimitry Andric       // we only need to extract each lane once, not for every element.
4750bdd1243dSDimitry Andric       if (LegalVectorBitWidth > LaneBitWidth) {
4751bdd1243dSDimitry Andric         unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4752bdd1243dSDimitry Andric         unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4753bdd1243dSDimitry Andric         assert((NumLegalElts % NumLanesTotal) == 0 &&
4754bdd1243dSDimitry Andric                "Unexpected elts per lane");
4755bdd1243dSDimitry Andric         unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
475681ad6265SDimitry Andric 
475781ad6265SDimitry Andric         // Add cost for each demanded 128-bit subvector extraction.
475881ad6265SDimitry Andric         // Luckily this is a lot easier than for insertion.
4759bdd1243dSDimitry Andric         APInt WidenedDemandedElts = DemandedElts.zext(NumLegalElts);
4760bdd1243dSDimitry Andric         auto *LaneTy =
4761bdd1243dSDimitry Andric             FixedVectorType::get(Ty->getElementType(), NumEltsPerLane);
476281ad6265SDimitry Andric 
4763bdd1243dSDimitry Andric         for (unsigned I = 0; I != NumLanesTotal; ++I) {
4764bdd1243dSDimitry Andric           APInt LaneEltMask = WidenedDemandedElts.extractBits(
4765bdd1243dSDimitry Andric               NumEltsPerLane, I * NumEltsPerLane);
476606c3fb27SDimitry Andric           if (LaneEltMask.isZero())
4767bdd1243dSDimitry Andric             continue;
4768bdd1243dSDimitry Andric           Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt,
4769bdd1243dSDimitry Andric                                  CostKind, I * NumEltsPerLane, LaneTy);
4770bdd1243dSDimitry Andric           Cost += BaseT::getScalarizationOverhead(
4771bdd1243dSDimitry Andric               LaneTy, LaneEltMask, /*Insert*/ false, Extract, CostKind);
477281ad6265SDimitry Andric         }
477381ad6265SDimitry Andric 
477481ad6265SDimitry Andric         return Cost;
477581ad6265SDimitry Andric       }
477681ad6265SDimitry Andric     }
477781ad6265SDimitry Andric 
477881ad6265SDimitry Andric     // Fallback to default extraction.
4779bdd1243dSDimitry Andric     Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ false,
4780bdd1243dSDimitry Andric                                             Extract, CostKind);
478181ad6265SDimitry Andric   }
47825ffd83dbSDimitry Andric 
47835ffd83dbSDimitry Andric   return Cost;
47845ffd83dbSDimitry Andric }
47855ffd83dbSDimitry Andric 
4786349cc55cSDimitry Andric InstructionCost
4787349cc55cSDimitry Andric X86TTIImpl::getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
4788349cc55cSDimitry Andric                                       int VF, const APInt &DemandedDstElts,
4789349cc55cSDimitry Andric                                       TTI::TargetCostKind CostKind) {
4790349cc55cSDimitry Andric   const unsigned EltTyBits = DL.getTypeSizeInBits(EltTy);
4791349cc55cSDimitry Andric   // We don't differentiate element types here, only element bit width.
4792349cc55cSDimitry Andric   EltTy = IntegerType::getIntNTy(EltTy->getContext(), EltTyBits);
4793349cc55cSDimitry Andric 
4794349cc55cSDimitry Andric   auto bailout = [&]() {
4795349cc55cSDimitry Andric     return BaseT::getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
4796349cc55cSDimitry Andric                                             DemandedDstElts, CostKind);
4797349cc55cSDimitry Andric   };
4798349cc55cSDimitry Andric 
4799349cc55cSDimitry Andric   // For now, only deal with AVX512 cases.
4800349cc55cSDimitry Andric   if (!ST->hasAVX512())
4801349cc55cSDimitry Andric     return bailout();
4802349cc55cSDimitry Andric 
4803349cc55cSDimitry Andric   // Do we have a native shuffle for this element type, or should we promote?
4804349cc55cSDimitry Andric   unsigned PromEltTyBits = EltTyBits;
4805349cc55cSDimitry Andric   switch (EltTyBits) {
4806349cc55cSDimitry Andric   case 32:
4807349cc55cSDimitry Andric   case 64:
4808349cc55cSDimitry Andric     break; // AVX512F.
4809349cc55cSDimitry Andric   case 16:
4810349cc55cSDimitry Andric     if (!ST->hasBWI())
4811349cc55cSDimitry Andric       PromEltTyBits = 32; // promote to i32, AVX512F.
4812349cc55cSDimitry Andric     break;                // AVX512BW
4813349cc55cSDimitry Andric   case 8:
4814349cc55cSDimitry Andric     if (!ST->hasVBMI())
4815349cc55cSDimitry Andric       PromEltTyBits = 32; // promote to i32, AVX512F.
4816349cc55cSDimitry Andric     break;                // AVX512VBMI
4817349cc55cSDimitry Andric   case 1:
4818349cc55cSDimitry Andric     // There is no support for shuffling i1 elements. We *must* promote.
4819349cc55cSDimitry Andric     if (ST->hasBWI()) {
4820349cc55cSDimitry Andric       if (ST->hasVBMI())
4821349cc55cSDimitry Andric         PromEltTyBits = 8; // promote to i8, AVX512VBMI.
4822349cc55cSDimitry Andric       else
4823349cc55cSDimitry Andric         PromEltTyBits = 16; // promote to i16, AVX512BW.
4824349cc55cSDimitry Andric       break;
4825349cc55cSDimitry Andric     }
48264824e7fdSDimitry Andric     PromEltTyBits = 32; // promote to i32, AVX512F.
48274824e7fdSDimitry Andric     break;
4828349cc55cSDimitry Andric   default:
4829349cc55cSDimitry Andric     return bailout();
4830349cc55cSDimitry Andric   }
4831349cc55cSDimitry Andric   auto *PromEltTy = IntegerType::getIntNTy(EltTy->getContext(), PromEltTyBits);
4832349cc55cSDimitry Andric 
4833349cc55cSDimitry Andric   auto *SrcVecTy = FixedVectorType::get(EltTy, VF);
4834349cc55cSDimitry Andric   auto *PromSrcVecTy = FixedVectorType::get(PromEltTy, VF);
4835349cc55cSDimitry Andric 
4836349cc55cSDimitry Andric   int NumDstElements = VF * ReplicationFactor;
4837349cc55cSDimitry Andric   auto *PromDstVecTy = FixedVectorType::get(PromEltTy, NumDstElements);
4838349cc55cSDimitry Andric   auto *DstVecTy = FixedVectorType::get(EltTy, NumDstElements);
4839349cc55cSDimitry Andric 
4840349cc55cSDimitry Andric   // Legalize the types.
4841bdd1243dSDimitry Andric   MVT LegalSrcVecTy = getTypeLegalizationCost(SrcVecTy).second;
4842bdd1243dSDimitry Andric   MVT LegalPromSrcVecTy = getTypeLegalizationCost(PromSrcVecTy).second;
4843bdd1243dSDimitry Andric   MVT LegalPromDstVecTy = getTypeLegalizationCost(PromDstVecTy).second;
4844bdd1243dSDimitry Andric   MVT LegalDstVecTy = getTypeLegalizationCost(DstVecTy).second;
4845349cc55cSDimitry Andric   // They should have legalized into vector types.
4846349cc55cSDimitry Andric   if (!LegalSrcVecTy.isVector() || !LegalPromSrcVecTy.isVector() ||
4847349cc55cSDimitry Andric       !LegalPromDstVecTy.isVector() || !LegalDstVecTy.isVector())
4848349cc55cSDimitry Andric     return bailout();
4849349cc55cSDimitry Andric 
4850349cc55cSDimitry Andric   if (PromEltTyBits != EltTyBits) {
4851349cc55cSDimitry Andric     // If we have to perform the shuffle with wider elt type than our data type,
4852349cc55cSDimitry Andric     // then we will first need to anyext (we don't care about the new bits)
4853349cc55cSDimitry Andric     // the source elements, and then truncate Dst elements.
4854349cc55cSDimitry Andric     InstructionCost PromotionCost;
4855349cc55cSDimitry Andric     PromotionCost += getCastInstrCost(
4856349cc55cSDimitry Andric         Instruction::SExt, /*Dst=*/PromSrcVecTy, /*Src=*/SrcVecTy,
4857349cc55cSDimitry Andric         TargetTransformInfo::CastContextHint::None, CostKind);
4858349cc55cSDimitry Andric     PromotionCost +=
4859349cc55cSDimitry Andric         getCastInstrCost(Instruction::Trunc, /*Dst=*/DstVecTy,
4860349cc55cSDimitry Andric                          /*Src=*/PromDstVecTy,
4861349cc55cSDimitry Andric                          TargetTransformInfo::CastContextHint::None, CostKind);
4862349cc55cSDimitry Andric     return PromotionCost + getReplicationShuffleCost(PromEltTy,
4863349cc55cSDimitry Andric                                                      ReplicationFactor, VF,
4864349cc55cSDimitry Andric                                                      DemandedDstElts, CostKind);
4865349cc55cSDimitry Andric   }
4866349cc55cSDimitry Andric 
4867349cc55cSDimitry Andric   assert(LegalSrcVecTy.getScalarSizeInBits() == EltTyBits &&
4868349cc55cSDimitry Andric          LegalSrcVecTy.getScalarType() == LegalDstVecTy.getScalarType() &&
4869349cc55cSDimitry Andric          "We expect that the legalization doesn't affect the element width, "
4870349cc55cSDimitry Andric          "doesn't coalesce/split elements.");
4871349cc55cSDimitry Andric 
4872349cc55cSDimitry Andric   unsigned NumEltsPerDstVec = LegalDstVecTy.getVectorNumElements();
4873349cc55cSDimitry Andric   unsigned NumDstVectors =
4874349cc55cSDimitry Andric       divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec);
4875349cc55cSDimitry Andric 
4876349cc55cSDimitry Andric   auto *SingleDstVecTy = FixedVectorType::get(EltTy, NumEltsPerDstVec);
4877349cc55cSDimitry Andric 
4878349cc55cSDimitry Andric   // Not all the produced Dst elements may be demanded. In our case,
4879349cc55cSDimitry Andric   // given that a single Dst vector is formed by a single shuffle,
4880349cc55cSDimitry Andric   // if all elements that will form a single Dst vector aren't demanded,
4881349cc55cSDimitry Andric   // then we won't need to do that shuffle, so adjust the cost accordingly.
4882349cc55cSDimitry Andric   APInt DemandedDstVectors = APIntOps::ScaleBitMask(
488381ad6265SDimitry Andric       DemandedDstElts.zext(NumDstVectors * NumEltsPerDstVec), NumDstVectors);
488406c3fb27SDimitry Andric   unsigned NumDstVectorsDemanded = DemandedDstVectors.popcount();
4885349cc55cSDimitry Andric 
4886bdd1243dSDimitry Andric   InstructionCost SingleShuffleCost = getShuffleCost(
4887bdd1243dSDimitry Andric       TTI::SK_PermuteSingleSrc, SingleDstVecTy, /*Mask=*/std::nullopt, CostKind,
4888bdd1243dSDimitry Andric       /*Index=*/0, /*SubTp=*/nullptr);
4889349cc55cSDimitry Andric   return NumDstVectorsDemanded * SingleShuffleCost;
4890349cc55cSDimitry Andric }
4891349cc55cSDimitry Andric 
/// Estimate the cost of a plain (unmasked) load or store of \p Src.
///
/// For any cost kind other than TCK_RecipThroughput the answer is TCC_Basic,
/// or 2 * TCC_Basic when \p I is a store whose pointer operand is a GEP with
/// at least one non-constant index.
///
/// For TCK_RecipThroughput:
///  - types whose value type is MVT::Other are deferred to the base
///    implementation;
///  - storing a constant additionally pays for loading that constant (for
///    non-vector accesses this extra cost is only kept when the legalized
///    type is floating point);
///  - non-vector accesses cost one unit per legalized operation;
///  - fixed vectors are walked from the widest legal operation size downwards,
///    halving the size when the remaining elements no longer fit, charging
///    each memory op plus any subvector insert/extract and per-element
///    insert/extract overhead. If the element size does not evenly divide
///    128 bits or the current op size, the base implementation is used.
///
/// \param Opcode       Instruction::Load or Instruction::Store.
/// \param Src          Type being loaded or stored.
/// \param Alignment    Known alignment; an unset value is treated as 1.
/// \param AddressSpace Forwarded to the base implementation on fallbacks.
/// \param CostKind     Cost kind being queried.
/// \param OpInfo       Describes the stored operand (checked for constness).
/// \param I            Optional instruction, only inspected for cost kinds
///                     other than TCK_RecipThroughput.
4892fe6060f1SDimitry Andric InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
4893fe6060f1SDimitry Andric                                             MaybeAlign Alignment,
4894fe6060f1SDimitry Andric                                             unsigned AddressSpace,
48955ffd83dbSDimitry Andric                                             TTI::TargetCostKind CostKind,
4896bdd1243dSDimitry Andric                                             TTI::OperandValueInfo OpInfo,
4897480093f4SDimitry Andric                                             const Instruction *I) {
48985ffd83dbSDimitry Andric   // TODO: Handle other cost kinds.
48995ffd83dbSDimitry Andric   if (CostKind != TTI::TCK_RecipThroughput) {
4900e8d8bef9SDimitry Andric     if (auto *SI = dyn_cast_or_null<StoreInst>(I)) {
49015ffd83dbSDimitry Andric       // Store instruction with index and scale costs 2 Uops.
49025ffd83dbSDimitry Andric       // Check the preceding GEP to identify non-const indices.
4903e8d8bef9SDimitry Andric       if (auto *GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) {
49045ffd83dbSDimitry Andric         if (!all_of(GEP->indices(), [](Value *V) { return isa<Constant>(V); }))
49055ffd83dbSDimitry Andric           return TTI::TCC_Basic * 2;
49065ffd83dbSDimitry Andric       }
49075ffd83dbSDimitry Andric     }
49085ffd83dbSDimitry Andric     return TTI::TCC_Basic;
49095ffd83dbSDimitry Andric   }
49105ffd83dbSDimitry Andric 
4911fe6060f1SDimitry Andric   assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4912fe6060f1SDimitry Andric          "Invalid Opcode");
49135ffd83dbSDimitry Andric   // Type legalization can't handle structs
49145ffd83dbSDimitry Andric   if (TLI->getValueType(DL, Src, true) == MVT::Other)
49155ffd83dbSDimitry Andric     return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
49165ffd83dbSDimitry Andric                                   CostKind);
49175ffd83dbSDimitry Andric 
49180b57cec5SDimitry Andric   // Legalize the type.
4919bdd1243dSDimitry Andric   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
49200b57cec5SDimitry Andric 
4921fe6060f1SDimitry Andric   auto *VTy = dyn_cast<FixedVectorType>(Src);
4922fe6060f1SDimitry Andric 
4923bdd1243dSDimitry Andric   InstructionCost Cost = 0;
4924bdd1243dSDimitry Andric 
4925bdd1243dSDimitry Andric   // Add a cost for constant load to vector.
4926bdd1243dSDimitry Andric   if (Opcode == Instruction::Store && OpInfo.isConstant())
4927bdd1243dSDimitry Andric     Cost += getMemoryOpCost(Instruction::Load, Src, DL.getABITypeAlign(Src),
4928bdd1243dSDimitry Andric                             /*AddressSpace=*/0, CostKind);
4929bdd1243dSDimitry Andric 
4930fe6060f1SDimitry Andric   // Handle the simple case of non-vectors.
4931fe6060f1SDimitry Andric   // NOTE: this assumes that legalization never creates vector from scalars!
4932bdd1243dSDimitry Andric   if (!VTy || !LT.second.isVector()) {
    // The constant-load cost accumulated above is only kept when the
    // legalized type is floating point; otherwise it is dropped here.
49330b57cec5SDimitry Andric     // Each load/store unit costs 1.
4934bdd1243dSDimitry Andric     return (LT.second.isFloatingPoint() ? Cost : 0) + LT.first * 1;
4935bdd1243dSDimitry Andric   }
49360b57cec5SDimitry Andric 
4937fe6060f1SDimitry Andric   bool IsLoad = Opcode == Instruction::Load;
4938fe6060f1SDimitry Andric 
4939fe6060f1SDimitry Andric   Type *EltTy = VTy->getElementType();
4940fe6060f1SDimitry Andric 
4941fe6060f1SDimitry Andric   const int EltTyBits = DL.getTypeSizeInBits(EltTy);
4942fe6060f1SDimitry Andric 
4943fe6060f1SDimitry Andric   // Source of truth: how many elements were there in the original IR vector?
4944fe6060f1SDimitry Andric   const unsigned SrcNumElt = VTy->getNumElements();
4945fe6060f1SDimitry Andric 
4946fe6060f1SDimitry Andric   // How far have we gotten?
4947fe6060f1SDimitry Andric   int NumEltRemaining = SrcNumElt;
4948fe6060f1SDimitry Andric   // Note that we intentionally capture by-reference, NumEltRemaining changes.
4949fe6060f1SDimitry Andric   auto NumEltDone = [&]() { return SrcNumElt - NumEltRemaining; };
4950fe6060f1SDimitry Andric 
4951fe6060f1SDimitry Andric   const int MaxLegalOpSizeBytes = divideCeil(LT.second.getSizeInBits(), 8);
4952fe6060f1SDimitry Andric 
4953fe6060f1SDimitry Andric   // Note that even if we can store 64 bits of an XMM, we still operate on XMM.
4954fe6060f1SDimitry Andric   const unsigned XMMBits = 128;
4955fe6060f1SDimitry Andric   if (XMMBits % EltTyBits != 0)
4956fe6060f1SDimitry Andric     // Vector size must be a multiple of the element size. I.e. no padding.
4957fe6060f1SDimitry Andric     return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
4958fe6060f1SDimitry Andric                                   CostKind);
4959fe6060f1SDimitry Andric   const int NumEltPerXMM = XMMBits / EltTyBits;
4960fe6060f1SDimitry Andric 
4961fe6060f1SDimitry Andric   auto *XMMVecTy = FixedVectorType::get(EltTy, NumEltPerXMM);
4962fe6060f1SDimitry Andric 
  // Outer loop: one iteration per candidate op size, starting at the widest
  // legal size and halving each time. SubVecEltsLeft carries over between
  // sizes and counts the elements still unconsumed in the current subvector.
4963fe6060f1SDimitry Andric   for (int CurrOpSizeBytes = MaxLegalOpSizeBytes, SubVecEltsLeft = 0;
4964fe6060f1SDimitry Andric        NumEltRemaining > 0; CurrOpSizeBytes /= 2) {
4965fe6060f1SDimitry Andric     // How many elements would a single op deal with at once?
4966fe6060f1SDimitry Andric     if ((8 * CurrOpSizeBytes) % EltTyBits != 0)
4967fe6060f1SDimitry Andric       // Vector size must be a multiple of the element size. I.e. no padding.
4968fe6060f1SDimitry Andric       return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
4969fe6060f1SDimitry Andric                                     CostKind);
4970fe6060f1SDimitry Andric     int CurrNumEltPerOp = (8 * CurrOpSizeBytes) / EltTyBits;
4971fe6060f1SDimitry Andric 
4972fe6060f1SDimitry Andric     assert(CurrOpSizeBytes > 0 && CurrNumEltPerOp > 0 && "How'd we get here?");
4973fe6060f1SDimitry Andric     assert((((NumEltRemaining * EltTyBits) < (2 * 8 * CurrOpSizeBytes)) ||
4974fe6060f1SDimitry Andric             (CurrOpSizeBytes == MaxLegalOpSizeBytes)) &&
4975fe6060f1SDimitry Andric            "Unless we haven't halved the op size yet, "
4976fe6060f1SDimitry Andric            "we have less than two op's sized units of work left.");
4977fe6060f1SDimitry Andric 
4978fe6060f1SDimitry Andric     auto *CurrVecTy = CurrNumEltPerOp > NumEltPerXMM
4979fe6060f1SDimitry Andric                           ? FixedVectorType::get(EltTy, CurrNumEltPerOp)
4980fe6060f1SDimitry Andric                           : XMMVecTy;
4981fe6060f1SDimitry Andric 
4982fe6060f1SDimitry Andric     assert(CurrVecTy->getNumElements() % CurrNumEltPerOp == 0 &&
4983fe6060f1SDimitry Andric            "After halving sizes, the vector elt count is no longer a multiple "
4984fe6060f1SDimitry Andric            "of number of elements per operation?");
4985fe6060f1SDimitry Andric     auto *CoalescedVecTy =
4986fe6060f1SDimitry Andric         CurrNumEltPerOp == 1
4987fe6060f1SDimitry Andric             ? CurrVecTy
4988fe6060f1SDimitry Andric             : FixedVectorType::get(
4989fe6060f1SDimitry Andric                   IntegerType::get(Src->getContext(),
4990fe6060f1SDimitry Andric                                    EltTyBits * CurrNumEltPerOp),
4991fe6060f1SDimitry Andric                   CurrVecTy->getNumElements() / CurrNumEltPerOp);
4992fe6060f1SDimitry Andric     assert(DL.getTypeSizeInBits(CoalescedVecTy) ==
4993fe6060f1SDimitry Andric                DL.getTypeSizeInBits(CurrVecTy) &&
4994fe6060f1SDimitry Andric            "coalesciing elements doesn't change vector width.");
4995fe6060f1SDimitry Andric 
4996fe6060f1SDimitry Andric     while (NumEltRemaining > 0) {
4997fe6060f1SDimitry Andric       assert(SubVecEltsLeft >= 0 && "Subreg element count overconsumtion?");
4998fe6060f1SDimitry Andric 
4999fe6060f1SDimitry Andric       // Can we use this vector size, as per the remaining element count?
5000fe6060f1SDimitry Andric       // Iff the vector is naturally aligned, we can do a wide load regardless.
5001fe6060f1SDimitry Andric       if (NumEltRemaining < CurrNumEltPerOp &&
5002fe6060f1SDimitry Andric           (!IsLoad || Alignment.valueOrOne() < CurrOpSizeBytes) &&
5003fe6060f1SDimitry Andric           CurrOpSizeBytes != 1)
5004fe6060f1SDimitry Andric         break; // Try smaller vector size.
5005fe6060f1SDimitry Andric 
5006fe6060f1SDimitry Andric       bool Is0thSubVec = (NumEltDone() % LT.second.getVectorNumElements()) == 0;
5007fe6060f1SDimitry Andric 
5008fe6060f1SDimitry Andric       // If we have fully processed the previous reg, we need to replenish it.
5009fe6060f1SDimitry Andric       if (SubVecEltsLeft == 0) {
5010fe6060f1SDimitry Andric         SubVecEltsLeft += CurrVecTy->getNumElements();
5011fe6060f1SDimitry Andric         // And that's free only for the 0'th subvector of a legalized vector.
5012fe6060f1SDimitry Andric         if (!Is0thSubVec)
5013fe6060f1SDimitry Andric           Cost += getShuffleCost(IsLoad ? TTI::ShuffleKind::SK_InsertSubvector
5014fe6060f1SDimitry Andric                                         : TTI::ShuffleKind::SK_ExtractSubvector,
5015bdd1243dSDimitry Andric                                  VTy, std::nullopt, CostKind, NumEltDone(),
5016bdd1243dSDimitry Andric                                  CurrVecTy);
5017fe6060f1SDimitry Andric       }
5018fe6060f1SDimitry Andric 
5019fe6060f1SDimitry Andric       // While we can directly load/store ZMM, YMM, and 64-bit halves of XMM,
5020fe6060f1SDimitry Andric       // for smaller widths (32/16/8) we have to insert/extract them separately.
5021fe6060f1SDimitry Andric       // Again, it's free for the 0'th subreg (if op is 32/64 bit wide,
5022fe6060f1SDimitry Andric       // but let's pretend that it is also true for 16/8 bit wide ops...)
5023fe6060f1SDimitry Andric       if (CurrOpSizeBytes <= 32 / 8 && !Is0thSubVec) {
5024fe6060f1SDimitry Andric         int NumEltDoneInCurrXMM = NumEltDone() % NumEltPerXMM;
        // The position within the current XMM must be a whole number of
        // op-sized units, so it maps to one lane of the coalesced vector.
5025fe6060f1SDimitry Andric         assert(NumEltDoneInCurrXMM % CurrNumEltPerOp == 0 && "");
5026fe6060f1SDimitry Andric         int CoalescedVecEltIdx = NumEltDoneInCurrXMM / CurrNumEltPerOp;
5027fe6060f1SDimitry Andric         APInt DemandedElts =
5028fe6060f1SDimitry Andric             APInt::getBitsSet(CoalescedVecTy->getNumElements(),
5029fe6060f1SDimitry Andric                               CoalescedVecEltIdx, CoalescedVecEltIdx + 1);
503006c3fb27SDimitry Andric         assert(DemandedElts.popcount() == 1 && "Inserting single value");
5031fe6060f1SDimitry Andric         Cost += getScalarizationOverhead(CoalescedVecTy, DemandedElts, IsLoad,
5032bdd1243dSDimitry Andric                                          !IsLoad, CostKind);
5033fe6060f1SDimitry Andric       }
5034fe6060f1SDimitry Andric 
5035fe6060f1SDimitry Andric       // This isn't exactly right. We're using slow unaligned 32-byte accesses
5036fe6060f1SDimitry Andric       // as a proxy for a double-pumped AVX memory interface such as on
5037fe6060f1SDimitry Andric       // Sandybridge.
503806c3fb27SDimitry Andric       // Sub-32-bit loads/stores will be slower either with PINSR*/PEXTR* or
503906c3fb27SDimitry Andric       // will be scalarized.
5040fe6060f1SDimitry Andric       if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow())
5041fe6060f1SDimitry Andric         Cost += 2;
504206c3fb27SDimitry Andric       else if (CurrOpSizeBytes < 4)
504306c3fb27SDimitry Andric         Cost += 2;
5044fe6060f1SDimitry Andric       else
5045fe6060f1SDimitry Andric         Cost += 1;
5046fe6060f1SDimitry Andric 
5047fe6060f1SDimitry Andric       SubVecEltsLeft -= CurrNumEltPerOp;
5048fe6060f1SDimitry Andric       NumEltRemaining -= CurrNumEltPerOp;
      // The next access starts CurrOpSizeBytes further along, which may lower
      // the alignment that can be assumed for it.
5049fe6060f1SDimitry Andric       Alignment = commonAlignment(Alignment.valueOrOne(), CurrOpSizeBytes);
5050fe6060f1SDimitry Andric     }
5051fe6060f1SDimitry Andric   }
5052fe6060f1SDimitry Andric 
5053fe6060f1SDimitry Andric   assert(NumEltRemaining <= 0 && "Should have processed all the elements.");
50540b57cec5SDimitry Andric 
50550b57cec5SDimitry Andric   return Cost;
50560b57cec5SDimitry Andric }
50570b57cec5SDimitry Andric 
5058fe6060f1SDimitry Andric InstructionCost
5059fe6060f1SDimitry Andric X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment,
5060fe6060f1SDimitry Andric                                   unsigned AddressSpace,
50615ffd83dbSDimitry Andric                                   TTI::TargetCostKind CostKind) {
50620b57cec5SDimitry Andric   bool IsLoad = (Instruction::Load == Opcode);
50630b57cec5SDimitry Andric   bool IsStore = (Instruction::Store == Opcode);
50640b57cec5SDimitry Andric 
50655ffd83dbSDimitry Andric   auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy);
50660b57cec5SDimitry Andric   if (!SrcVTy)
50670b57cec5SDimitry Andric     // To calculate scalar take the regular cost, without mask
50685ffd83dbSDimitry Andric     return getMemoryOpCost(Opcode, SrcTy, Alignment, AddressSpace, CostKind);
50690b57cec5SDimitry Andric 
50705ffd83dbSDimitry Andric   unsigned NumElem = SrcVTy->getNumElements();
50715ffd83dbSDimitry Andric   auto *MaskTy =
50725ffd83dbSDimitry Andric       FixedVectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem);
50735ffd83dbSDimitry Andric   if ((IsLoad && !isLegalMaskedLoad(SrcVTy, Alignment)) ||
5074fe6060f1SDimitry Andric       (IsStore && !isLegalMaskedStore(SrcVTy, Alignment))) {
50750b57cec5SDimitry Andric     // Scalarization
5076349cc55cSDimitry Andric     APInt DemandedElts = APInt::getAllOnes(NumElem);
5077bdd1243dSDimitry Andric     InstructionCost MaskSplitCost = getScalarizationOverhead(
5078bdd1243dSDimitry Andric         MaskTy, DemandedElts, /*Insert*/ false, /*Extract*/ true, CostKind);
5079fe6060f1SDimitry Andric     InstructionCost ScalarCompareCost = getCmpSelInstrCost(
50805ffd83dbSDimitry Andric         Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr,
5081e8d8bef9SDimitry Andric         CmpInst::BAD_ICMP_PREDICATE, CostKind);
5082fe6060f1SDimitry Andric     InstructionCost BranchCost = getCFInstrCost(Instruction::Br, CostKind);
5083fe6060f1SDimitry Andric     InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
5084bdd1243dSDimitry Andric     InstructionCost ValueSplitCost = getScalarizationOverhead(
5085bdd1243dSDimitry Andric         SrcVTy, DemandedElts, IsLoad, IsStore, CostKind);
5086fe6060f1SDimitry Andric     InstructionCost MemopCost =
50870b57cec5SDimitry Andric         NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
50885ffd83dbSDimitry Andric                                          Alignment, AddressSpace, CostKind);
50890b57cec5SDimitry Andric     return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
50900b57cec5SDimitry Andric   }
50910b57cec5SDimitry Andric 
50920b57cec5SDimitry Andric   // Legalize the type.
5093bdd1243dSDimitry Andric   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(SrcVTy);
50940b57cec5SDimitry Andric   auto VT = TLI->getValueType(DL, SrcVTy);
5095fe6060f1SDimitry Andric   InstructionCost Cost = 0;
5096*0fca6ea1SDimitry Andric   MVT Ty = LT.second;
5097*0fca6ea1SDimitry Andric   if (Ty == MVT::i16 || Ty == MVT::i32 || Ty == MVT::i64)
5098*0fca6ea1SDimitry Andric     // APX masked load/store for scalar is cheap.
5099*0fca6ea1SDimitry Andric     return Cost + LT.first;
5100*0fca6ea1SDimitry Andric 
5101*0fca6ea1SDimitry Andric   if (VT.isSimple() && Ty != VT.getSimpleVT() &&
51020b57cec5SDimitry Andric       LT.second.getVectorNumElements() == NumElem)
5103fe6060f1SDimitry Andric     // Promotion requires extend/truncate for data and a shuffle for mask.
5104bdd1243dSDimitry Andric     Cost += getShuffleCost(TTI::SK_PermuteTwoSrc, SrcVTy, std::nullopt,
5105bdd1243dSDimitry Andric                            CostKind, 0, nullptr) +
5106bdd1243dSDimitry Andric             getShuffleCost(TTI::SK_PermuteTwoSrc, MaskTy, std::nullopt,
5107bdd1243dSDimitry Andric                            CostKind, 0, nullptr);
51080b57cec5SDimitry Andric 
5109*0fca6ea1SDimitry Andric   else if (LT.first * Ty.getVectorNumElements() > NumElem) {
51105ffd83dbSDimitry Andric     auto *NewMaskTy = FixedVectorType::get(MaskTy->getElementType(),
5111*0fca6ea1SDimitry Andric                                            Ty.getVectorNumElements());
51120b57cec5SDimitry Andric     // Expanding requires fill mask with zeroes
5113bdd1243dSDimitry Andric     Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, std::nullopt,
5114bdd1243dSDimitry Andric                            CostKind, 0, MaskTy);
51150b57cec5SDimitry Andric   }
51160b57cec5SDimitry Andric 
51170b57cec5SDimitry Andric   // Pre-AVX512 - each maskmov load costs 2 + store costs ~8.
51180b57cec5SDimitry Andric   if (!ST->hasAVX512())
51190b57cec5SDimitry Andric     return Cost + LT.first * (IsLoad ? 2 : 8);
51200b57cec5SDimitry Andric 
5121bdd1243dSDimitry Andric   // AVX-512 masked load/store is cheaper
51220b57cec5SDimitry Andric   return Cost + LT.first;
51230b57cec5SDimitry Andric }
51240b57cec5SDimitry Andric 
512506c3fb27SDimitry Andric InstructionCost
512606c3fb27SDimitry Andric X86TTIImpl::getPointersChainCost(ArrayRef<const Value *> Ptrs,
512706c3fb27SDimitry Andric                                  const Value *Base,
512806c3fb27SDimitry Andric                                  const TTI::PointersChainInfo &Info,
512906c3fb27SDimitry Andric                                  Type *AccessTy, TTI::TargetCostKind CostKind) {
513006c3fb27SDimitry Andric   if (Info.isSameBase() && Info.isKnownStride()) {
513106c3fb27SDimitry Andric     // If all the pointers have known stride all the differences are translated
513206c3fb27SDimitry Andric     // into constants. X86 memory addressing allows encoding it into
513306c3fb27SDimitry Andric     // displacement. So we just need to take the base GEP cost.
513406c3fb27SDimitry Andric     if (const auto *BaseGEP = dyn_cast<GetElementPtrInst>(Base)) {
513506c3fb27SDimitry Andric       SmallVector<const Value *> Indices(BaseGEP->indices());
513606c3fb27SDimitry Andric       return getGEPCost(BaseGEP->getSourceElementType(),
513706c3fb27SDimitry Andric                         BaseGEP->getPointerOperand(), Indices, nullptr,
513806c3fb27SDimitry Andric                         CostKind);
513906c3fb27SDimitry Andric     }
514006c3fb27SDimitry Andric     return TTI::TCC_Free;
514106c3fb27SDimitry Andric   }
514206c3fb27SDimitry Andric   return BaseT::getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
514306c3fb27SDimitry Andric }
514406c3fb27SDimitry Andric 
5145fe6060f1SDimitry Andric InstructionCost X86TTIImpl::getAddressComputationCost(Type *Ty,
5146fe6060f1SDimitry Andric                                                       ScalarEvolution *SE,
51470b57cec5SDimitry Andric                                                       const SCEV *Ptr) {
51480b57cec5SDimitry Andric   // Address computations in vectorized code with non-consecutive addresses will
51490b57cec5SDimitry Andric   // likely result in more instructions compared to scalar code where the
51500b57cec5SDimitry Andric   // computation can more often be merged into the index mode. The resulting
51510b57cec5SDimitry Andric   // extra micro-ops can significantly decrease throughput.
51520b57cec5SDimitry Andric   const unsigned NumVectorInstToHideOverhead = 10;
51530b57cec5SDimitry Andric 
51540b57cec5SDimitry Andric   // Cost modeling of Strided Access Computation is hidden by the indexing
51550b57cec5SDimitry Andric   // modes of X86 regardless of the stride value. We dont believe that there
51560b57cec5SDimitry Andric   // is a difference between constant strided access in gerenal and constant
51570b57cec5SDimitry Andric   // strided value which is less than or equal to 64.
51580b57cec5SDimitry Andric   // Even in the case of (loop invariant) stride whose value is not known at
51590b57cec5SDimitry Andric   // compile time, the address computation will not incur more than one extra
51600b57cec5SDimitry Andric   // ADD instruction.
51614824e7fdSDimitry Andric   if (Ty->isVectorTy() && SE && !ST->hasAVX2()) {
51624824e7fdSDimitry Andric     // TODO: AVX2 is the current cut-off because we don't have correct
51634824e7fdSDimitry Andric     //       interleaving costs for prior ISA's.
51640b57cec5SDimitry Andric     if (!BaseT::isStridedAccess(Ptr))
51650b57cec5SDimitry Andric       return NumVectorInstToHideOverhead;
51660b57cec5SDimitry Andric     if (!BaseT::getConstantStrideStep(SE, Ptr))
51670b57cec5SDimitry Andric       return 1;
51680b57cec5SDimitry Andric   }
51690b57cec5SDimitry Andric 
51700b57cec5SDimitry Andric   return BaseT::getAddressComputationCost(Ty, SE, Ptr);
51710b57cec5SDimitry Andric }
51720b57cec5SDimitry Andric 
5173fe6060f1SDimitry Andric InstructionCost
5174fe6060f1SDimitry Andric X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
5175bdd1243dSDimitry Andric                                        std::optional<FastMathFlags> FMF,
51765ffd83dbSDimitry Andric                                        TTI::TargetCostKind CostKind) {
5177fe6060f1SDimitry Andric   if (TTI::requiresOrderedReduction(FMF))
5178fe6060f1SDimitry Andric     return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
51795ffd83dbSDimitry Andric 
51808bcb0991SDimitry Andric   // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
51818bcb0991SDimitry Andric   // and make it as the cost.
51820b57cec5SDimitry Andric 
518306c3fb27SDimitry Andric   static const CostTblEntry SLMCostTbl[] = {
5184480093f4SDimitry Andric     { ISD::FADD,  MVT::v2f64,   3 },
5185480093f4SDimitry Andric     { ISD::ADD,   MVT::v2i64,   5 },
5186480093f4SDimitry Andric   };
5187480093f4SDimitry Andric 
518806c3fb27SDimitry Andric   static const CostTblEntry SSE2CostTbl[] = {
51898bcb0991SDimitry Andric     { ISD::FADD,  MVT::v2f64,   2 },
5190fe6060f1SDimitry Andric     { ISD::FADD,  MVT::v2f32,   2 },
51918bcb0991SDimitry Andric     { ISD::FADD,  MVT::v4f32,   4 },
51928bcb0991SDimitry Andric     { ISD::ADD,   MVT::v2i64,   2 },      // The data reported by the IACA tool is "1.6".
51938bcb0991SDimitry Andric     { ISD::ADD,   MVT::v2i32,   2 }, // FIXME: chosen to be less than v4i32
51948bcb0991SDimitry Andric     { ISD::ADD,   MVT::v4i32,   3 },      // The data reported by the IACA tool is "3.3".
51958bcb0991SDimitry Andric     { ISD::ADD,   MVT::v2i16,   2 },      // The data reported by the IACA tool is "4.3".
51968bcb0991SDimitry Andric     { ISD::ADD,   MVT::v4i16,   3 },      // The data reported by the IACA tool is "4.3".
51978bcb0991SDimitry Andric     { ISD::ADD,   MVT::v8i16,   4 },      // The data reported by the IACA tool is "4.3".
51988bcb0991SDimitry Andric     { ISD::ADD,   MVT::v2i8,    2 },
51998bcb0991SDimitry Andric     { ISD::ADD,   MVT::v4i8,    2 },
52008bcb0991SDimitry Andric     { ISD::ADD,   MVT::v8i8,    2 },
52018bcb0991SDimitry Andric     { ISD::ADD,   MVT::v16i8,   3 },
52028bcb0991SDimitry Andric   };
52038bcb0991SDimitry Andric 
520406c3fb27SDimitry Andric   static const CostTblEntry AVX1CostTbl[] = {
52058bcb0991SDimitry Andric     { ISD::FADD,  MVT::v4f64,   3 },
52068bcb0991SDimitry Andric     { ISD::FADD,  MVT::v4f32,   3 },
52078bcb0991SDimitry Andric     { ISD::FADD,  MVT::v8f32,   4 },
52088bcb0991SDimitry Andric     { ISD::ADD,   MVT::v2i64,   1 },      // The data reported by the IACA tool is "1.5".
52098bcb0991SDimitry Andric     { ISD::ADD,   MVT::v4i64,   3 },
52108bcb0991SDimitry Andric     { ISD::ADD,   MVT::v8i32,   5 },
52118bcb0991SDimitry Andric     { ISD::ADD,   MVT::v16i16,  5 },
52128bcb0991SDimitry Andric     { ISD::ADD,   MVT::v32i8,   4 },
52138bcb0991SDimitry Andric   };
52140b57cec5SDimitry Andric 
52150b57cec5SDimitry Andric   int ISD = TLI->InstructionOpcodeToISD(Opcode);
52160b57cec5SDimitry Andric   assert(ISD && "Invalid opcode");
52170b57cec5SDimitry Andric 
52188bcb0991SDimitry Andric   // Before legalizing the type, give a chance to look up illegal narrow types
52198bcb0991SDimitry Andric   // in the table.
52208bcb0991SDimitry Andric   // FIXME: Is there a better way to do this?
52218bcb0991SDimitry Andric   EVT VT = TLI->getValueType(DL, ValTy);
52228bcb0991SDimitry Andric   if (VT.isSimple()) {
52238bcb0991SDimitry Andric     MVT MTy = VT.getSimpleVT();
5224349cc55cSDimitry Andric     if (ST->useSLMArithCosts())
522506c3fb27SDimitry Andric       if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
5226480093f4SDimitry Andric         return Entry->Cost;
5227480093f4SDimitry Andric 
52288bcb0991SDimitry Andric     if (ST->hasAVX())
522906c3fb27SDimitry Andric       if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
52308bcb0991SDimitry Andric         return Entry->Cost;
52310b57cec5SDimitry Andric 
52328bcb0991SDimitry Andric     if (ST->hasSSE2())
523306c3fb27SDimitry Andric       if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
52348bcb0991SDimitry Andric         return Entry->Cost;
52358bcb0991SDimitry Andric   }
52360b57cec5SDimitry Andric 
5237bdd1243dSDimitry Andric   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
52380b57cec5SDimitry Andric 
52398bcb0991SDimitry Andric   MVT MTy = LT.second;
52400b57cec5SDimitry Andric 
52415ffd83dbSDimitry Andric   auto *ValVTy = cast<FixedVectorType>(ValTy);
5242480093f4SDimitry Andric 
5243fe6060f1SDimitry Andric   // Special case: vXi8 mul reductions are performed as vXi16.
5244fe6060f1SDimitry Andric   if (ISD == ISD::MUL && MTy.getScalarType() == MVT::i8) {
5245fe6060f1SDimitry Andric     auto *WideSclTy = IntegerType::get(ValVTy->getContext(), 16);
5246fe6060f1SDimitry Andric     auto *WideVecTy = FixedVectorType::get(WideSclTy, ValVTy->getNumElements());
5247fe6060f1SDimitry Andric     return getCastInstrCost(Instruction::ZExt, WideVecTy, ValTy,
5248fe6060f1SDimitry Andric                             TargetTransformInfo::CastContextHint::None,
5249fe6060f1SDimitry Andric                             CostKind) +
5250fe6060f1SDimitry Andric            getArithmeticReductionCost(Opcode, WideVecTy, FMF, CostKind);
5251fe6060f1SDimitry Andric   }
5252fe6060f1SDimitry Andric 
5253fe6060f1SDimitry Andric   InstructionCost ArithmeticCost = 0;
52545ffd83dbSDimitry Andric   if (LT.first != 1 && MTy.isVector() &&
52555ffd83dbSDimitry Andric       MTy.getVectorNumElements() < ValVTy->getNumElements()) {
52565ffd83dbSDimitry Andric     // Type needs to be split. We need LT.first - 1 arithmetic ops.
52575ffd83dbSDimitry Andric     auto *SingleOpTy = FixedVectorType::get(ValVTy->getElementType(),
52585ffd83dbSDimitry Andric                                             MTy.getVectorNumElements());
52595ffd83dbSDimitry Andric     ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind);
52605ffd83dbSDimitry Andric     ArithmeticCost *= LT.first - 1;
52615ffd83dbSDimitry Andric   }
52620b57cec5SDimitry Andric 
5263349cc55cSDimitry Andric   if (ST->useSLMArithCosts())
526406c3fb27SDimitry Andric     if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
52655ffd83dbSDimitry Andric       return ArithmeticCost + Entry->Cost;
5266480093f4SDimitry Andric 
52670b57cec5SDimitry Andric   if (ST->hasAVX())
526806c3fb27SDimitry Andric     if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
52695ffd83dbSDimitry Andric       return ArithmeticCost + Entry->Cost;
52700b57cec5SDimitry Andric 
52718bcb0991SDimitry Andric   if (ST->hasSSE2())
527206c3fb27SDimitry Andric     if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
52735ffd83dbSDimitry Andric       return ArithmeticCost + Entry->Cost;
52740b57cec5SDimitry Andric 
5275480093f4SDimitry Andric   // FIXME: These assume a naive kshift+binop lowering, which is probably
5276480093f4SDimitry Andric   // conservative in most cases.
5277480093f4SDimitry Andric   static const CostTblEntry AVX512BoolReduction[] = {
5278480093f4SDimitry Andric     { ISD::AND,  MVT::v2i1,   3 },
5279480093f4SDimitry Andric     { ISD::AND,  MVT::v4i1,   5 },
5280480093f4SDimitry Andric     { ISD::AND,  MVT::v8i1,   7 },
5281480093f4SDimitry Andric     { ISD::AND,  MVT::v16i1,  9 },
5282480093f4SDimitry Andric     { ISD::AND,  MVT::v32i1, 11 },
5283480093f4SDimitry Andric     { ISD::AND,  MVT::v64i1, 13 },
5284480093f4SDimitry Andric     { ISD::OR,   MVT::v2i1,   3 },
5285480093f4SDimitry Andric     { ISD::OR,   MVT::v4i1,   5 },
5286480093f4SDimitry Andric     { ISD::OR,   MVT::v8i1,   7 },
5287480093f4SDimitry Andric     { ISD::OR,   MVT::v16i1,  9 },
5288480093f4SDimitry Andric     { ISD::OR,   MVT::v32i1, 11 },
5289480093f4SDimitry Andric     { ISD::OR,   MVT::v64i1, 13 },
5290480093f4SDimitry Andric   };
5291480093f4SDimitry Andric 
52920b57cec5SDimitry Andric   static const CostTblEntry AVX2BoolReduction[] = {
52930b57cec5SDimitry Andric     { ISD::AND,  MVT::v16i16,  2 }, // vpmovmskb + cmp
52940b57cec5SDimitry Andric     { ISD::AND,  MVT::v32i8,   2 }, // vpmovmskb + cmp
52950b57cec5SDimitry Andric     { ISD::OR,   MVT::v16i16,  2 }, // vpmovmskb + cmp
52960b57cec5SDimitry Andric     { ISD::OR,   MVT::v32i8,   2 }, // vpmovmskb + cmp
52970b57cec5SDimitry Andric   };
52980b57cec5SDimitry Andric 
52990b57cec5SDimitry Andric   static const CostTblEntry AVX1BoolReduction[] = {
53000b57cec5SDimitry Andric     { ISD::AND,  MVT::v4i64,   2 }, // vmovmskpd + cmp
53010b57cec5SDimitry Andric     { ISD::AND,  MVT::v8i32,   2 }, // vmovmskps + cmp
53020b57cec5SDimitry Andric     { ISD::AND,  MVT::v16i16,  4 }, // vextractf128 + vpand + vpmovmskb + cmp
53030b57cec5SDimitry Andric     { ISD::AND,  MVT::v32i8,   4 }, // vextractf128 + vpand + vpmovmskb + cmp
53040b57cec5SDimitry Andric     { ISD::OR,   MVT::v4i64,   2 }, // vmovmskpd + cmp
53050b57cec5SDimitry Andric     { ISD::OR,   MVT::v8i32,   2 }, // vmovmskps + cmp
53060b57cec5SDimitry Andric     { ISD::OR,   MVT::v16i16,  4 }, // vextractf128 + vpor + vpmovmskb + cmp
53070b57cec5SDimitry Andric     { ISD::OR,   MVT::v32i8,   4 }, // vextractf128 + vpor + vpmovmskb + cmp
53080b57cec5SDimitry Andric   };
53090b57cec5SDimitry Andric 
53100b57cec5SDimitry Andric   static const CostTblEntry SSE2BoolReduction[] = {
53110b57cec5SDimitry Andric     { ISD::AND,  MVT::v2i64,   2 }, // movmskpd + cmp
53120b57cec5SDimitry Andric     { ISD::AND,  MVT::v4i32,   2 }, // movmskps + cmp
53130b57cec5SDimitry Andric     { ISD::AND,  MVT::v8i16,   2 }, // pmovmskb + cmp
53140b57cec5SDimitry Andric     { ISD::AND,  MVT::v16i8,   2 }, // pmovmskb + cmp
53150b57cec5SDimitry Andric     { ISD::OR,   MVT::v2i64,   2 }, // movmskpd + cmp
53160b57cec5SDimitry Andric     { ISD::OR,   MVT::v4i32,   2 }, // movmskps + cmp
53170b57cec5SDimitry Andric     { ISD::OR,   MVT::v8i16,   2 }, // pmovmskb + cmp
53180b57cec5SDimitry Andric     { ISD::OR,   MVT::v16i8,   2 }, // pmovmskb + cmp
53190b57cec5SDimitry Andric   };
53200b57cec5SDimitry Andric 
53210b57cec5SDimitry Andric   // Handle bool allof/anyof patterns.
53225ffd83dbSDimitry Andric   if (ValVTy->getElementType()->isIntegerTy(1)) {
5323fe6060f1SDimitry Andric     InstructionCost ArithmeticCost = 0;
53245ffd83dbSDimitry Andric     if (LT.first != 1 && MTy.isVector() &&
53255ffd83dbSDimitry Andric         MTy.getVectorNumElements() < ValVTy->getNumElements()) {
53265ffd83dbSDimitry Andric       // Type needs to be split. We need LT.first - 1 arithmetic ops.
53275ffd83dbSDimitry Andric       auto *SingleOpTy = FixedVectorType::get(ValVTy->getElementType(),
53285ffd83dbSDimitry Andric                                               MTy.getVectorNumElements());
53295ffd83dbSDimitry Andric       ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind);
53305ffd83dbSDimitry Andric       ArithmeticCost *= LT.first - 1;
53315ffd83dbSDimitry Andric     }
53325ffd83dbSDimitry Andric 
5333480093f4SDimitry Andric     if (ST->hasAVX512())
5334480093f4SDimitry Andric       if (const auto *Entry = CostTableLookup(AVX512BoolReduction, ISD, MTy))
53355ffd83dbSDimitry Andric         return ArithmeticCost + Entry->Cost;
53360b57cec5SDimitry Andric     if (ST->hasAVX2())
53370b57cec5SDimitry Andric       if (const auto *Entry = CostTableLookup(AVX2BoolReduction, ISD, MTy))
53385ffd83dbSDimitry Andric         return ArithmeticCost + Entry->Cost;
53390b57cec5SDimitry Andric     if (ST->hasAVX())
53400b57cec5SDimitry Andric       if (const auto *Entry = CostTableLookup(AVX1BoolReduction, ISD, MTy))
53415ffd83dbSDimitry Andric         return ArithmeticCost + Entry->Cost;
53420b57cec5SDimitry Andric     if (ST->hasSSE2())
53430b57cec5SDimitry Andric       if (const auto *Entry = CostTableLookup(SSE2BoolReduction, ISD, MTy))
53445ffd83dbSDimitry Andric         return ArithmeticCost + Entry->Cost;
53455ffd83dbSDimitry Andric 
5346fe6060f1SDimitry Andric     return BaseT::getArithmeticReductionCost(Opcode, ValVTy, FMF, CostKind);
53475ffd83dbSDimitry Andric   }
53485ffd83dbSDimitry Andric 
53495ffd83dbSDimitry Andric   unsigned NumVecElts = ValVTy->getNumElements();
53505ffd83dbSDimitry Andric   unsigned ScalarSize = ValVTy->getScalarSizeInBits();
53515ffd83dbSDimitry Andric 
53525ffd83dbSDimitry Andric   // Special case power of 2 reductions where the scalar type isn't changed
53535ffd83dbSDimitry Andric   // by type legalization.
53545ffd83dbSDimitry Andric   if (!isPowerOf2_32(NumVecElts) || ScalarSize != MTy.getScalarSizeInBits())
5355fe6060f1SDimitry Andric     return BaseT::getArithmeticReductionCost(Opcode, ValVTy, FMF, CostKind);
53565ffd83dbSDimitry Andric 
5357fe6060f1SDimitry Andric   InstructionCost ReductionCost = 0;
53585ffd83dbSDimitry Andric 
53595ffd83dbSDimitry Andric   auto *Ty = ValVTy;
53605ffd83dbSDimitry Andric   if (LT.first != 1 && MTy.isVector() &&
53615ffd83dbSDimitry Andric       MTy.getVectorNumElements() < ValVTy->getNumElements()) {
53625ffd83dbSDimitry Andric     // Type needs to be split. We need LT.first - 1 arithmetic ops.
53635ffd83dbSDimitry Andric     Ty = FixedVectorType::get(ValVTy->getElementType(),
53645ffd83dbSDimitry Andric                               MTy.getVectorNumElements());
53655ffd83dbSDimitry Andric     ReductionCost = getArithmeticInstrCost(Opcode, Ty, CostKind);
53665ffd83dbSDimitry Andric     ReductionCost *= LT.first - 1;
53675ffd83dbSDimitry Andric     NumVecElts = MTy.getVectorNumElements();
53685ffd83dbSDimitry Andric   }
53695ffd83dbSDimitry Andric 
53705ffd83dbSDimitry Andric   // Now handle reduction with the legal type, taking into account size changes
53715ffd83dbSDimitry Andric   // at each level.
53725ffd83dbSDimitry Andric   while (NumVecElts > 1) {
53735ffd83dbSDimitry Andric     // Determine the size of the remaining vector we need to reduce.
53745ffd83dbSDimitry Andric     unsigned Size = NumVecElts * ScalarSize;
53755ffd83dbSDimitry Andric     NumVecElts /= 2;
53765ffd83dbSDimitry Andric     // If we're reducing from 256/512 bits, use an extract_subvector.
53775ffd83dbSDimitry Andric     if (Size > 128) {
53785ffd83dbSDimitry Andric       auto *SubTy = FixedVectorType::get(ValVTy->getElementType(), NumVecElts);
53795ffd83dbSDimitry Andric       ReductionCost +=
5380bdd1243dSDimitry Andric           getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt, CostKind,
5381bdd1243dSDimitry Andric                          NumVecElts, SubTy);
53825ffd83dbSDimitry Andric       Ty = SubTy;
53835ffd83dbSDimitry Andric     } else if (Size == 128) {
53845ffd83dbSDimitry Andric       // Reducing from 128 bits is a permute of v2f64/v2i64.
53855ffd83dbSDimitry Andric       FixedVectorType *ShufTy;
53865ffd83dbSDimitry Andric       if (ValVTy->isFloatingPointTy())
53875ffd83dbSDimitry Andric         ShufTy =
53885ffd83dbSDimitry Andric             FixedVectorType::get(Type::getDoubleTy(ValVTy->getContext()), 2);
53895ffd83dbSDimitry Andric       else
53905ffd83dbSDimitry Andric         ShufTy =
53915ffd83dbSDimitry Andric             FixedVectorType::get(Type::getInt64Ty(ValVTy->getContext()), 2);
5392bdd1243dSDimitry Andric       ReductionCost += getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy,
5393bdd1243dSDimitry Andric                                       std::nullopt, CostKind, 0, nullptr);
53945ffd83dbSDimitry Andric     } else if (Size == 64) {
53955ffd83dbSDimitry Andric       // Reducing from 64 bits is a shuffle of v4f32/v4i32.
53965ffd83dbSDimitry Andric       FixedVectorType *ShufTy;
53975ffd83dbSDimitry Andric       if (ValVTy->isFloatingPointTy())
53985ffd83dbSDimitry Andric         ShufTy =
53995ffd83dbSDimitry Andric             FixedVectorType::get(Type::getFloatTy(ValVTy->getContext()), 4);
54005ffd83dbSDimitry Andric       else
54015ffd83dbSDimitry Andric         ShufTy =
54025ffd83dbSDimitry Andric             FixedVectorType::get(Type::getInt32Ty(ValVTy->getContext()), 4);
5403bdd1243dSDimitry Andric       ReductionCost += getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy,
5404bdd1243dSDimitry Andric                                       std::nullopt, CostKind, 0, nullptr);
54055ffd83dbSDimitry Andric     } else {
54065ffd83dbSDimitry Andric       // Reducing from smaller size is a shift by immediate.
54075ffd83dbSDimitry Andric       auto *ShiftTy = FixedVectorType::get(
54085ffd83dbSDimitry Andric           Type::getIntNTy(ValVTy->getContext(), Size), 128 / Size);
54095ffd83dbSDimitry Andric       ReductionCost += getArithmeticInstrCost(
54105ffd83dbSDimitry Andric           Instruction::LShr, ShiftTy, CostKind,
5411bdd1243dSDimitry Andric           {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
5412bdd1243dSDimitry Andric           {TargetTransformInfo::OK_UniformConstantValue, TargetTransformInfo::OP_None});
54135ffd83dbSDimitry Andric     }
54145ffd83dbSDimitry Andric 
54155ffd83dbSDimitry Andric     // Add the arithmetic op for this level.
54165ffd83dbSDimitry Andric     ReductionCost += getArithmeticInstrCost(Opcode, Ty, CostKind);
54175ffd83dbSDimitry Andric   }
54185ffd83dbSDimitry Andric 
54195ffd83dbSDimitry Andric   // Add the final extract element to the cost.
5420bdd1243dSDimitry Andric   return ReductionCost + getVectorInstrCost(Instruction::ExtractElement, Ty,
5421bdd1243dSDimitry Andric                                             CostKind, 0, nullptr, nullptr);
54225ffd83dbSDimitry Andric }
54235ffd83dbSDimitry Andric 
542406c3fb27SDimitry Andric InstructionCost X86TTIImpl::getMinMaxCost(Intrinsic::ID IID, Type *Ty,
542506c3fb27SDimitry Andric                                           TTI::TargetCostKind CostKind,
542606c3fb27SDimitry Andric                                           FastMathFlags FMF) {
542706c3fb27SDimitry Andric   IntrinsicCostAttributes ICA(IID, Ty, {Ty, Ty}, FMF);
542806c3fb27SDimitry Andric   return getIntrinsicInstrCost(ICA, CostKind);
54290b57cec5SDimitry Andric }
54300b57cec5SDimitry Andric 
5431fe6060f1SDimitry Andric InstructionCost
543206c3fb27SDimitry Andric X86TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *ValTy,
543306c3fb27SDimitry Andric                                    FastMathFlags FMF,
54345ffd83dbSDimitry Andric                                    TTI::TargetCostKind CostKind) {
5435bdd1243dSDimitry Andric   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
54360b57cec5SDimitry Andric 
54370b57cec5SDimitry Andric   MVT MTy = LT.second;
54380b57cec5SDimitry Andric 
54390b57cec5SDimitry Andric   int ISD;
54400b57cec5SDimitry Andric   if (ValTy->isIntOrIntVectorTy()) {
544106c3fb27SDimitry Andric     ISD = (IID == Intrinsic::umin || IID == Intrinsic::umax) ? ISD::UMIN
544206c3fb27SDimitry Andric                                                              : ISD::SMIN;
54430b57cec5SDimitry Andric   } else {
54440b57cec5SDimitry Andric     assert(ValTy->isFPOrFPVectorTy() &&
54450b57cec5SDimitry Andric            "Expected float point or integer vector type.");
544606c3fb27SDimitry Andric     ISD = (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)
544706c3fb27SDimitry Andric               ? ISD::FMINNUM
544806c3fb27SDimitry Andric               : ISD::FMINIMUM;
54490b57cec5SDimitry Andric   }
54500b57cec5SDimitry Andric 
54510b57cec5SDimitry Andric   // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
54520b57cec5SDimitry Andric   // and make it as the cost.
54530b57cec5SDimitry Andric 
545406c3fb27SDimitry Andric   static const CostTblEntry SSE2CostTbl[] = {
54555ffd83dbSDimitry Andric       {ISD::UMIN, MVT::v2i16, 5}, // need pxors to use pminsw/pmaxsw
54565ffd83dbSDimitry Andric       {ISD::UMIN, MVT::v4i16, 7}, // need pxors to use pminsw/pmaxsw
54575ffd83dbSDimitry Andric       {ISD::UMIN, MVT::v8i16, 9}, // need pxors to use pminsw/pmaxsw
54580b57cec5SDimitry Andric   };
54590b57cec5SDimitry Andric 
546006c3fb27SDimitry Andric   static const CostTblEntry SSE41CostTbl[] = {
54615ffd83dbSDimitry Andric       {ISD::SMIN, MVT::v2i16, 3}, // same as sse2
54625ffd83dbSDimitry Andric       {ISD::SMIN, MVT::v4i16, 5}, // same as sse2
54635ffd83dbSDimitry Andric       {ISD::UMIN, MVT::v2i16, 5}, // same as sse2
54645ffd83dbSDimitry Andric       {ISD::UMIN, MVT::v4i16, 7}, // same as sse2
54655ffd83dbSDimitry Andric       {ISD::SMIN, MVT::v8i16, 4}, // phminposuw+xor
54665ffd83dbSDimitry Andric       {ISD::UMIN, MVT::v8i16, 4}, // FIXME: umin is cheaper than umax
54675ffd83dbSDimitry Andric       {ISD::SMIN, MVT::v2i8,  3}, // pminsb
54685ffd83dbSDimitry Andric       {ISD::SMIN, MVT::v4i8,  5}, // pminsb
54695ffd83dbSDimitry Andric       {ISD::SMIN, MVT::v8i8,  7}, // pminsb
54705ffd83dbSDimitry Andric       {ISD::SMIN, MVT::v16i8, 6},
54715ffd83dbSDimitry Andric       {ISD::UMIN, MVT::v2i8,  3}, // same as sse2
54725ffd83dbSDimitry Andric       {ISD::UMIN, MVT::v4i8,  5}, // same as sse2
54735ffd83dbSDimitry Andric       {ISD::UMIN, MVT::v8i8,  7}, // same as sse2
54745ffd83dbSDimitry Andric       {ISD::UMIN, MVT::v16i8, 6}, // FIXME: umin is cheaper than umax
54750b57cec5SDimitry Andric   };
54760b57cec5SDimitry Andric 
547706c3fb27SDimitry Andric   static const CostTblEntry AVX1CostTbl[] = {
54785ffd83dbSDimitry Andric       {ISD::SMIN, MVT::v16i16, 6},
54795ffd83dbSDimitry Andric       {ISD::UMIN, MVT::v16i16, 6}, // FIXME: umin is cheaper than umax
54805ffd83dbSDimitry Andric       {ISD::SMIN, MVT::v32i8, 8},
54815ffd83dbSDimitry Andric       {ISD::UMIN, MVT::v32i8, 8},
54820b57cec5SDimitry Andric   };
54830b57cec5SDimitry Andric 
548406c3fb27SDimitry Andric   static const CostTblEntry AVX512BWCostTbl[] = {
54855ffd83dbSDimitry Andric       {ISD::SMIN, MVT::v32i16, 8},
54865ffd83dbSDimitry Andric       {ISD::UMIN, MVT::v32i16, 8}, // FIXME: umin is cheaper than umax
54875ffd83dbSDimitry Andric       {ISD::SMIN, MVT::v64i8, 10},
54885ffd83dbSDimitry Andric       {ISD::UMIN, MVT::v64i8, 10},
54890b57cec5SDimitry Andric   };
54900b57cec5SDimitry Andric 
54915ffd83dbSDimitry Andric   // Before legalizing the type, give a chance to look up illegal narrow types
54925ffd83dbSDimitry Andric   // in the table.
54935ffd83dbSDimitry Andric   // FIXME: Is there a better way to do this?
54945ffd83dbSDimitry Andric   EVT VT = TLI->getValueType(DL, ValTy);
54955ffd83dbSDimitry Andric   if (VT.isSimple()) {
54965ffd83dbSDimitry Andric     MVT MTy = VT.getSimpleVT();
54975ffd83dbSDimitry Andric     if (ST->hasBWI())
549806c3fb27SDimitry Andric       if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
54995ffd83dbSDimitry Andric         return Entry->Cost;
55000b57cec5SDimitry Andric 
55010b57cec5SDimitry Andric     if (ST->hasAVX())
550206c3fb27SDimitry Andric       if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
55035ffd83dbSDimitry Andric         return Entry->Cost;
55040b57cec5SDimitry Andric 
55050b57cec5SDimitry Andric     if (ST->hasSSE41())
550606c3fb27SDimitry Andric       if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
55075ffd83dbSDimitry Andric         return Entry->Cost;
55080b57cec5SDimitry Andric 
55090b57cec5SDimitry Andric     if (ST->hasSSE2())
551006c3fb27SDimitry Andric       if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
55115ffd83dbSDimitry Andric         return Entry->Cost;
55120b57cec5SDimitry Andric   }
55130b57cec5SDimitry Andric 
55145ffd83dbSDimitry Andric   auto *ValVTy = cast<FixedVectorType>(ValTy);
55155ffd83dbSDimitry Andric   unsigned NumVecElts = ValVTy->getNumElements();
55165ffd83dbSDimitry Andric 
55175ffd83dbSDimitry Andric   auto *Ty = ValVTy;
5518fe6060f1SDimitry Andric   InstructionCost MinMaxCost = 0;
55195ffd83dbSDimitry Andric   if (LT.first != 1 && MTy.isVector() &&
55205ffd83dbSDimitry Andric       MTy.getVectorNumElements() < ValVTy->getNumElements()) {
55215ffd83dbSDimitry Andric     // Type needs to be split. We need LT.first - 1 operations ops.
55225ffd83dbSDimitry Andric     Ty = FixedVectorType::get(ValVTy->getElementType(),
55235ffd83dbSDimitry Andric                               MTy.getVectorNumElements());
552406c3fb27SDimitry Andric     MinMaxCost = getMinMaxCost(IID, Ty, CostKind, FMF);
55255ffd83dbSDimitry Andric     MinMaxCost *= LT.first - 1;
55265ffd83dbSDimitry Andric     NumVecElts = MTy.getVectorNumElements();
55275ffd83dbSDimitry Andric   }
55285ffd83dbSDimitry Andric 
55295ffd83dbSDimitry Andric   if (ST->hasBWI())
553006c3fb27SDimitry Andric     if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
55315ffd83dbSDimitry Andric       return MinMaxCost + Entry->Cost;
55325ffd83dbSDimitry Andric 
55335ffd83dbSDimitry Andric   if (ST->hasAVX())
553406c3fb27SDimitry Andric     if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
55355ffd83dbSDimitry Andric       return MinMaxCost + Entry->Cost;
55365ffd83dbSDimitry Andric 
55375ffd83dbSDimitry Andric   if (ST->hasSSE41())
553806c3fb27SDimitry Andric     if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
55395ffd83dbSDimitry Andric       return MinMaxCost + Entry->Cost;
55405ffd83dbSDimitry Andric 
55415ffd83dbSDimitry Andric   if (ST->hasSSE2())
554206c3fb27SDimitry Andric     if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
55435ffd83dbSDimitry Andric       return MinMaxCost + Entry->Cost;
55445ffd83dbSDimitry Andric 
55455ffd83dbSDimitry Andric   unsigned ScalarSize = ValTy->getScalarSizeInBits();
55465ffd83dbSDimitry Andric 
55475ffd83dbSDimitry Andric   // Special case power of 2 reductions where the scalar type isn't changed
55485ffd83dbSDimitry Andric   // by type legalization.
55495ffd83dbSDimitry Andric   if (!isPowerOf2_32(ValVTy->getNumElements()) ||
55505ffd83dbSDimitry Andric       ScalarSize != MTy.getScalarSizeInBits())
555106c3fb27SDimitry Andric     return BaseT::getMinMaxReductionCost(IID, ValTy, FMF, CostKind);
55525ffd83dbSDimitry Andric 
55535ffd83dbSDimitry Andric   // Now handle reduction with the legal type, taking into account size changes
55545ffd83dbSDimitry Andric   // at each level.
55555ffd83dbSDimitry Andric   while (NumVecElts > 1) {
55565ffd83dbSDimitry Andric     // Determine the size of the remaining vector we need to reduce.
55575ffd83dbSDimitry Andric     unsigned Size = NumVecElts * ScalarSize;
55585ffd83dbSDimitry Andric     NumVecElts /= 2;
55595ffd83dbSDimitry Andric     // If we're reducing from 256/512 bits, use an extract_subvector.
55605ffd83dbSDimitry Andric     if (Size > 128) {
55615ffd83dbSDimitry Andric       auto *SubTy = FixedVectorType::get(ValVTy->getElementType(), NumVecElts);
5562bdd1243dSDimitry Andric       MinMaxCost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt,
5563bdd1243dSDimitry Andric                                    CostKind, NumVecElts, SubTy);
55645ffd83dbSDimitry Andric       Ty = SubTy;
55655ffd83dbSDimitry Andric     } else if (Size == 128) {
55665ffd83dbSDimitry Andric       // Reducing from 128 bits is a permute of v2f64/v2i64.
55675ffd83dbSDimitry Andric       VectorType *ShufTy;
55685ffd83dbSDimitry Andric       if (ValTy->isFloatingPointTy())
55695ffd83dbSDimitry Andric         ShufTy =
55705ffd83dbSDimitry Andric             FixedVectorType::get(Type::getDoubleTy(ValTy->getContext()), 2);
55715ffd83dbSDimitry Andric       else
55725ffd83dbSDimitry Andric         ShufTy = FixedVectorType::get(Type::getInt64Ty(ValTy->getContext()), 2);
5573bdd1243dSDimitry Andric       MinMaxCost += getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy,
5574bdd1243dSDimitry Andric                                    std::nullopt, CostKind, 0, nullptr);
55755ffd83dbSDimitry Andric     } else if (Size == 64) {
55765ffd83dbSDimitry Andric       // Reducing from 64 bits is a shuffle of v4f32/v4i32.
55775ffd83dbSDimitry Andric       FixedVectorType *ShufTy;
55785ffd83dbSDimitry Andric       if (ValTy->isFloatingPointTy())
55795ffd83dbSDimitry Andric         ShufTy = FixedVectorType::get(Type::getFloatTy(ValTy->getContext()), 4);
55805ffd83dbSDimitry Andric       else
55815ffd83dbSDimitry Andric         ShufTy = FixedVectorType::get(Type::getInt32Ty(ValTy->getContext()), 4);
5582bdd1243dSDimitry Andric       MinMaxCost += getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy,
5583bdd1243dSDimitry Andric                                    std::nullopt, CostKind, 0, nullptr);
55845ffd83dbSDimitry Andric     } else {
55855ffd83dbSDimitry Andric       // Reducing from smaller size is a shift by immediate.
55865ffd83dbSDimitry Andric       auto *ShiftTy = FixedVectorType::get(
55875ffd83dbSDimitry Andric           Type::getIntNTy(ValTy->getContext(), Size), 128 / Size);
55885ffd83dbSDimitry Andric       MinMaxCost += getArithmeticInstrCost(
55895ffd83dbSDimitry Andric           Instruction::LShr, ShiftTy, TTI::TCK_RecipThroughput,
5590bdd1243dSDimitry Andric           {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
5591bdd1243dSDimitry Andric           {TargetTransformInfo::OK_UniformConstantValue, TargetTransformInfo::OP_None});
55925ffd83dbSDimitry Andric     }
55935ffd83dbSDimitry Andric 
55945ffd83dbSDimitry Andric     // Add the arithmetic op for this level.
559506c3fb27SDimitry Andric     MinMaxCost += getMinMaxCost(IID, Ty, CostKind, FMF);
55965ffd83dbSDimitry Andric   }
55975ffd83dbSDimitry Andric 
55985ffd83dbSDimitry Andric   // Add the final extract element to the cost.
5599bdd1243dSDimitry Andric   return MinMaxCost + getVectorInstrCost(Instruction::ExtractElement, Ty,
5600bdd1243dSDimitry Andric                                          CostKind, 0, nullptr, nullptr);
56010b57cec5SDimitry Andric }
56020b57cec5SDimitry Andric 
56030b57cec5SDimitry Andric /// Calculate the cost of materializing a 64-bit value. This helper
56040b57cec5SDimitry Andric /// method might only calculate a fraction of a larger immediate. Therefore it
56050b57cec5SDimitry Andric /// is valid to return a cost of ZERO.
5606fe6060f1SDimitry Andric InstructionCost X86TTIImpl::getIntImmCost(int64_t Val) {
56070b57cec5SDimitry Andric   if (Val == 0)
56080b57cec5SDimitry Andric     return TTI::TCC_Free;
56090b57cec5SDimitry Andric 
56100b57cec5SDimitry Andric   if (isInt<32>(Val))
56110b57cec5SDimitry Andric     return TTI::TCC_Basic;
56120b57cec5SDimitry Andric 
56130b57cec5SDimitry Andric   return 2 * TTI::TCC_Basic;
56140b57cec5SDimitry Andric }
56150b57cec5SDimitry Andric 
5616fe6060f1SDimitry Andric InstructionCost X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
56175ffd83dbSDimitry Andric                                           TTI::TargetCostKind CostKind) {
56180b57cec5SDimitry Andric   assert(Ty->isIntegerTy());
56190b57cec5SDimitry Andric 
56200b57cec5SDimitry Andric   unsigned BitSize = Ty->getPrimitiveSizeInBits();
56210b57cec5SDimitry Andric   if (BitSize == 0)
56220b57cec5SDimitry Andric     return ~0U;
56230b57cec5SDimitry Andric 
56240b57cec5SDimitry Andric   // Never hoist constants larger than 128bit, because this might lead to
56250b57cec5SDimitry Andric   // incorrect code generation or assertions in codegen.
56260b57cec5SDimitry Andric   // Fixme: Create a cost model for types larger than i128 once the codegen
56270b57cec5SDimitry Andric   // issues have been fixed.
56280b57cec5SDimitry Andric   if (BitSize > 128)
56290b57cec5SDimitry Andric     return TTI::TCC_Free;
56300b57cec5SDimitry Andric 
56310b57cec5SDimitry Andric   if (Imm == 0)
56320b57cec5SDimitry Andric     return TTI::TCC_Free;
56330b57cec5SDimitry Andric 
56340b57cec5SDimitry Andric   // Sign-extend all constants to a multiple of 64-bit.
56350b57cec5SDimitry Andric   APInt ImmVal = Imm;
56360b57cec5SDimitry Andric   if (BitSize % 64 != 0)
56370b57cec5SDimitry Andric     ImmVal = Imm.sext(alignTo(BitSize, 64));
56380b57cec5SDimitry Andric 
56390b57cec5SDimitry Andric   // Split the constant into 64-bit chunks and calculate the cost for each
56400b57cec5SDimitry Andric   // chunk.
5641fe6060f1SDimitry Andric   InstructionCost Cost = 0;
56420b57cec5SDimitry Andric   for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
56430b57cec5SDimitry Andric     APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
56440b57cec5SDimitry Andric     int64_t Val = Tmp.getSExtValue();
56450b57cec5SDimitry Andric     Cost += getIntImmCost(Val);
56460b57cec5SDimitry Andric   }
56470b57cec5SDimitry Andric   // We need at least one instruction to materialize the constant.
5648fe6060f1SDimitry Andric   return std::max<InstructionCost>(1, Cost);
56490b57cec5SDimitry Andric }
56500b57cec5SDimitry Andric 
5651fe6060f1SDimitry Andric InstructionCost X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
5652e8d8bef9SDimitry Andric                                               const APInt &Imm, Type *Ty,
5653e8d8bef9SDimitry Andric                                               TTI::TargetCostKind CostKind,
5654e8d8bef9SDimitry Andric                                               Instruction *Inst) {
56550b57cec5SDimitry Andric   assert(Ty->isIntegerTy());
56560b57cec5SDimitry Andric 
56570b57cec5SDimitry Andric   unsigned BitSize = Ty->getPrimitiveSizeInBits();
56580b57cec5SDimitry Andric   // There is no cost model for constants with a bit size of 0. Return TCC_Free
56590b57cec5SDimitry Andric   // here, so that constant hoisting will ignore this constant.
56600b57cec5SDimitry Andric   if (BitSize == 0)
56610b57cec5SDimitry Andric     return TTI::TCC_Free;
56620b57cec5SDimitry Andric 
56630b57cec5SDimitry Andric   unsigned ImmIdx = ~0U;
56640b57cec5SDimitry Andric   switch (Opcode) {
56650b57cec5SDimitry Andric   default:
56660b57cec5SDimitry Andric     return TTI::TCC_Free;
56670b57cec5SDimitry Andric   case Instruction::GetElementPtr:
56680b57cec5SDimitry Andric     // Always hoist the base address of a GetElementPtr. This prevents the
56690b57cec5SDimitry Andric     // creation of new constants for every base constant that gets constant
56700b57cec5SDimitry Andric     // folded with the offset.
56710b57cec5SDimitry Andric     if (Idx == 0)
56720b57cec5SDimitry Andric       return 2 * TTI::TCC_Basic;
56730b57cec5SDimitry Andric     return TTI::TCC_Free;
56740b57cec5SDimitry Andric   case Instruction::Store:
56750b57cec5SDimitry Andric     ImmIdx = 0;
56760b57cec5SDimitry Andric     break;
56770b57cec5SDimitry Andric   case Instruction::ICmp:
56780b57cec5SDimitry Andric     // This is an imperfect hack to prevent constant hoisting of
56790b57cec5SDimitry Andric     // compares that might be trying to check if a 64-bit value fits in
56800b57cec5SDimitry Andric     // 32-bits. The backend can optimize these cases using a right shift by 32.
56810b57cec5SDimitry Andric     // Ideally we would check the compare predicate here. There also other
56820b57cec5SDimitry Andric     // similar immediates the backend can use shifts for.
56830b57cec5SDimitry Andric     if (Idx == 1 && Imm.getBitWidth() == 64) {
56840b57cec5SDimitry Andric       uint64_t ImmVal = Imm.getZExtValue();
56850b57cec5SDimitry Andric       if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
56860b57cec5SDimitry Andric         return TTI::TCC_Free;
56870b57cec5SDimitry Andric     }
56880b57cec5SDimitry Andric     ImmIdx = 1;
56890b57cec5SDimitry Andric     break;
56900b57cec5SDimitry Andric   case Instruction::And:
56910b57cec5SDimitry Andric     // We support 64-bit ANDs with immediates with 32-bits of leading zeroes
56920b57cec5SDimitry Andric     // by using a 32-bit operation with implicit zero extension. Detect such
56930b57cec5SDimitry Andric     // immediates here as the normal path expects bit 31 to be sign extended.
5694bdd1243dSDimitry Andric     if (Idx == 1 && Imm.getBitWidth() == 64 && Imm.isIntN(32))
56950b57cec5SDimitry Andric       return TTI::TCC_Free;
56960b57cec5SDimitry Andric     ImmIdx = 1;
56970b57cec5SDimitry Andric     break;
56980b57cec5SDimitry Andric   case Instruction::Add:
56990b57cec5SDimitry Andric   case Instruction::Sub:
57000b57cec5SDimitry Andric     // For add/sub, we can use the opposite instruction for INT32_MIN.
57010b57cec5SDimitry Andric     if (Idx == 1 && Imm.getBitWidth() == 64 && Imm.getZExtValue() == 0x80000000)
57020b57cec5SDimitry Andric       return TTI::TCC_Free;
57030b57cec5SDimitry Andric     ImmIdx = 1;
57040b57cec5SDimitry Andric     break;
57050b57cec5SDimitry Andric   case Instruction::UDiv:
57060b57cec5SDimitry Andric   case Instruction::SDiv:
57070b57cec5SDimitry Andric   case Instruction::URem:
57080b57cec5SDimitry Andric   case Instruction::SRem:
57090b57cec5SDimitry Andric     // Division by constant is typically expanded later into a different
57100b57cec5SDimitry Andric     // instruction sequence. This completely changes the constants.
57110b57cec5SDimitry Andric     // Report them as "free" to stop ConstantHoist from marking them as opaque.
57120b57cec5SDimitry Andric     return TTI::TCC_Free;
57130b57cec5SDimitry Andric   case Instruction::Mul:
57140b57cec5SDimitry Andric   case Instruction::Or:
57150b57cec5SDimitry Andric   case Instruction::Xor:
57160b57cec5SDimitry Andric     ImmIdx = 1;
57170b57cec5SDimitry Andric     break;
57180b57cec5SDimitry Andric   // Always return TCC_Free for the shift value of a shift instruction.
57190b57cec5SDimitry Andric   case Instruction::Shl:
57200b57cec5SDimitry Andric   case Instruction::LShr:
57210b57cec5SDimitry Andric   case Instruction::AShr:
57220b57cec5SDimitry Andric     if (Idx == 1)
57230b57cec5SDimitry Andric       return TTI::TCC_Free;
57240b57cec5SDimitry Andric     break;
57250b57cec5SDimitry Andric   case Instruction::Trunc:
57260b57cec5SDimitry Andric   case Instruction::ZExt:
57270b57cec5SDimitry Andric   case Instruction::SExt:
57280b57cec5SDimitry Andric   case Instruction::IntToPtr:
57290b57cec5SDimitry Andric   case Instruction::PtrToInt:
57300b57cec5SDimitry Andric   case Instruction::BitCast:
57310b57cec5SDimitry Andric   case Instruction::PHI:
57320b57cec5SDimitry Andric   case Instruction::Call:
57330b57cec5SDimitry Andric   case Instruction::Select:
57340b57cec5SDimitry Andric   case Instruction::Ret:
57350b57cec5SDimitry Andric   case Instruction::Load:
57360b57cec5SDimitry Andric     break;
57370b57cec5SDimitry Andric   }
57380b57cec5SDimitry Andric 
57390b57cec5SDimitry Andric   if (Idx == ImmIdx) {
574006c3fb27SDimitry Andric     uint64_t NumConstants = divideCeil(BitSize, 64);
5741fe6060f1SDimitry Andric     InstructionCost Cost = X86TTIImpl::getIntImmCost(Imm, Ty, CostKind);
57420b57cec5SDimitry Andric     return (Cost <= NumConstants * TTI::TCC_Basic)
57430b57cec5SDimitry Andric                ? static_cast<int>(TTI::TCC_Free)
57440b57cec5SDimitry Andric                : Cost;
57450b57cec5SDimitry Andric   }
57460b57cec5SDimitry Andric 
57475ffd83dbSDimitry Andric   return X86TTIImpl::getIntImmCost(Imm, Ty, CostKind);
57480b57cec5SDimitry Andric }
57490b57cec5SDimitry Andric 
5750fe6060f1SDimitry Andric InstructionCost X86TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
57515ffd83dbSDimitry Andric                                                 const APInt &Imm, Type *Ty,
57525ffd83dbSDimitry Andric                                                 TTI::TargetCostKind CostKind) {
57530b57cec5SDimitry Andric   assert(Ty->isIntegerTy());
57540b57cec5SDimitry Andric 
57550b57cec5SDimitry Andric   unsigned BitSize = Ty->getPrimitiveSizeInBits();
57560b57cec5SDimitry Andric   // There is no cost model for constants with a bit size of 0. Return TCC_Free
57570b57cec5SDimitry Andric   // here, so that constant hoisting will ignore this constant.
57580b57cec5SDimitry Andric   if (BitSize == 0)
57590b57cec5SDimitry Andric     return TTI::TCC_Free;
57600b57cec5SDimitry Andric 
57610b57cec5SDimitry Andric   switch (IID) {
57620b57cec5SDimitry Andric   default:
57630b57cec5SDimitry Andric     return TTI::TCC_Free;
57640b57cec5SDimitry Andric   case Intrinsic::sadd_with_overflow:
57650b57cec5SDimitry Andric   case Intrinsic::uadd_with_overflow:
57660b57cec5SDimitry Andric   case Intrinsic::ssub_with_overflow:
57670b57cec5SDimitry Andric   case Intrinsic::usub_with_overflow:
57680b57cec5SDimitry Andric   case Intrinsic::smul_with_overflow:
57690b57cec5SDimitry Andric   case Intrinsic::umul_with_overflow:
5770bdd1243dSDimitry Andric     if ((Idx == 1) && Imm.getBitWidth() <= 64 && Imm.isSignedIntN(32))
57710b57cec5SDimitry Andric       return TTI::TCC_Free;
57720b57cec5SDimitry Andric     break;
57730b57cec5SDimitry Andric   case Intrinsic::experimental_stackmap:
5774bdd1243dSDimitry Andric     if ((Idx < 2) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
57750b57cec5SDimitry Andric       return TTI::TCC_Free;
57760b57cec5SDimitry Andric     break;
57770b57cec5SDimitry Andric   case Intrinsic::experimental_patchpoint_void:
5778*0fca6ea1SDimitry Andric   case Intrinsic::experimental_patchpoint:
5779bdd1243dSDimitry Andric     if ((Idx < 4) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
57800b57cec5SDimitry Andric       return TTI::TCC_Free;
57810b57cec5SDimitry Andric     break;
57820b57cec5SDimitry Andric   }
57835ffd83dbSDimitry Andric   return X86TTIImpl::getIntImmCost(Imm, Ty, CostKind);
57840b57cec5SDimitry Andric }
57850b57cec5SDimitry Andric 
5786fe6060f1SDimitry Andric InstructionCost X86TTIImpl::getCFInstrCost(unsigned Opcode,
5787fe6060f1SDimitry Andric                                            TTI::TargetCostKind CostKind,
5788fe6060f1SDimitry Andric                                            const Instruction *I) {
57895ffd83dbSDimitry Andric   if (CostKind != TTI::TCK_RecipThroughput)
57905ffd83dbSDimitry Andric     return Opcode == Instruction::PHI ? 0 : 1;
57915ffd83dbSDimitry Andric   // Branches are assumed to be predicted.
5792fe6060f1SDimitry Andric   return 0;
57930b57cec5SDimitry Andric }
57940b57cec5SDimitry Andric 
5795e8d8bef9SDimitry Andric int X86TTIImpl::getGatherOverhead() const {
5796e8d8bef9SDimitry Andric   // Some CPUs have more overhead for gather. The specified overhead is relative
5797e8d8bef9SDimitry Andric   // to the Load operation. "2" is the number provided by Intel architects. This
5798e8d8bef9SDimitry Andric   // parameter is used for cost estimation of Gather Op and comparison with
5799e8d8bef9SDimitry Andric   // other alternatives.
5800e8d8bef9SDimitry Andric   // TODO: Remove the explicit hasAVX512()?, That would mean we would only
5801e8d8bef9SDimitry Andric   // enable gather with a -march.
5802e8d8bef9SDimitry Andric   if (ST->hasAVX512() || (ST->hasAVX2() && ST->hasFastGather()))
5803e8d8bef9SDimitry Andric     return 2;
5804e8d8bef9SDimitry Andric 
5805e8d8bef9SDimitry Andric   return 1024;
5806e8d8bef9SDimitry Andric }
5807e8d8bef9SDimitry Andric 
5808e8d8bef9SDimitry Andric int X86TTIImpl::getScatterOverhead() const {
5809e8d8bef9SDimitry Andric   if (ST->hasAVX512())
5810e8d8bef9SDimitry Andric     return 2;
5811e8d8bef9SDimitry Andric 
5812e8d8bef9SDimitry Andric   return 1024;
5813e8d8bef9SDimitry Andric }
5814e8d8bef9SDimitry Andric 
5815e8d8bef9SDimitry Andric // Return an average cost of Gather / Scatter instruction, maybe improved later.
5816*0fca6ea1SDimitry Andric InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode,
5817*0fca6ea1SDimitry Andric                                             TTI::TargetCostKind CostKind,
5818*0fca6ea1SDimitry Andric                                             Type *SrcVTy, const Value *Ptr,
5819*0fca6ea1SDimitry Andric                                             Align Alignment,
5820fe6060f1SDimitry Andric                                             unsigned AddressSpace) {
58210b57cec5SDimitry Andric 
58220b57cec5SDimitry Andric   assert(isa<VectorType>(SrcVTy) && "Unexpected type in getGSVectorCost");
58235ffd83dbSDimitry Andric   unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
58240b57cec5SDimitry Andric 
58250b57cec5SDimitry Andric   // Try to reduce index size from 64 bit (default for GEP)
58260b57cec5SDimitry Andric   // to 32. It is essential for VF 16. If the index can't be reduced to 32, the
58270b57cec5SDimitry Andric   // operation will use 16 x 64 indices which do not fit in a zmm and needs
58280b57cec5SDimitry Andric   // to split. Also check that the base pointer is the same for all lanes,
58290b57cec5SDimitry Andric   // and that there's at most one variable index.
58305ffd83dbSDimitry Andric   auto getIndexSizeInBits = [](const Value *Ptr, const DataLayout &DL) {
58310b57cec5SDimitry Andric     unsigned IndexSize = DL.getPointerSizeInBits();
58325ffd83dbSDimitry Andric     const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
58330b57cec5SDimitry Andric     if (IndexSize < 64 || !GEP)
58340b57cec5SDimitry Andric       return IndexSize;
58350b57cec5SDimitry Andric 
58360b57cec5SDimitry Andric     unsigned NumOfVarIndices = 0;
58375ffd83dbSDimitry Andric     const Value *Ptrs = GEP->getPointerOperand();
58380b57cec5SDimitry Andric     if (Ptrs->getType()->isVectorTy() && !getSplatValue(Ptrs))
58390b57cec5SDimitry Andric       return IndexSize;
584006c3fb27SDimitry Andric     for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I) {
584106c3fb27SDimitry Andric       if (isa<Constant>(GEP->getOperand(I)))
58420b57cec5SDimitry Andric         continue;
584306c3fb27SDimitry Andric       Type *IndxTy = GEP->getOperand(I)->getType();
58445ffd83dbSDimitry Andric       if (auto *IndexVTy = dyn_cast<VectorType>(IndxTy))
58455ffd83dbSDimitry Andric         IndxTy = IndexVTy->getElementType();
58460b57cec5SDimitry Andric       if ((IndxTy->getPrimitiveSizeInBits() == 64 &&
584706c3fb27SDimitry Andric            !isa<SExtInst>(GEP->getOperand(I))) ||
58480b57cec5SDimitry Andric           ++NumOfVarIndices > 1)
58490b57cec5SDimitry Andric         return IndexSize; // 64
58500b57cec5SDimitry Andric     }
58510b57cec5SDimitry Andric     return (unsigned)32;
58520b57cec5SDimitry Andric   };
58530b57cec5SDimitry Andric 
58540b57cec5SDimitry Andric   // Trying to reduce IndexSize to 32 bits for vector 16.
58550b57cec5SDimitry Andric   // By default the IndexSize is equal to pointer size.
58560b57cec5SDimitry Andric   unsigned IndexSize = (ST->hasAVX512() && VF >= 16)
58570b57cec5SDimitry Andric                            ? getIndexSizeInBits(Ptr, DL)
58580b57cec5SDimitry Andric                            : DL.getPointerSizeInBits();
58590b57cec5SDimitry Andric 
58605ffd83dbSDimitry Andric   auto *IndexVTy = FixedVectorType::get(
58615ffd83dbSDimitry Andric       IntegerType::get(SrcVTy->getContext(), IndexSize), VF);
5862bdd1243dSDimitry Andric   std::pair<InstructionCost, MVT> IdxsLT = getTypeLegalizationCost(IndexVTy);
5863bdd1243dSDimitry Andric   std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(SrcVTy);
5864fe6060f1SDimitry Andric   InstructionCost::CostType SplitFactor =
5865fe6060f1SDimitry Andric       *std::max(IdxsLT.first, SrcLT.first).getValue();
58660b57cec5SDimitry Andric   if (SplitFactor > 1) {
58670b57cec5SDimitry Andric     // Handle splitting of vector of pointers
58685ffd83dbSDimitry Andric     auto *SplitSrcTy =
58695ffd83dbSDimitry Andric         FixedVectorType::get(SrcVTy->getScalarType(), VF / SplitFactor);
5870*0fca6ea1SDimitry Andric     return SplitFactor * getGSVectorCost(Opcode, CostKind, SplitSrcTy, Ptr,
5871*0fca6ea1SDimitry Andric                                          Alignment, AddressSpace);
58720b57cec5SDimitry Andric   }
58730b57cec5SDimitry Andric 
5874*0fca6ea1SDimitry Andric   // If we didn't split, this will be a single gather/scatter instruction.
5875*0fca6ea1SDimitry Andric   if (CostKind == TTI::TCK_CodeSize)
5876*0fca6ea1SDimitry Andric     return 1;
5877*0fca6ea1SDimitry Andric 
58780b57cec5SDimitry Andric   // The gather / scatter cost is given by Intel architects. It is a rough
58790b57cec5SDimitry Andric   // number since we are looking at one instruction in a time.
5880*0fca6ea1SDimitry Andric   const int GSOverhead = (Opcode == Instruction::Load) ? getGatherOverhead()
5881e8d8bef9SDimitry Andric                                                        : getScatterOverhead();
58820b57cec5SDimitry Andric   return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
58835ffd83dbSDimitry Andric                                            MaybeAlign(Alignment), AddressSpace,
5884*0fca6ea1SDimitry Andric                                            CostKind);
58850b57cec5SDimitry Andric }
58860b57cec5SDimitry Andric 
58870b57cec5SDimitry Andric /// Calculate the cost of Gather / Scatter operation
5888fe6060f1SDimitry Andric InstructionCost X86TTIImpl::getGatherScatterOpCost(
5889fe6060f1SDimitry Andric     unsigned Opcode, Type *SrcVTy, const Value *Ptr, bool VariableMask,
5890fe6060f1SDimitry Andric     Align Alignment, TTI::TargetCostKind CostKind,
58915ffd83dbSDimitry Andric     const Instruction *I = nullptr) {
5892480093f4SDimitry Andric   if ((Opcode == Instruction::Load &&
589304eeddc0SDimitry Andric        (!isLegalMaskedGather(SrcVTy, Align(Alignment)) ||
589404eeddc0SDimitry Andric         forceScalarizeMaskedGather(cast<VectorType>(SrcVTy),
589504eeddc0SDimitry Andric                                    Align(Alignment)))) ||
5896480093f4SDimitry Andric       (Opcode == Instruction::Store &&
589704eeddc0SDimitry Andric        (!isLegalMaskedScatter(SrcVTy, Align(Alignment)) ||
589804eeddc0SDimitry Andric         forceScalarizeMaskedScatter(cast<VectorType>(SrcVTy),
589904eeddc0SDimitry Andric                                     Align(Alignment)))))
5900*0fca6ea1SDimitry Andric     return BaseT::getGatherScatterOpCost(Opcode, SrcVTy, Ptr, VariableMask,
5901*0fca6ea1SDimitry Andric                                          Alignment, CostKind, I);
59020b57cec5SDimitry Andric 
5903*0fca6ea1SDimitry Andric   assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter");
5904*0fca6ea1SDimitry Andric   PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
5905*0fca6ea1SDimitry Andric   if (!PtrTy && Ptr->getType()->isVectorTy())
5906*0fca6ea1SDimitry Andric     PtrTy = dyn_cast<PointerType>(
5907*0fca6ea1SDimitry Andric         cast<VectorType>(Ptr->getType())->getElementType());
5908*0fca6ea1SDimitry Andric   assert(PtrTy && "Unexpected type for Ptr argument");
5909*0fca6ea1SDimitry Andric   unsigned AddressSpace = PtrTy->getAddressSpace();
5910*0fca6ea1SDimitry Andric   return getGSVectorCost(Opcode, CostKind, SrcVTy, Ptr, Alignment,
5911*0fca6ea1SDimitry Andric                          AddressSpace);
59120b57cec5SDimitry Andric }
59130b57cec5SDimitry Andric 
591481ad6265SDimitry Andric bool X86TTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
591581ad6265SDimitry Andric                                const TargetTransformInfo::LSRCost &C2) {
59160b57cec5SDimitry Andric     // X86 specific here are "instruction number 1st priority".
59170b57cec5SDimitry Andric     return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
59180b57cec5SDimitry Andric                     C1.NumIVMuls, C1.NumBaseAdds,
59190b57cec5SDimitry Andric                     C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
59200b57cec5SDimitry Andric            std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost,
59210b57cec5SDimitry Andric                     C2.NumIVMuls, C2.NumBaseAdds,
59220b57cec5SDimitry Andric                     C2.ScaleCost, C2.ImmCost, C2.SetupCost);
59230b57cec5SDimitry Andric }
59240b57cec5SDimitry Andric 
59250b57cec5SDimitry Andric bool X86TTIImpl::canMacroFuseCmp() {
59260b57cec5SDimitry Andric   return ST->hasMacroFusion() || ST->hasBranchFusion();
59270b57cec5SDimitry Andric }
59280b57cec5SDimitry Andric 
59295ffd83dbSDimitry Andric bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment) {
5930*0fca6ea1SDimitry Andric   Type *ScalarTy = DataTy->getScalarType();
5931*0fca6ea1SDimitry Andric 
5932*0fca6ea1SDimitry Andric   // The backend can't handle a single element vector w/o CFCMOV.
5933*0fca6ea1SDimitry Andric   if (isa<VectorType>(DataTy) && cast<FixedVectorType>(DataTy)->getNumElements() == 1)
5934*0fca6ea1SDimitry Andric     return ST->hasCF() && hasConditionalLoadStoreForType(ScalarTy);
5935*0fca6ea1SDimitry Andric 
59360b57cec5SDimitry Andric   if (!ST->hasAVX())
59370b57cec5SDimitry Andric     return false;
59380b57cec5SDimitry Andric 
59390b57cec5SDimitry Andric   if (ScalarTy->isPointerTy())
59400b57cec5SDimitry Andric     return true;
59410b57cec5SDimitry Andric 
59420b57cec5SDimitry Andric   if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
59430b57cec5SDimitry Andric     return true;
59440b57cec5SDimitry Andric 
5945fcaf7f86SDimitry Andric   if (ScalarTy->isHalfTy() && ST->hasBWI())
5946349cc55cSDimitry Andric     return true;
5947349cc55cSDimitry Andric 
594806c3fb27SDimitry Andric   if (ScalarTy->isBFloatTy() && ST->hasBF16())
594906c3fb27SDimitry Andric     return true;
595006c3fb27SDimitry Andric 
59510b57cec5SDimitry Andric   if (!ScalarTy->isIntegerTy())
59520b57cec5SDimitry Andric     return false;
59530b57cec5SDimitry Andric 
59540b57cec5SDimitry Andric   unsigned IntWidth = ScalarTy->getIntegerBitWidth();
59550b57cec5SDimitry Andric   return IntWidth == 32 || IntWidth == 64 ||
59560b57cec5SDimitry Andric          ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI());
59570b57cec5SDimitry Andric }
59580b57cec5SDimitry Andric 
59595ffd83dbSDimitry Andric bool X86TTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment) {
59608bcb0991SDimitry Andric   return isLegalMaskedLoad(DataType, Alignment);
59610b57cec5SDimitry Andric }
59620b57cec5SDimitry Andric 
59638bcb0991SDimitry Andric bool X86TTIImpl::isLegalNTLoad(Type *DataType, Align Alignment) {
59640b57cec5SDimitry Andric   unsigned DataSize = DL.getTypeStoreSize(DataType);
59650b57cec5SDimitry Andric   // The only supported nontemporal loads are for aligned vectors of 16 or 32
59660b57cec5SDimitry Andric   // bytes.  Note that 32-byte nontemporal vector loads are supported by AVX2
59670b57cec5SDimitry Andric   // (the equivalent stores only require AVX).
59680b57cec5SDimitry Andric   if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32))
59690b57cec5SDimitry Andric     return DataSize == 16 ?  ST->hasSSE1() : ST->hasAVX2();
59700b57cec5SDimitry Andric 
59710b57cec5SDimitry Andric   return false;
59720b57cec5SDimitry Andric }
59730b57cec5SDimitry Andric 
59748bcb0991SDimitry Andric bool X86TTIImpl::isLegalNTStore(Type *DataType, Align Alignment) {
59750b57cec5SDimitry Andric   unsigned DataSize = DL.getTypeStoreSize(DataType);
59760b57cec5SDimitry Andric 
59770b57cec5SDimitry Andric   // SSE4A supports nontemporal stores of float and double at arbitrary
59780b57cec5SDimitry Andric   // alignment.
59790b57cec5SDimitry Andric   if (ST->hasSSE4A() && (DataType->isFloatTy() || DataType->isDoubleTy()))
59800b57cec5SDimitry Andric     return true;
59810b57cec5SDimitry Andric 
59820b57cec5SDimitry Andric   // Besides the SSE4A subtarget exception above, only aligned stores are
59830b57cec5SDimitry Andric   // available nontemporaly on any other subtarget.  And only stores with a size
59840b57cec5SDimitry Andric   // of 4..32 bytes (powers of 2, only) are permitted.
59850b57cec5SDimitry Andric   if (Alignment < DataSize || DataSize < 4 || DataSize > 32 ||
59860b57cec5SDimitry Andric       !isPowerOf2_32(DataSize))
59870b57cec5SDimitry Andric     return false;
59880b57cec5SDimitry Andric 
59890b57cec5SDimitry Andric   // 32-byte vector nontemporal stores are supported by AVX (the equivalent
59900b57cec5SDimitry Andric   // loads require AVX2).
59910b57cec5SDimitry Andric   if (DataSize == 32)
59920b57cec5SDimitry Andric     return ST->hasAVX();
5993349cc55cSDimitry Andric   if (DataSize == 16)
59940b57cec5SDimitry Andric     return ST->hasSSE1();
59950b57cec5SDimitry Andric   return true;
59960b57cec5SDimitry Andric }
59970b57cec5SDimitry Andric 
599881ad6265SDimitry Andric bool X86TTIImpl::isLegalBroadcastLoad(Type *ElementTy,
599981ad6265SDimitry Andric                                       ElementCount NumElements) const {
600081ad6265SDimitry Andric   // movddup
600181ad6265SDimitry Andric   return ST->hasSSE3() && !NumElements.isScalable() &&
600281ad6265SDimitry Andric          NumElements.getFixedValue() == 2 &&
600381ad6265SDimitry Andric          ElementTy == Type::getDoubleTy(ElementTy->getContext());
600481ad6265SDimitry Andric }
600581ad6265SDimitry Andric 
6006*0fca6ea1SDimitry Andric bool X86TTIImpl::isLegalMaskedExpandLoad(Type *DataTy, Align Alignment) {
60070b57cec5SDimitry Andric   if (!isa<VectorType>(DataTy))
60080b57cec5SDimitry Andric     return false;
60090b57cec5SDimitry Andric 
60100b57cec5SDimitry Andric   if (!ST->hasAVX512())
60110b57cec5SDimitry Andric     return false;
60120b57cec5SDimitry Andric 
60130b57cec5SDimitry Andric   // The backend can't handle a single element vector.
60145ffd83dbSDimitry Andric   if (cast<FixedVectorType>(DataTy)->getNumElements() == 1)
60150b57cec5SDimitry Andric     return false;
60160b57cec5SDimitry Andric 
60175ffd83dbSDimitry Andric   Type *ScalarTy = cast<VectorType>(DataTy)->getElementType();
60180b57cec5SDimitry Andric 
60190b57cec5SDimitry Andric   if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
60200b57cec5SDimitry Andric     return true;
60210b57cec5SDimitry Andric 
60220b57cec5SDimitry Andric   if (!ScalarTy->isIntegerTy())
60230b57cec5SDimitry Andric     return false;
60240b57cec5SDimitry Andric 
60250b57cec5SDimitry Andric   unsigned IntWidth = ScalarTy->getIntegerBitWidth();
60260b57cec5SDimitry Andric   return IntWidth == 32 || IntWidth == 64 ||
60270b57cec5SDimitry Andric          ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2());
60280b57cec5SDimitry Andric }
60290b57cec5SDimitry Andric 
6030*0fca6ea1SDimitry Andric bool X86TTIImpl::isLegalMaskedCompressStore(Type *DataTy, Align Alignment) {
6031*0fca6ea1SDimitry Andric   return isLegalMaskedExpandLoad(DataTy, Alignment);
60320b57cec5SDimitry Andric }
60330b57cec5SDimitry Andric 
6034349cc55cSDimitry Andric bool X86TTIImpl::supportsGather() const {
60350b57cec5SDimitry Andric   // Some CPUs have better gather performance than others.
60360b57cec5SDimitry Andric   // TODO: Remove the explicit ST->hasAVX512()?, That would mean we would only
60370b57cec5SDimitry Andric   // enable gather with a -march.
6038349cc55cSDimitry Andric   return ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2());
6039349cc55cSDimitry Andric }
6040349cc55cSDimitry Andric 
604104eeddc0SDimitry Andric bool X86TTIImpl::forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {
604204eeddc0SDimitry Andric   // Gather / Scatter for vector 2 is not profitable on KNL / SKX
604304eeddc0SDimitry Andric   // Vector-4 of gather/scatter instruction does not exist on KNL. We can extend
604404eeddc0SDimitry Andric   // it to 8 elements, but zeroing upper bits of the mask vector will add more
604504eeddc0SDimitry Andric   // instructions. Right now we give the scalar cost of vector-4 for KNL. TODO:
604604eeddc0SDimitry Andric   // Check, maybe the gather/scatter instruction is better in the VariableMask
604704eeddc0SDimitry Andric   // case.
604804eeddc0SDimitry Andric   unsigned NumElts = cast<FixedVectorType>(VTy)->getNumElements();
604904eeddc0SDimitry Andric   return NumElts == 1 ||
605004eeddc0SDimitry Andric          (ST->hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX())));
605104eeddc0SDimitry Andric }
605204eeddc0SDimitry Andric 
60538a4dda33SDimitry Andric bool X86TTIImpl::isLegalMaskedGatherScatter(Type *DataTy, Align Alignment) {
60540b57cec5SDimitry Andric   Type *ScalarTy = DataTy->getScalarType();
60550b57cec5SDimitry Andric   if (ScalarTy->isPointerTy())
60560b57cec5SDimitry Andric     return true;
60570b57cec5SDimitry Andric 
60580b57cec5SDimitry Andric   if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
60590b57cec5SDimitry Andric     return true;
60600b57cec5SDimitry Andric 
60610b57cec5SDimitry Andric   if (!ScalarTy->isIntegerTy())
60620b57cec5SDimitry Andric     return false;
60630b57cec5SDimitry Andric 
60640b57cec5SDimitry Andric   unsigned IntWidth = ScalarTy->getIntegerBitWidth();
60650b57cec5SDimitry Andric   return IntWidth == 32 || IntWidth == 64;
60660b57cec5SDimitry Andric }
60670b57cec5SDimitry Andric 
60688a4dda33SDimitry Andric bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, Align Alignment) {
60698a4dda33SDimitry Andric   if (!supportsGather() || !ST->preferGather())
60708a4dda33SDimitry Andric     return false;
60718a4dda33SDimitry Andric   return isLegalMaskedGatherScatter(DataTy, Alignment);
60728a4dda33SDimitry Andric }
60738a4dda33SDimitry Andric 
607481ad6265SDimitry Andric bool X86TTIImpl::isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
607581ad6265SDimitry Andric                                  unsigned Opcode1,
607681ad6265SDimitry Andric                                  const SmallBitVector &OpcodeMask) const {
607781ad6265SDimitry Andric   // ADDSUBPS  4xf32 SSE3
607881ad6265SDimitry Andric   // VADDSUBPS 4xf32 AVX
607981ad6265SDimitry Andric   // VADDSUBPS 8xf32 AVX2
608081ad6265SDimitry Andric   // ADDSUBPD  2xf64 SSE3
608181ad6265SDimitry Andric   // VADDSUBPD 2xf64 AVX
608281ad6265SDimitry Andric   // VADDSUBPD 4xf64 AVX2
608381ad6265SDimitry Andric 
608481ad6265SDimitry Andric   unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
608581ad6265SDimitry Andric   assert(OpcodeMask.size() == NumElements && "Mask and VecTy are incompatible");
608681ad6265SDimitry Andric   if (!isPowerOf2_32(NumElements))
608781ad6265SDimitry Andric     return false;
608881ad6265SDimitry Andric   // Check the opcode pattern. We apply the mask on the opcode arguments and
608981ad6265SDimitry Andric   // then check if it is what we expect.
609081ad6265SDimitry Andric   for (int Lane : seq<int>(0, NumElements)) {
609181ad6265SDimitry Andric     unsigned Opc = OpcodeMask.test(Lane) ? Opcode1 : Opcode0;
609281ad6265SDimitry Andric     // We expect FSub for even lanes and FAdd for odd lanes.
609381ad6265SDimitry Andric     if (Lane % 2 == 0 && Opc != Instruction::FSub)
609481ad6265SDimitry Andric       return false;
609581ad6265SDimitry Andric     if (Lane % 2 == 1 && Opc != Instruction::FAdd)
609681ad6265SDimitry Andric       return false;
609781ad6265SDimitry Andric   }
609881ad6265SDimitry Andric   // Now check that the pattern is supported by the target ISA.
609981ad6265SDimitry Andric   Type *ElemTy = cast<VectorType>(VecTy)->getElementType();
610081ad6265SDimitry Andric   if (ElemTy->isFloatTy())
610181ad6265SDimitry Andric     return ST->hasSSE3() && NumElements % 4 == 0;
610281ad6265SDimitry Andric   if (ElemTy->isDoubleTy())
610381ad6265SDimitry Andric     return ST->hasSSE3() && NumElements % 2 == 0;
610481ad6265SDimitry Andric   return false;
610581ad6265SDimitry Andric }
610681ad6265SDimitry Andric 
61075ffd83dbSDimitry Andric bool X86TTIImpl::isLegalMaskedScatter(Type *DataType, Align Alignment) {
61080b57cec5SDimitry Andric   // AVX2 doesn't support scatter
61098a4dda33SDimitry Andric   if (!ST->hasAVX512() || !ST->preferScatter())
61100b57cec5SDimitry Andric     return false;
61118a4dda33SDimitry Andric   return isLegalMaskedGatherScatter(DataType, Alignment);
61120b57cec5SDimitry Andric }
61130b57cec5SDimitry Andric 
61140b57cec5SDimitry Andric bool X86TTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
61150b57cec5SDimitry Andric   EVT VT = TLI->getValueType(DL, DataType);
61160b57cec5SDimitry Andric   return TLI->isOperationLegal(IsSigned ? ISD::SDIVREM : ISD::UDIVREM, VT);
61170b57cec5SDimitry Andric }
61180b57cec5SDimitry Andric 
6119bdd1243dSDimitry Andric bool X86TTIImpl::isExpensiveToSpeculativelyExecute(const Instruction* I) {
6120bdd1243dSDimitry Andric   // FDIV is always expensive, even if it has a very low uop count.
6121bdd1243dSDimitry Andric   // TODO: Still necessary for recent CPUs with low latency/throughput fdiv?
6122bdd1243dSDimitry Andric   if (I->getOpcode() == Instruction::FDiv)
6123bdd1243dSDimitry Andric     return true;
6124bdd1243dSDimitry Andric 
6125bdd1243dSDimitry Andric   return BaseT::isExpensiveToSpeculativelyExecute(I);
6126bdd1243dSDimitry Andric }
6127bdd1243dSDimitry Andric 
61280b57cec5SDimitry Andric bool X86TTIImpl::isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
61290b57cec5SDimitry Andric   return false;
61300b57cec5SDimitry Andric }
61310b57cec5SDimitry Andric 
61320b57cec5SDimitry Andric bool X86TTIImpl::areInlineCompatible(const Function *Caller,
61330b57cec5SDimitry Andric                                      const Function *Callee) const {
61340b57cec5SDimitry Andric   const TargetMachine &TM = getTLI()->getTargetMachine();
61350b57cec5SDimitry Andric 
61360b57cec5SDimitry Andric   // Work this as a subsetting of subtarget features.
61370b57cec5SDimitry Andric   const FeatureBitset &CallerBits =
61380b57cec5SDimitry Andric       TM.getSubtargetImpl(*Caller)->getFeatureBits();
61390b57cec5SDimitry Andric   const FeatureBitset &CalleeBits =
61400b57cec5SDimitry Andric       TM.getSubtargetImpl(*Callee)->getFeatureBits();
61410b57cec5SDimitry Andric 
614204eeddc0SDimitry Andric   // Check whether features are the same (apart from the ignore list).
61430b57cec5SDimitry Andric   FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
61440b57cec5SDimitry Andric   FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
614504eeddc0SDimitry Andric   if (RealCallerBits == RealCalleeBits)
614604eeddc0SDimitry Andric     return true;
614704eeddc0SDimitry Andric 
614804eeddc0SDimitry Andric   // If the features are a subset, we need to additionally check for calls
614904eeddc0SDimitry Andric   // that may become ABI-incompatible as a result of inlining.
615004eeddc0SDimitry Andric   if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
615104eeddc0SDimitry Andric     return false;
615204eeddc0SDimitry Andric 
615304eeddc0SDimitry Andric   for (const Instruction &I : instructions(Callee)) {
615404eeddc0SDimitry Andric     if (const auto *CB = dyn_cast<CallBase>(&I)) {
6155439352acSDimitry Andric       // Having more target features is fine for inline ASM.
6156439352acSDimitry Andric       if (CB->isInlineAsm())
6157439352acSDimitry Andric         continue;
6158439352acSDimitry Andric 
615904eeddc0SDimitry Andric       SmallVector<Type *, 8> Types;
616004eeddc0SDimitry Andric       for (Value *Arg : CB->args())
616104eeddc0SDimitry Andric         Types.push_back(Arg->getType());
616204eeddc0SDimitry Andric       if (!CB->getType()->isVoidTy())
616304eeddc0SDimitry Andric         Types.push_back(CB->getType());
616404eeddc0SDimitry Andric 
616504eeddc0SDimitry Andric       // Simple types are always ABI compatible.
616604eeddc0SDimitry Andric       auto IsSimpleTy = [](Type *Ty) {
616704eeddc0SDimitry Andric         return !Ty->isVectorTy() && !Ty->isAggregateType();
616804eeddc0SDimitry Andric       };
616904eeddc0SDimitry Andric       if (all_of(Types, IsSimpleTy))
617004eeddc0SDimitry Andric         continue;
617104eeddc0SDimitry Andric 
617204eeddc0SDimitry Andric       if (Function *NestedCallee = CB->getCalledFunction()) {
617304eeddc0SDimitry Andric         // Assume that intrinsics are always ABI compatible.
617404eeddc0SDimitry Andric         if (NestedCallee->isIntrinsic())
617504eeddc0SDimitry Andric           continue;
617604eeddc0SDimitry Andric 
617704eeddc0SDimitry Andric         // Do a precise compatibility check.
617804eeddc0SDimitry Andric         if (!areTypesABICompatible(Caller, NestedCallee, Types))
617904eeddc0SDimitry Andric           return false;
618004eeddc0SDimitry Andric       } else {
618104eeddc0SDimitry Andric         // We don't know the target features of the callee,
618204eeddc0SDimitry Andric         // assume it is incompatible.
618304eeddc0SDimitry Andric         return false;
618404eeddc0SDimitry Andric       }
618504eeddc0SDimitry Andric     }
618604eeddc0SDimitry Andric   }
618704eeddc0SDimitry Andric   return true;
61880b57cec5SDimitry Andric }
61890b57cec5SDimitry Andric 
61900eae32dcSDimitry Andric bool X86TTIImpl::areTypesABICompatible(const Function *Caller,
61910eae32dcSDimitry Andric                                        const Function *Callee,
61920eae32dcSDimitry Andric                                        const ArrayRef<Type *> &Types) const {
61930eae32dcSDimitry Andric   if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
61940b57cec5SDimitry Andric     return false;
61950b57cec5SDimitry Andric 
61960b57cec5SDimitry Andric   // If we get here, we know the target features match. If one function
61970b57cec5SDimitry Andric   // considers 512-bit vectors legal and the other does not, consider them
61980b57cec5SDimitry Andric   // incompatible.
61990b57cec5SDimitry Andric   const TargetMachine &TM = getTLI()->getTargetMachine();
62000b57cec5SDimitry Andric 
62015ffd83dbSDimitry Andric   if (TM.getSubtarget<X86Subtarget>(*Caller).useAVX512Regs() ==
62025ffd83dbSDimitry Andric       TM.getSubtarget<X86Subtarget>(*Callee).useAVX512Regs())
62035ffd83dbSDimitry Andric     return true;
62045ffd83dbSDimitry Andric 
62055ffd83dbSDimitry Andric   // Consider the arguments compatible if they aren't vectors or aggregates.
62065ffd83dbSDimitry Andric   // FIXME: Look at the size of vectors.
62075ffd83dbSDimitry Andric   // FIXME: Look at the element types of aggregates to see if there are vectors.
62080eae32dcSDimitry Andric   return llvm::none_of(Types,
62090eae32dcSDimitry Andric       [](Type *T) { return T->isVectorTy() || T->isAggregateType(); });
62100b57cec5SDimitry Andric }
62110b57cec5SDimitry Andric 
62120b57cec5SDimitry Andric X86TTIImpl::TTI::MemCmpExpansionOptions
62130b57cec5SDimitry Andric X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
62140b57cec5SDimitry Andric   TTI::MemCmpExpansionOptions Options;
62150b57cec5SDimitry Andric   Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
62160b57cec5SDimitry Andric   Options.NumLoadsPerBlock = 2;
62175ffd83dbSDimitry Andric   // All GPR and vector loads can be unaligned.
62185ffd83dbSDimitry Andric   Options.AllowOverlappingLoads = true;
62190b57cec5SDimitry Andric   if (IsZeroCmp) {
62200b57cec5SDimitry Andric     // Only enable vector loads for equality comparison. Right now the vector
62210b57cec5SDimitry Andric     // version is not as fast for three way compare (see #33329).
62220b57cec5SDimitry Andric     const unsigned PreferredWidth = ST->getPreferVectorWidth();
62235f757f3fSDimitry Andric     if (PreferredWidth >= 512 && ST->hasAVX512() && ST->hasEVEX512())
62245f757f3fSDimitry Andric       Options.LoadSizes.push_back(64);
6225480093f4SDimitry Andric     if (PreferredWidth >= 256 && ST->hasAVX()) Options.LoadSizes.push_back(32);
62260b57cec5SDimitry Andric     if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16);
62270b57cec5SDimitry Andric   }
62280b57cec5SDimitry Andric   if (ST->is64Bit()) {
62290b57cec5SDimitry Andric     Options.LoadSizes.push_back(8);
62300b57cec5SDimitry Andric   }
62310b57cec5SDimitry Andric   Options.LoadSizes.push_back(4);
62320b57cec5SDimitry Andric   Options.LoadSizes.push_back(2);
62330b57cec5SDimitry Andric   Options.LoadSizes.push_back(1);
62340b57cec5SDimitry Andric   return Options;
62350b57cec5SDimitry Andric }
62360b57cec5SDimitry Andric 
6237349cc55cSDimitry Andric bool X86TTIImpl::prefersVectorizedAddressing() const {
6238349cc55cSDimitry Andric   return supportsGather();
6239349cc55cSDimitry Andric }
6240349cc55cSDimitry Andric 
6241349cc55cSDimitry Andric bool X86TTIImpl::supportsEfficientVectorElementLoadStore() const {
6242349cc55cSDimitry Andric   return false;
6243349cc55cSDimitry Andric }
6244349cc55cSDimitry Andric 
62450b57cec5SDimitry Andric bool X86TTIImpl::enableInterleavedAccessVectorization() {
62460b57cec5SDimitry Andric   // TODO: We expect this to be beneficial regardless of arch,
62470b57cec5SDimitry Andric   // but there are currently some unexplained performance artifacts on Atom.
62480b57cec5SDimitry Andric   // As a temporary solution, disable on Atom.
62490b57cec5SDimitry Andric   return !(ST->isAtom());
62500b57cec5SDimitry Andric }
62510b57cec5SDimitry Andric 
62520b57cec5SDimitry Andric // Get estimation for interleaved load/store operations and strided load.
62530b57cec5SDimitry Andric // \p Indices contains indices for strided load.
62540b57cec5SDimitry Andric // \p Factor - the factor of interleaving.
62550b57cec5SDimitry Andric // AVX-512 provides 3-src shuffles that significantly reduces the cost.
6256fe6060f1SDimitry Andric InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
62575ffd83dbSDimitry Andric     unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
62585ffd83dbSDimitry Andric     ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
62595ffd83dbSDimitry Andric     TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) {
62600b57cec5SDimitry Andric   // VecTy for interleave memop is <VF*Factor x Elt>.
62610b57cec5SDimitry Andric   // So, for VF=4, Interleave Factor = 3, Element type = i32 we have
62620b57cec5SDimitry Andric   // VecTy = <12 x i32>.
62630b57cec5SDimitry Andric 
62640b57cec5SDimitry Andric   // Calculate the number of memory operations (NumOfMemOps), required
62650b57cec5SDimitry Andric   // for load/store the VecTy.
6266bdd1243dSDimitry Andric   MVT LegalVT = getTypeLegalizationCost(VecTy).second;
62670b57cec5SDimitry Andric   unsigned VecTySize = DL.getTypeStoreSize(VecTy);
62680b57cec5SDimitry Andric   unsigned LegalVTSize = LegalVT.getStoreSize();
62690b57cec5SDimitry Andric   unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
62700b57cec5SDimitry Andric 
62710b57cec5SDimitry Andric   // Get the cost of one memory operation.
62725ffd83dbSDimitry Andric   auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
62730b57cec5SDimitry Andric                                              LegalVT.getVectorNumElements());
6274349cc55cSDimitry Andric   InstructionCost MemOpCost;
62754824e7fdSDimitry Andric   bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps;
62764824e7fdSDimitry Andric   if (UseMaskedMemOp)
6277349cc55cSDimitry Andric     MemOpCost = getMaskedMemoryOpCost(Opcode, SingleMemOpTy, Alignment,
6278349cc55cSDimitry Andric                                       AddressSpace, CostKind);
6279349cc55cSDimitry Andric   else
6280349cc55cSDimitry Andric     MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy, MaybeAlign(Alignment),
6281349cc55cSDimitry Andric                                 AddressSpace, CostKind);
62820b57cec5SDimitry Andric 
62835ffd83dbSDimitry Andric   unsigned VF = VecTy->getNumElements() / Factor;
6284*0fca6ea1SDimitry Andric   MVT VT =
6285*0fca6ea1SDimitry Andric       MVT::getVectorVT(TLI->getSimpleValueType(DL, VecTy->getScalarType()), VF);
62860b57cec5SDimitry Andric 
6287349cc55cSDimitry Andric   InstructionCost MaskCost;
62884824e7fdSDimitry Andric   if (UseMaskedMemOp) {
6289349cc55cSDimitry Andric     APInt DemandedLoadStoreElts = APInt::getZero(VecTy->getNumElements());
6290349cc55cSDimitry Andric     for (unsigned Index : Indices) {
6291349cc55cSDimitry Andric       assert(Index < Factor && "Invalid index for interleaved memory op");
6292349cc55cSDimitry Andric       for (unsigned Elm = 0; Elm < VF; Elm++)
6293349cc55cSDimitry Andric         DemandedLoadStoreElts.setBit(Index + Elm * Factor);
6294349cc55cSDimitry Andric     }
6295349cc55cSDimitry Andric 
62964824e7fdSDimitry Andric     Type *I1Type = Type::getInt1Ty(VecTy->getContext());
6297349cc55cSDimitry Andric 
6298349cc55cSDimitry Andric     MaskCost = getReplicationShuffleCost(
62994824e7fdSDimitry Andric         I1Type, Factor, VF,
6300349cc55cSDimitry Andric         UseMaskForGaps ? DemandedLoadStoreElts
6301349cc55cSDimitry Andric                        : APInt::getAllOnes(VecTy->getNumElements()),
6302349cc55cSDimitry Andric         CostKind);
6303349cc55cSDimitry Andric 
6304349cc55cSDimitry Andric     // The Gaps mask is invariant and created outside the loop, therefore the
6305349cc55cSDimitry Andric     // cost of creating it is not accounted for here. However if we have both
6306349cc55cSDimitry Andric     // a MaskForGaps and some other mask that guards the execution of the
6307349cc55cSDimitry Andric     // memory access, we need to account for the cost of And-ing the two masks
6308349cc55cSDimitry Andric     // inside the loop.
6309349cc55cSDimitry Andric     if (UseMaskForGaps) {
63104824e7fdSDimitry Andric       auto *MaskVT = FixedVectorType::get(I1Type, VecTy->getNumElements());
6311349cc55cSDimitry Andric       MaskCost += getArithmeticInstrCost(BinaryOperator::And, MaskVT, CostKind);
6312349cc55cSDimitry Andric     }
6313349cc55cSDimitry Andric   }
6314349cc55cSDimitry Andric 
63150b57cec5SDimitry Andric   if (Opcode == Instruction::Load) {
63160b57cec5SDimitry Andric     // The tables (AVX512InterleavedLoadTbl and AVX512InterleavedStoreTbl)
63170b57cec5SDimitry Andric     // contain the cost of the optimized shuffle sequence that the
63180b57cec5SDimitry Andric     // X86InterleavedAccess pass will generate.
63190b57cec5SDimitry Andric     // The cost of loads and stores are computed separately from the table.
63200b57cec5SDimitry Andric 
63210b57cec5SDimitry Andric     // X86InterleavedAccess support only the following interleaved-access group.
63220b57cec5SDimitry Andric     static const CostTblEntry AVX512InterleavedLoadTbl[] = {
63230b57cec5SDimitry Andric         {3, MVT::v16i8, 12}, //(load 48i8 and) deinterleave into 3 x 16i8
63240b57cec5SDimitry Andric         {3, MVT::v32i8, 14}, //(load 96i8 and) deinterleave into 3 x 32i8
63250b57cec5SDimitry Andric         {3, MVT::v64i8, 22}, //(load 96i8 and) deinterleave into 3 x 32i8
63260b57cec5SDimitry Andric     };
63270b57cec5SDimitry Andric 
63280b57cec5SDimitry Andric     if (const auto *Entry =
63290b57cec5SDimitry Andric             CostTableLookup(AVX512InterleavedLoadTbl, Factor, VT))
6330349cc55cSDimitry Andric       return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
63310b57cec5SDimitry Andric     //If an entry does not exist, fallback to the default implementation.
63320b57cec5SDimitry Andric 
63330b57cec5SDimitry Andric     // Kind of shuffle depends on number of loaded values.
63340b57cec5SDimitry Andric     // If we load the entire data in one register, we can use a 1-src shuffle.
63350b57cec5SDimitry Andric     // Otherwise, we'll merge 2 sources in each operation.
63360b57cec5SDimitry Andric     TTI::ShuffleKind ShuffleKind =
63370b57cec5SDimitry Andric         (NumOfMemOps > 1) ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc;
63380b57cec5SDimitry Andric 
6339bdd1243dSDimitry Andric     InstructionCost ShuffleCost = getShuffleCost(
6340bdd1243dSDimitry Andric         ShuffleKind, SingleMemOpTy, std::nullopt, CostKind, 0, nullptr);
63410b57cec5SDimitry Andric 
63420b57cec5SDimitry Andric     unsigned NumOfLoadsInInterleaveGrp =
63430b57cec5SDimitry Andric         Indices.size() ? Indices.size() : Factor;
63445ffd83dbSDimitry Andric     auto *ResultTy = FixedVectorType::get(VecTy->getElementType(),
63455ffd83dbSDimitry Andric                                           VecTy->getNumElements() / Factor);
6346fe6060f1SDimitry Andric     InstructionCost NumOfResults =
6347bdd1243dSDimitry Andric         getTypeLegalizationCost(ResultTy).first * NumOfLoadsInInterleaveGrp;
63480b57cec5SDimitry Andric 
63490b57cec5SDimitry Andric     // About a half of the loads may be folded in shuffles when we have only
63504824e7fdSDimitry Andric     // one result. If we have more than one result, or the loads are masked,
63514824e7fdSDimitry Andric     // we do not fold loads at all.
63520b57cec5SDimitry Andric     unsigned NumOfUnfoldedLoads =
63534824e7fdSDimitry Andric         UseMaskedMemOp || NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
63540b57cec5SDimitry Andric 
63550b57cec5SDimitry Andric     // Get a number of shuffle operations per result.
63560b57cec5SDimitry Andric     unsigned NumOfShufflesPerResult =
63570b57cec5SDimitry Andric         std::max((unsigned)1, (unsigned)(NumOfMemOps - 1));
63580b57cec5SDimitry Andric 
63590b57cec5SDimitry Andric     // The SK_MergeTwoSrc shuffle clobbers one of src operands.
63600b57cec5SDimitry Andric     // When we have more than one destination, we need additional instructions
63610b57cec5SDimitry Andric     // to keep sources.
6362fe6060f1SDimitry Andric     InstructionCost NumOfMoves = 0;
63630b57cec5SDimitry Andric     if (NumOfResults > 1 && ShuffleKind == TTI::SK_PermuteTwoSrc)
63640b57cec5SDimitry Andric       NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
63650b57cec5SDimitry Andric 
6366fe6060f1SDimitry Andric     InstructionCost Cost = NumOfResults * NumOfShufflesPerResult * ShuffleCost +
6367349cc55cSDimitry Andric                            MaskCost + NumOfUnfoldedLoads * MemOpCost +
6368349cc55cSDimitry Andric                            NumOfMoves;
63690b57cec5SDimitry Andric 
63700b57cec5SDimitry Andric     return Cost;
63710b57cec5SDimitry Andric   }
63720b57cec5SDimitry Andric 
63730b57cec5SDimitry Andric   // Store.
63740b57cec5SDimitry Andric   assert(Opcode == Instruction::Store &&
63750b57cec5SDimitry Andric          "Expected Store Instruction at this  point");
63760b57cec5SDimitry Andric   // X86InterleavedAccess support only the following interleaved-access group.
63770b57cec5SDimitry Andric   static const CostTblEntry AVX512InterleavedStoreTbl[] = {
63780b57cec5SDimitry Andric       {3, MVT::v16i8, 12}, // interleave 3 x 16i8 into 48i8 (and store)
63790b57cec5SDimitry Andric       {3, MVT::v32i8, 14}, // interleave 3 x 32i8 into 96i8 (and store)
63800b57cec5SDimitry Andric       {3, MVT::v64i8, 26}, // interleave 3 x 64i8 into 96i8 (and store)
63810b57cec5SDimitry Andric 
63820b57cec5SDimitry Andric       {4, MVT::v8i8, 10},  // interleave 4 x 8i8  into 32i8  (and store)
63830b57cec5SDimitry Andric       {4, MVT::v16i8, 11}, // interleave 4 x 16i8 into 64i8  (and store)
63840b57cec5SDimitry Andric       {4, MVT::v32i8, 14}, // interleave 4 x 32i8 into 128i8 (and store)
63850b57cec5SDimitry Andric       {4, MVT::v64i8, 24}  // interleave 4 x 32i8 into 256i8 (and store)
63860b57cec5SDimitry Andric   };
63870b57cec5SDimitry Andric 
63880b57cec5SDimitry Andric   if (const auto *Entry =
63890b57cec5SDimitry Andric           CostTableLookup(AVX512InterleavedStoreTbl, Factor, VT))
6390349cc55cSDimitry Andric     return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
63910b57cec5SDimitry Andric   //If an entry does not exist, fallback to the default implementation.
63920b57cec5SDimitry Andric 
63930b57cec5SDimitry Andric   // There is no strided stores meanwhile. And store can't be folded in
63940b57cec5SDimitry Andric   // shuffle.
63950b57cec5SDimitry Andric   unsigned NumOfSources = Factor; // The number of values to be merged.
6396bdd1243dSDimitry Andric   InstructionCost ShuffleCost = getShuffleCost(
6397bdd1243dSDimitry Andric       TTI::SK_PermuteTwoSrc, SingleMemOpTy, std::nullopt, CostKind, 0, nullptr);
63980b57cec5SDimitry Andric   unsigned NumOfShufflesPerStore = NumOfSources - 1;
63990b57cec5SDimitry Andric 
64000b57cec5SDimitry Andric   // The SK_MergeTwoSrc shuffle clobbers one of src operands.
64010b57cec5SDimitry Andric   // We need additional instructions to keep sources.
64020b57cec5SDimitry Andric   unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
6403fe6060f1SDimitry Andric   InstructionCost Cost =
6404349cc55cSDimitry Andric       MaskCost +
6405fe6060f1SDimitry Andric       NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
64060b57cec5SDimitry Andric       NumOfMoves;
64070b57cec5SDimitry Andric   return Cost;
64080b57cec5SDimitry Andric }
64090b57cec5SDimitry Andric 
6410fe6060f1SDimitry Andric InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
6411349cc55cSDimitry Andric     unsigned Opcode, Type *BaseTy, unsigned Factor, ArrayRef<unsigned> Indices,
64125ffd83dbSDimitry Andric     Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
64135ffd83dbSDimitry Andric     bool UseMaskForCond, bool UseMaskForGaps) {
6414349cc55cSDimitry Andric   auto *VecTy = cast<FixedVectorType>(BaseTy);
6415349cc55cSDimitry Andric 
641606c3fb27SDimitry Andric   auto isSupportedOnAVX512 = [&](Type *VecTy) {
64175ffd83dbSDimitry Andric     Type *EltTy = cast<VectorType>(VecTy)->getElementType();
64180b57cec5SDimitry Andric     if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) ||
64190b57cec5SDimitry Andric         EltTy->isIntegerTy(32) || EltTy->isPointerTy())
64200b57cec5SDimitry Andric       return true;
6421fcaf7f86SDimitry Andric     if (EltTy->isIntegerTy(16) || EltTy->isIntegerTy(8) || EltTy->isHalfTy())
642206c3fb27SDimitry Andric       return ST->hasBWI();
642306c3fb27SDimitry Andric     if (EltTy->isBFloatTy())
642406c3fb27SDimitry Andric       return ST->hasBF16();
64250b57cec5SDimitry Andric     return false;
64260b57cec5SDimitry Andric   };
642706c3fb27SDimitry Andric   if (ST->hasAVX512() && isSupportedOnAVX512(VecTy))
64285ffd83dbSDimitry Andric     return getInterleavedMemoryOpCostAVX512(
6429349cc55cSDimitry Andric         Opcode, VecTy, Factor, Indices, Alignment,
64305ffd83dbSDimitry Andric         AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps);
6431349cc55cSDimitry Andric 
6432349cc55cSDimitry Andric   if (UseMaskForCond || UseMaskForGaps)
6433349cc55cSDimitry Andric     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
6434349cc55cSDimitry Andric                                              Alignment, AddressSpace, CostKind,
6435349cc55cSDimitry Andric                                              UseMaskForCond, UseMaskForGaps);
6436349cc55cSDimitry Andric 
6437349cc55cSDimitry Andric   // Get estimation for interleaved load/store operations for SSE-AVX2.
6438349cc55cSDimitry Andric   // As opposed to AVX-512, SSE-AVX2 do not have generic shuffles that allow
6439349cc55cSDimitry Andric   // computing the cost using a generic formula as a function of generic
6440349cc55cSDimitry Andric   // shuffles. We therefore use a lookup table instead, filled according to
6441349cc55cSDimitry Andric   // the instruction sequences that codegen currently generates.
6442349cc55cSDimitry Andric 
6443349cc55cSDimitry Andric   // VecTy for interleave memop is <VF*Factor x Elt>.
6444349cc55cSDimitry Andric   // So, for VF=4, Interleave Factor = 3, Element type = i32 we have
6445349cc55cSDimitry Andric   // VecTy = <12 x i32>.
6446bdd1243dSDimitry Andric   MVT LegalVT = getTypeLegalizationCost(VecTy).second;
6447349cc55cSDimitry Andric 
6448349cc55cSDimitry Andric   // This function can be called with VecTy=<6xi128>, Factor=3, in which case
6449349cc55cSDimitry Andric   // the VF=2, while v2i128 is an unsupported MVT vector type
6450349cc55cSDimitry Andric   // (see MachineValueType.h::getVectorVT()).
6451349cc55cSDimitry Andric   if (!LegalVT.isVector())
6452349cc55cSDimitry Andric     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
6453349cc55cSDimitry Andric                                              Alignment, AddressSpace, CostKind);
6454349cc55cSDimitry Andric 
6455349cc55cSDimitry Andric   unsigned VF = VecTy->getNumElements() / Factor;
6456349cc55cSDimitry Andric   Type *ScalarTy = VecTy->getElementType();
6457349cc55cSDimitry Andric   // Deduplicate entries, model floats/pointers as appropriately-sized integers.
6458349cc55cSDimitry Andric   if (!ScalarTy->isIntegerTy())
6459349cc55cSDimitry Andric     ScalarTy =
6460349cc55cSDimitry Andric         Type::getIntNTy(ScalarTy->getContext(), DL.getTypeSizeInBits(ScalarTy));
6461349cc55cSDimitry Andric 
6462349cc55cSDimitry Andric   // Get the cost of all the memory operations.
6463349cc55cSDimitry Andric   // FIXME: discount dead loads.
6464349cc55cSDimitry Andric   InstructionCost MemOpCosts = getMemoryOpCost(
6465349cc55cSDimitry Andric       Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, CostKind);
6466349cc55cSDimitry Andric 
6467349cc55cSDimitry Andric   auto *VT = FixedVectorType::get(ScalarTy, VF);
6468349cc55cSDimitry Andric   EVT ETy = TLI->getValueType(DL, VT);
6469349cc55cSDimitry Andric   if (!ETy.isSimple())
6470349cc55cSDimitry Andric     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
6471349cc55cSDimitry Andric                                              Alignment, AddressSpace, CostKind);
6472349cc55cSDimitry Andric 
6473349cc55cSDimitry Andric   // TODO: Complete for other data-types and strides.
6474349cc55cSDimitry Andric   // Each combination of Stride, element bit width and VF results in a different
6475349cc55cSDimitry Andric   // sequence; The cost tables are therefore accessed with:
6476349cc55cSDimitry Andric   // Factor (stride) and VectorType=VFxiN.
6477349cc55cSDimitry Andric   // The Cost accounts only for the shuffle sequence;
6478349cc55cSDimitry Andric   // The cost of the loads/stores is accounted for separately.
6479349cc55cSDimitry Andric   //
6480349cc55cSDimitry Andric   static const CostTblEntry AVX2InterleavedLoadTbl[] = {
6481349cc55cSDimitry Andric       {2, MVT::v2i8, 2},  // (load 4i8 and) deinterleave into 2 x 2i8
6482349cc55cSDimitry Andric       {2, MVT::v4i8, 2},  // (load 8i8 and) deinterleave into 2 x 4i8
6483349cc55cSDimitry Andric       {2, MVT::v8i8, 2},  // (load 16i8 and) deinterleave into 2 x 8i8
6484349cc55cSDimitry Andric       {2, MVT::v16i8, 4}, // (load 32i8 and) deinterleave into 2 x 16i8
6485349cc55cSDimitry Andric       {2, MVT::v32i8, 6}, // (load 64i8 and) deinterleave into 2 x 32i8
6486349cc55cSDimitry Andric 
6487349cc55cSDimitry Andric       {2, MVT::v8i16, 6},   // (load 16i16 and) deinterleave into 2 x 8i16
6488349cc55cSDimitry Andric       {2, MVT::v16i16, 9},  // (load 32i16 and) deinterleave into 2 x 16i16
6489349cc55cSDimitry Andric       {2, MVT::v32i16, 18}, // (load 64i16 and) deinterleave into 2 x 32i16
6490349cc55cSDimitry Andric 
6491349cc55cSDimitry Andric       {2, MVT::v8i32, 4},   // (load 16i32 and) deinterleave into 2 x 8i32
6492349cc55cSDimitry Andric       {2, MVT::v16i32, 8},  // (load 32i32 and) deinterleave into 2 x 16i32
6493349cc55cSDimitry Andric       {2, MVT::v32i32, 16}, // (load 64i32 and) deinterleave into 2 x 32i32
6494349cc55cSDimitry Andric 
6495349cc55cSDimitry Andric       {2, MVT::v4i64, 4},   // (load 8i64 and) deinterleave into 2 x 4i64
6496349cc55cSDimitry Andric       {2, MVT::v8i64, 8},   // (load 16i64 and) deinterleave into 2 x 8i64
6497349cc55cSDimitry Andric       {2, MVT::v16i64, 16}, // (load 32i64 and) deinterleave into 2 x 16i64
6498349cc55cSDimitry Andric       {2, MVT::v32i64, 32}, // (load 64i64 and) deinterleave into 2 x 32i64
6499349cc55cSDimitry Andric 
6500349cc55cSDimitry Andric       {3, MVT::v2i8, 3},   // (load 6i8 and) deinterleave into 3 x 2i8
6501349cc55cSDimitry Andric       {3, MVT::v4i8, 3},   // (load 12i8 and) deinterleave into 3 x 4i8
6502349cc55cSDimitry Andric       {3, MVT::v8i8, 6},   // (load 24i8 and) deinterleave into 3 x 8i8
6503349cc55cSDimitry Andric       {3, MVT::v16i8, 11}, // (load 48i8 and) deinterleave into 3 x 16i8
6504349cc55cSDimitry Andric       {3, MVT::v32i8, 14}, // (load 96i8 and) deinterleave into 3 x 32i8
6505349cc55cSDimitry Andric 
6506349cc55cSDimitry Andric       {3, MVT::v2i16, 5},   // (load 6i16 and) deinterleave into 3 x 2i16
6507349cc55cSDimitry Andric       {3, MVT::v4i16, 7},   // (load 12i16 and) deinterleave into 3 x 4i16
6508349cc55cSDimitry Andric       {3, MVT::v8i16, 9},   // (load 24i16 and) deinterleave into 3 x 8i16
6509349cc55cSDimitry Andric       {3, MVT::v16i16, 28}, // (load 48i16 and) deinterleave into 3 x 16i16
6510349cc55cSDimitry Andric       {3, MVT::v32i16, 56}, // (load 96i16 and) deinterleave into 3 x 32i16
6511349cc55cSDimitry Andric 
6512349cc55cSDimitry Andric       {3, MVT::v2i32, 3},   // (load 6i32 and) deinterleave into 3 x 2i32
6513349cc55cSDimitry Andric       {3, MVT::v4i32, 3},   // (load 12i32 and) deinterleave into 3 x 4i32
6514349cc55cSDimitry Andric       {3, MVT::v8i32, 7},   // (load 24i32 and) deinterleave into 3 x 8i32
6515349cc55cSDimitry Andric       {3, MVT::v16i32, 14}, // (load 48i32 and) deinterleave into 3 x 16i32
6516349cc55cSDimitry Andric       {3, MVT::v32i32, 32}, // (load 96i32 and) deinterleave into 3 x 32i32
6517349cc55cSDimitry Andric 
6518349cc55cSDimitry Andric       {3, MVT::v2i64, 1},   // (load 6i64 and) deinterleave into 3 x 2i64
6519349cc55cSDimitry Andric       {3, MVT::v4i64, 5},   // (load 12i64 and) deinterleave into 3 x 4i64
6520349cc55cSDimitry Andric       {3, MVT::v8i64, 10},  // (load 24i64 and) deinterleave into 3 x 8i64
6521349cc55cSDimitry Andric       {3, MVT::v16i64, 20}, // (load 48i64 and) deinterleave into 3 x 16i64
6522349cc55cSDimitry Andric 
6523349cc55cSDimitry Andric       {4, MVT::v2i8, 4},   // (load 8i8 and) deinterleave into 4 x 2i8
6524349cc55cSDimitry Andric       {4, MVT::v4i8, 4},   // (load 16i8 and) deinterleave into 4 x 4i8
6525349cc55cSDimitry Andric       {4, MVT::v8i8, 12},  // (load 32i8 and) deinterleave into 4 x 8i8
6526349cc55cSDimitry Andric       {4, MVT::v16i8, 24}, // (load 64i8 and) deinterleave into 4 x 16i8
6527349cc55cSDimitry Andric       {4, MVT::v32i8, 56}, // (load 128i8 and) deinterleave into 4 x 32i8
6528349cc55cSDimitry Andric 
6529349cc55cSDimitry Andric       {4, MVT::v2i16, 6},    // (load 8i16 and) deinterleave into 4 x 2i16
6530349cc55cSDimitry Andric       {4, MVT::v4i16, 17},   // (load 16i16 and) deinterleave into 4 x 4i16
6531349cc55cSDimitry Andric       {4, MVT::v8i16, 33},   // (load 32i16 and) deinterleave into 4 x 8i16
6532349cc55cSDimitry Andric       {4, MVT::v16i16, 75},  // (load 64i16 and) deinterleave into 4 x 16i16
6533349cc55cSDimitry Andric       {4, MVT::v32i16, 150}, // (load 128i16 and) deinterleave into 4 x 32i16
6534349cc55cSDimitry Andric 
6535349cc55cSDimitry Andric       {4, MVT::v2i32, 4},   // (load 8i32 and) deinterleave into 4 x 2i32
6536349cc55cSDimitry Andric       {4, MVT::v4i32, 8},   // (load 16i32 and) deinterleave into 4 x 4i32
6537349cc55cSDimitry Andric       {4, MVT::v8i32, 16},  // (load 32i32 and) deinterleave into 4 x 8i32
6538349cc55cSDimitry Andric       {4, MVT::v16i32, 32}, // (load 64i32 and) deinterleave into 4 x 16i32
6539349cc55cSDimitry Andric       {4, MVT::v32i32, 68}, // (load 128i32 and) deinterleave into 4 x 32i32
6540349cc55cSDimitry Andric 
6541349cc55cSDimitry Andric       {4, MVT::v2i64, 6},  // (load 8i64 and) deinterleave into 4 x 2i64
6542349cc55cSDimitry Andric       {4, MVT::v4i64, 8},  // (load 16i64 and) deinterleave into 4 x 4i64
6543349cc55cSDimitry Andric       {4, MVT::v8i64, 20}, // (load 32i64 and) deinterleave into 4 x 8i64
6544349cc55cSDimitry Andric       {4, MVT::v16i64, 40}, // (load 64i64 and) deinterleave into 4 x 16i64
6545349cc55cSDimitry Andric 
6546349cc55cSDimitry Andric       {6, MVT::v2i8, 6},   // (load 12i8 and) deinterleave into 6 x 2i8
6547349cc55cSDimitry Andric       {6, MVT::v4i8, 14},  // (load 24i8 and) deinterleave into 6 x 4i8
6548349cc55cSDimitry Andric       {6, MVT::v8i8, 18},  // (load 48i8 and) deinterleave into 6 x 8i8
6549349cc55cSDimitry Andric       {6, MVT::v16i8, 43}, // (load 96i8 and) deinterleave into 6 x 16i8
6550349cc55cSDimitry Andric       {6, MVT::v32i8, 82}, // (load 192i8 and) deinterleave into 6 x 32i8
6551349cc55cSDimitry Andric 
6552349cc55cSDimitry Andric       {6, MVT::v2i16, 13},   // (load 12i16 and) deinterleave into 6 x 2i16
6553349cc55cSDimitry Andric       {6, MVT::v4i16, 9},    // (load 24i16 and) deinterleave into 6 x 4i16
6554349cc55cSDimitry Andric       {6, MVT::v8i16, 39},   // (load 48i16 and) deinterleave into 6 x 8i16
6555349cc55cSDimitry Andric       {6, MVT::v16i16, 106}, // (load 96i16 and) deinterleave into 6 x 16i16
6556349cc55cSDimitry Andric       {6, MVT::v32i16, 212}, // (load 192i16 and) deinterleave into 6 x 32i16
6557349cc55cSDimitry Andric 
6558349cc55cSDimitry Andric       {6, MVT::v2i32, 6},   // (load 12i32 and) deinterleave into 6 x 2i32
6559349cc55cSDimitry Andric       {6, MVT::v4i32, 15},  // (load 24i32 and) deinterleave into 6 x 4i32
6560349cc55cSDimitry Andric       {6, MVT::v8i32, 31},  // (load 48i32 and) deinterleave into 6 x 8i32
6561349cc55cSDimitry Andric       {6, MVT::v16i32, 64}, // (load 96i32 and) deinterleave into 6 x 16i32
6562349cc55cSDimitry Andric 
6563349cc55cSDimitry Andric       {6, MVT::v2i64, 6},  // (load 12i64 and) deinterleave into 6 x 2i64
6564349cc55cSDimitry Andric       {6, MVT::v4i64, 18}, // (load 24i64 and) deinterleave into 6 x 4i64
6565349cc55cSDimitry Andric       {6, MVT::v8i64, 36}, // (load 48i64 and) deinterleave into 6 x 8i64
6566349cc55cSDimitry Andric 
6567349cc55cSDimitry Andric       {8, MVT::v8i32, 40} // (load 64i32 and) deinterleave into 8 x 8i32
6568349cc55cSDimitry Andric   };
6569349cc55cSDimitry Andric 
6570349cc55cSDimitry Andric   static const CostTblEntry SSSE3InterleavedLoadTbl[] = {
6571349cc55cSDimitry Andric       {2, MVT::v4i16, 2},   // (load 8i16 and) deinterleave into 2 x 4i16
6572349cc55cSDimitry Andric   };
6573349cc55cSDimitry Andric 
6574349cc55cSDimitry Andric   static const CostTblEntry SSE2InterleavedLoadTbl[] = {
6575349cc55cSDimitry Andric       {2, MVT::v2i16, 2},   // (load 4i16 and) deinterleave into 2 x 2i16
6576349cc55cSDimitry Andric       {2, MVT::v4i16, 7},   // (load 8i16 and) deinterleave into 2 x 4i16
6577349cc55cSDimitry Andric 
6578349cc55cSDimitry Andric       {2, MVT::v2i32, 2},   // (load 4i32 and) deinterleave into 2 x 2i32
6579349cc55cSDimitry Andric       {2, MVT::v4i32, 2},   // (load 8i32 and) deinterleave into 2 x 4i32
6580349cc55cSDimitry Andric 
6581349cc55cSDimitry Andric       {2, MVT::v2i64, 2},   // (load 4i64 and) deinterleave into 2 x 2i64
6582349cc55cSDimitry Andric   };
6583349cc55cSDimitry Andric 
6584349cc55cSDimitry Andric   static const CostTblEntry AVX2InterleavedStoreTbl[] = {
6585349cc55cSDimitry Andric       {2, MVT::v16i8, 3}, // interleave 2 x 16i8 into 32i8 (and store)
6586349cc55cSDimitry Andric       {2, MVT::v32i8, 4}, // interleave 2 x 32i8 into 64i8 (and store)
6587349cc55cSDimitry Andric 
6588349cc55cSDimitry Andric       {2, MVT::v8i16, 3},  // interleave 2 x 8i16 into 16i16 (and store)
6589349cc55cSDimitry Andric       {2, MVT::v16i16, 4}, // interleave 2 x 16i16 into 32i16 (and store)
6590349cc55cSDimitry Andric       {2, MVT::v32i16, 8}, // interleave 2 x 32i16 into 64i16 (and store)
6591349cc55cSDimitry Andric 
6592349cc55cSDimitry Andric       {2, MVT::v4i32, 2},   // interleave 2 x 4i32 into 8i32 (and store)
6593349cc55cSDimitry Andric       {2, MVT::v8i32, 4},   // interleave 2 x 8i32 into 16i32 (and store)
6594349cc55cSDimitry Andric       {2, MVT::v16i32, 8},  // interleave 2 x 16i32 into 32i32 (and store)
6595349cc55cSDimitry Andric       {2, MVT::v32i32, 16}, // interleave 2 x 32i32 into 64i32 (and store)
6596349cc55cSDimitry Andric 
6597349cc55cSDimitry Andric       {2, MVT::v2i64, 2},   // interleave 2 x 2i64 into 4i64 (and store)
6598349cc55cSDimitry Andric       {2, MVT::v4i64, 4},   // interleave 2 x 4i64 into 8i64 (and store)
6599349cc55cSDimitry Andric       {2, MVT::v8i64, 8},   // interleave 2 x 8i64 into 16i64 (and store)
6600349cc55cSDimitry Andric       {2, MVT::v16i64, 16}, // interleave 2 x 16i64 into 32i64 (and store)
6601349cc55cSDimitry Andric       {2, MVT::v32i64, 32}, // interleave 2 x 32i64 into 64i64 (and store)
6602349cc55cSDimitry Andric 
6603349cc55cSDimitry Andric       {3, MVT::v2i8, 4},   // interleave 3 x 2i8 into 6i8 (and store)
6604349cc55cSDimitry Andric       {3, MVT::v4i8, 4},   // interleave 3 x 4i8 into 12i8 (and store)
6605349cc55cSDimitry Andric       {3, MVT::v8i8, 6},   // interleave 3 x 8i8 into 24i8 (and store)
6606349cc55cSDimitry Andric       {3, MVT::v16i8, 11}, // interleave 3 x 16i8 into 48i8 (and store)
6607349cc55cSDimitry Andric       {3, MVT::v32i8, 13}, // interleave 3 x 32i8 into 96i8 (and store)
6608349cc55cSDimitry Andric 
6609349cc55cSDimitry Andric       {3, MVT::v2i16, 4},   // interleave 3 x 2i16 into 6i16 (and store)
6610349cc55cSDimitry Andric       {3, MVT::v4i16, 6},   // interleave 3 x 4i16 into 12i16 (and store)
6611349cc55cSDimitry Andric       {3, MVT::v8i16, 12},  // interleave 3 x 8i16 into 24i16 (and store)
6612349cc55cSDimitry Andric       {3, MVT::v16i16, 27}, // interleave 3 x 16i16 into 48i16 (and store)
6613349cc55cSDimitry Andric       {3, MVT::v32i16, 54}, // interleave 3 x 32i16 into 96i16 (and store)
6614349cc55cSDimitry Andric 
6615349cc55cSDimitry Andric       {3, MVT::v2i32, 4},   // interleave 3 x 2i32 into 6i32 (and store)
6616349cc55cSDimitry Andric       {3, MVT::v4i32, 5},   // interleave 3 x 4i32 into 12i32 (and store)
6617349cc55cSDimitry Andric       {3, MVT::v8i32, 11},  // interleave 3 x 8i32 into 24i32 (and store)
6618349cc55cSDimitry Andric       {3, MVT::v16i32, 22}, // interleave 3 x 16i32 into 48i32 (and store)
6619349cc55cSDimitry Andric       {3, MVT::v32i32, 48}, // interleave 3 x 32i32 into 96i32 (and store)
6620349cc55cSDimitry Andric 
6621349cc55cSDimitry Andric       {3, MVT::v2i64, 4},   // interleave 3 x 2i64 into 6i64 (and store)
6622349cc55cSDimitry Andric       {3, MVT::v4i64, 6},   // interleave 3 x 4i64 into 12i64 (and store)
6623349cc55cSDimitry Andric       {3, MVT::v8i64, 12},  // interleave 3 x 8i64 into 24i64 (and store)
6624349cc55cSDimitry Andric       {3, MVT::v16i64, 24}, // interleave 3 x 16i64 into 48i64 (and store)
6625349cc55cSDimitry Andric 
6626349cc55cSDimitry Andric       {4, MVT::v2i8, 4},   // interleave 4 x 2i8 into 8i8 (and store)
6627349cc55cSDimitry Andric       {4, MVT::v4i8, 4},   // interleave 4 x 4i8 into 16i8 (and store)
6628349cc55cSDimitry Andric       {4, MVT::v8i8, 4},   // interleave 4 x 8i8 into 32i8 (and store)
6629349cc55cSDimitry Andric       {4, MVT::v16i8, 8},  // interleave 4 x 16i8 into 64i8 (and store)
6630349cc55cSDimitry Andric       {4, MVT::v32i8, 12}, // interleave 4 x 32i8 into 128i8 (and store)
6631349cc55cSDimitry Andric 
6632349cc55cSDimitry Andric       {4, MVT::v2i16, 2},   // interleave 4 x 2i16 into 8i16 (and store)
6633349cc55cSDimitry Andric       {4, MVT::v4i16, 6},   // interleave 4 x 4i16 into 16i16 (and store)
6634349cc55cSDimitry Andric       {4, MVT::v8i16, 10},  // interleave 4 x 8i16 into 32i16 (and store)
6635349cc55cSDimitry Andric       {4, MVT::v16i16, 32}, // interleave 4 x 16i16 into 64i16 (and store)
6636349cc55cSDimitry Andric       {4, MVT::v32i16, 64}, // interleave 4 x 32i16 into 128i16 (and store)
6637349cc55cSDimitry Andric 
6638349cc55cSDimitry Andric       {4, MVT::v2i32, 5},   // interleave 4 x 2i32 into 8i32 (and store)
6639349cc55cSDimitry Andric       {4, MVT::v4i32, 6},   // interleave 4 x 4i32 into 16i32 (and store)
6640349cc55cSDimitry Andric       {4, MVT::v8i32, 16},  // interleave 4 x 8i32 into 32i32 (and store)
6641349cc55cSDimitry Andric       {4, MVT::v16i32, 32}, // interleave 4 x 16i32 into 64i32 (and store)
6642349cc55cSDimitry Andric       {4, MVT::v32i32, 64}, // interleave 4 x 32i32 into 128i32 (and store)
6643349cc55cSDimitry Andric 
6644349cc55cSDimitry Andric       {4, MVT::v2i64, 6},  // interleave 4 x 2i64 into 8i64 (and store)
6645349cc55cSDimitry Andric       {4, MVT::v4i64, 8},  // interleave 4 x 4i64 into 16i64 (and store)
6646349cc55cSDimitry Andric       {4, MVT::v8i64, 20}, // interleave 4 x 8i64 into 32i64 (and store)
6647349cc55cSDimitry Andric       {4, MVT::v16i64, 40}, // interleave 4 x 16i64 into 64i64 (and store)
6648349cc55cSDimitry Andric 
6649349cc55cSDimitry Andric       {6, MVT::v2i8, 7},   // interleave 6 x 2i8 into 12i8 (and store)
6650349cc55cSDimitry Andric       {6, MVT::v4i8, 9},   // interleave 6 x 4i8 into 24i8 (and store)
6651349cc55cSDimitry Andric       {6, MVT::v8i8, 16},  // interleave 6 x 8i8 into 48i8 (and store)
6652349cc55cSDimitry Andric       {6, MVT::v16i8, 27}, // interleave 6 x 16i8 into 96i8 (and store)
6653349cc55cSDimitry Andric       {6, MVT::v32i8, 90}, // interleave 6 x 32i8 into 192i8 (and store)
6654349cc55cSDimitry Andric 
6655349cc55cSDimitry Andric       {6, MVT::v2i16, 10},  // interleave 6 x 2i16 into 12i16 (and store)
6656349cc55cSDimitry Andric       {6, MVT::v4i16, 15},  // interleave 6 x 4i16 into 24i16 (and store)
6657349cc55cSDimitry Andric       {6, MVT::v8i16, 21},  // interleave 6 x 8i16 into 48i16 (and store)
6658349cc55cSDimitry Andric       {6, MVT::v16i16, 58}, // interleave 6 x 16i16 into 96i16 (and store)
6659349cc55cSDimitry Andric       {6, MVT::v32i16, 90}, // interleave 6 x 32i16 into 192i16 (and store)
6660349cc55cSDimitry Andric 
6661349cc55cSDimitry Andric       {6, MVT::v2i32, 9},   // interleave 6 x 2i32 into 12i32 (and store)
6662349cc55cSDimitry Andric       {6, MVT::v4i32, 12},  // interleave 6 x 4i32 into 24i32 (and store)
6663349cc55cSDimitry Andric       {6, MVT::v8i32, 33},  // interleave 6 x 8i32 into 48i32 (and store)
6664349cc55cSDimitry Andric       {6, MVT::v16i32, 66}, // interleave 6 x 16i32 into 96i32 (and store)
6665349cc55cSDimitry Andric 
6666349cc55cSDimitry Andric       {6, MVT::v2i64, 8},  // interleave 6 x 2i64 into 12i64 (and store)
6667349cc55cSDimitry Andric       {6, MVT::v4i64, 15}, // interleave 6 x 4i64 into 24i64 (and store)
6668349cc55cSDimitry Andric       {6, MVT::v8i64, 30}, // interleave 6 x 8i64 into 48i64 (and store)
6669349cc55cSDimitry Andric   };
6670349cc55cSDimitry Andric 
6671349cc55cSDimitry Andric   static const CostTblEntry SSE2InterleavedStoreTbl[] = {
6672349cc55cSDimitry Andric       {2, MVT::v2i8, 1},   // interleave 2 x 2i8 into 4i8 (and store)
6673349cc55cSDimitry Andric       {2, MVT::v4i8, 1},   // interleave 2 x 4i8 into 8i8 (and store)
6674349cc55cSDimitry Andric       {2, MVT::v8i8, 1},   // interleave 2 x 8i8 into 16i8 (and store)
6675349cc55cSDimitry Andric 
6676349cc55cSDimitry Andric       {2, MVT::v2i16, 1},  // interleave 2 x 2i16 into 4i16 (and store)
6677349cc55cSDimitry Andric       {2, MVT::v4i16, 1},  // interleave 2 x 4i16 into 8i16 (and store)
6678349cc55cSDimitry Andric 
6679349cc55cSDimitry Andric       {2, MVT::v2i32, 1},  // interleave 2 x 2i32 into 4i32 (and store)
6680349cc55cSDimitry Andric   };
6681349cc55cSDimitry Andric 
6682349cc55cSDimitry Andric   if (Opcode == Instruction::Load) {
6683349cc55cSDimitry Andric     auto GetDiscountedCost = [Factor, NumMembers = Indices.size(),
6684349cc55cSDimitry Andric                               MemOpCosts](const CostTblEntry *Entry) {
6685349cc55cSDimitry Andric       // NOTE: this is just an approximation!
6686349cc55cSDimitry Andric       //       It can over/under -estimate the cost!
6687349cc55cSDimitry Andric       return MemOpCosts + divideCeil(NumMembers * Entry->Cost, Factor);
6688349cc55cSDimitry Andric     };
6689349cc55cSDimitry Andric 
66900b57cec5SDimitry Andric     if (ST->hasAVX2())
6691349cc55cSDimitry Andric       if (const auto *Entry = CostTableLookup(AVX2InterleavedLoadTbl, Factor,
6692349cc55cSDimitry Andric                                               ETy.getSimpleVT()))
6693349cc55cSDimitry Andric         return GetDiscountedCost(Entry);
6694349cc55cSDimitry Andric 
6695349cc55cSDimitry Andric     if (ST->hasSSSE3())
6696349cc55cSDimitry Andric       if (const auto *Entry = CostTableLookup(SSSE3InterleavedLoadTbl, Factor,
6697349cc55cSDimitry Andric                                               ETy.getSimpleVT()))
6698349cc55cSDimitry Andric         return GetDiscountedCost(Entry);
6699349cc55cSDimitry Andric 
6700349cc55cSDimitry Andric     if (ST->hasSSE2())
6701349cc55cSDimitry Andric       if (const auto *Entry = CostTableLookup(SSE2InterleavedLoadTbl, Factor,
6702349cc55cSDimitry Andric                                               ETy.getSimpleVT()))
6703349cc55cSDimitry Andric         return GetDiscountedCost(Entry);
6704349cc55cSDimitry Andric   } else {
6705349cc55cSDimitry Andric     assert(Opcode == Instruction::Store &&
6706349cc55cSDimitry Andric            "Expected Store Instruction at this point");
6707349cc55cSDimitry Andric     assert((!Indices.size() || Indices.size() == Factor) &&
6708349cc55cSDimitry Andric            "Interleaved store only supports fully-interleaved groups.");
6709349cc55cSDimitry Andric     if (ST->hasAVX2())
6710349cc55cSDimitry Andric       if (const auto *Entry = CostTableLookup(AVX2InterleavedStoreTbl, Factor,
6711349cc55cSDimitry Andric                                               ETy.getSimpleVT()))
6712349cc55cSDimitry Andric         return MemOpCosts + Entry->Cost;
6713349cc55cSDimitry Andric 
6714349cc55cSDimitry Andric     if (ST->hasSSE2())
6715349cc55cSDimitry Andric       if (const auto *Entry = CostTableLookup(SSE2InterleavedStoreTbl, Factor,
6716349cc55cSDimitry Andric                                               ETy.getSimpleVT()))
6717349cc55cSDimitry Andric         return MemOpCosts + Entry->Cost;
6718349cc55cSDimitry Andric   }
67190b57cec5SDimitry Andric 
67200b57cec5SDimitry Andric   return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
67215ffd83dbSDimitry Andric                                            Alignment, AddressSpace, CostKind,
67220b57cec5SDimitry Andric                                            UseMaskForCond, UseMaskForGaps);
67230b57cec5SDimitry Andric }
6724bdd1243dSDimitry Andric 
6725bdd1243dSDimitry Andric InstructionCost X86TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
6726*0fca6ea1SDimitry Andric                                                  StackOffset BaseOffset,
6727bdd1243dSDimitry Andric                                                  bool HasBaseReg, int64_t Scale,
6728bdd1243dSDimitry Andric                                                  unsigned AddrSpace) const {
6729bdd1243dSDimitry Andric   // Scaling factors are not free at all.
6730bdd1243dSDimitry Andric   // An indexed folded instruction, i.e., inst (reg1, reg2, scale),
6731bdd1243dSDimitry Andric   // will take 2 allocations in the out of order engine instead of 1
6732bdd1243dSDimitry Andric   // for plain addressing mode, i.e. inst (reg1).
6733bdd1243dSDimitry Andric   // E.g.,
6734bdd1243dSDimitry Andric   // vaddps (%rsi,%rdx), %ymm0, %ymm1
6735bdd1243dSDimitry Andric   // Requires two allocations (one for the load, one for the computation)
6736bdd1243dSDimitry Andric   // whereas:
6737bdd1243dSDimitry Andric   // vaddps (%rsi), %ymm0, %ymm1
6738bdd1243dSDimitry Andric   // Requires just 1 allocation, i.e., freeing allocations for other operations
6739bdd1243dSDimitry Andric   // and having less micro operations to execute.
6740bdd1243dSDimitry Andric   //
6741bdd1243dSDimitry Andric   // For some X86 architectures, this is even worse because for instance for
6742bdd1243dSDimitry Andric   // stores, the complex addressing mode forces the instruction to use the
6743bdd1243dSDimitry Andric   // "load" ports instead of the dedicated "store" port.
6744bdd1243dSDimitry Andric   // E.g., on Haswell:
6745bdd1243dSDimitry Andric   // vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
6746bdd1243dSDimitry Andric   // vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
6747bdd1243dSDimitry Andric   TargetLoweringBase::AddrMode AM;
6748bdd1243dSDimitry Andric   AM.BaseGV = BaseGV;
6749*0fca6ea1SDimitry Andric   AM.BaseOffs = BaseOffset.getFixed();
6750bdd1243dSDimitry Andric   AM.HasBaseReg = HasBaseReg;
6751bdd1243dSDimitry Andric   AM.Scale = Scale;
6752*0fca6ea1SDimitry Andric   AM.ScalableOffset = BaseOffset.getScalable();
6753bdd1243dSDimitry Andric   if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace))
6754bdd1243dSDimitry Andric     // Scale represents reg2 * scale, thus account for 1
6755bdd1243dSDimitry Andric     // as soon as we use a second register.
6756bdd1243dSDimitry Andric     return AM.Scale != 0;
6757bdd1243dSDimitry Andric   return -1;
6758bdd1243dSDimitry Andric }
6759*0fca6ea1SDimitry Andric 
6760*0fca6ea1SDimitry Andric InstructionCost X86TTIImpl::getBranchMispredictPenalty() const {
6761*0fca6ea1SDimitry Andric   // TODO: Hook MispredictPenalty of SchedMachineModel into this.
6762*0fca6ea1SDimitry Andric   return 14;
6763*0fca6ea1SDimitry Andric }
6764