Lines Matching +defs:hi +defs:size

249     // For slow shld targets we only lower for code size.
3435 // Do not merge to float value size (128 bytes) if no implicit
3713 unsigned NumElts = Mask.size();
3719 unsigned NumElts = Mask.size();
3768 unsigned NumElts = Mask.size();
3808 unsigned NumElts = Mask.size();
3827 WidenedMask.assign(Mask.size() / 2, 0);
3828 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
3869 assert(WidenedMask.size() == Mask.size() / 2 &&
3870 "Incorrect size of mask after widening the elements!");
3884 for (int i = 0, Size = Mask.size(); i != Size; ++i)
3900 unsigned NumSrcElts = Mask.size();
3911 // We have to repeat the widening until we reach the target size, but we can
3914 while (ScaledMask.size() > NumDstElts) {
3971 assert(Bits.size() == Undefs.getBitWidth() &&
3985 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
4012 APInt Undefs = APInt::getZero(Bits.size());
4107 Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
4114 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
4150 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
4154 /// Widen a vector to a larger size with the same scalar type, with the new
4179 /// Widen a vector to a larger size with the same scalar type, with the new
4239 // insert_subvector(insert_subvector(undef, x, lo), y, hi)
4249 LoOps.size() == HiOps.size()) {
4258 // insert_subvector(x, extract_subvector(x, lo), hi)
4264 // insert_subvector(undef, x, hi)
4285 unsigned NumSubOps = SubOps.size();
4375 // Helper for splitting operands of an operation to legal target size and
4392 assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size");
4397 assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size");
4402 assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size");
4689 "Expected VTs to be the same size!");
4746 SmallVector<SDValue> Ops(Mask.size(), DAG.getUNDEF(VT.getScalarType()));
4747 for (int I = 0, NumElts = Mask.size(); I != NumElts; ++I) {
4910 // Bitcast a source array of element bits to the target size.
4922 // If we're already the right size, don't bother bitcasting.
5030 APInt UndefSrcElts = APInt::getZero(SrcEltBits.size());
5031 for (unsigned I = 0, E = SrcEltBits.size(); I != E; ++I)
5151 for (unsigned i = 0, e = EltSubBits.size(); i != e; ++i)
5234 for (int i = 0, e = EltBits.size(); i != e; ++i) {
5282 for (unsigned I = 0, E = EltBits.size(); I != E; ++I)
5717 if (M >= (int)Mask.size())
5718 M -= Mask.size();
5750 int Size = Mask.size();
5761 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
5841 int Size = Mask.size();
5927 "Different mask size from vector size!");
5936 unsigned NumElts = Mask.size();
5938 KnownZero.getBitWidth() == NumElts && "Shuffle mask size mismatch");
5952 unsigned NumElts = Mask.size();
6018 assert(NumElts == DemandedElts.getBitWidth() && "Unexpected vector size");
6051 for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
6078 size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
6080 narrowShuffleMaskElts(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
6081 narrowShuffleMaskElts(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
6168 if (SubMask.size() != NumSubElts) {
6169 assert(((SubMask.size() % NumSubElts) == 0 ||
6170 (NumSubElts % SubMask.size()) == 0) && "Illegal submask scale");
6171 if ((NumSubElts % SubMask.size()) == 0) {
6172 int Scale = NumSubElts / SubMask.size();
6177 int Scale = SubMask.size() / NumSubElts;
6178 NumSubElts = SubMask.size();
6502 int MaskWidth = Mask.size();
6504 for (int i = 0, e = Inputs.size(); i < e; ++i) {
6505 int lo = UsedInputs.size() * MaskWidth;
6506 int hi = lo + MaskWidth;
6511 if ((lo <= M) && (M < hi))
6515 if (none_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
6524 for (int j = 0, ue = UsedInputs.size(); j != ue; ++j) {
6529 M = (M < hi) ? ((M - lo) + (j * MaskWidth)) : (M - MaskWidth);
6888 assert(Zeroable.size() - Zeroable.count() > 1 &&
7127 unsigned NumElems = Elts.size();
7183 "Register/Memory size mismatch");
7414 assert(Elts.size() == VT.getVectorNumElements());
7430 assert(ScalarSize == 64 && "Unsupported floating point scalar size");
7437 for (unsigned I = 0, E = Bits.size(); I != E; ++I)
7454 assert(ScalarSize == 64 && "Unsupported floating point scalar size");
7523 assert((NumElts % Sequence.size()) == 0 && "Sequence doesn't fit.");
7524 if (Sequence.size() == 1)
7536 unsigned SeqLen = Sequence.size();
7643 // When optimizing for size, generate up to 5 extra bytes for a broadcast
7646 // it may be detrimental to overall size. There needs to be a way to detect
7647 // that condition to know if this is truly a size win.
7654 // But override that restriction when optimizing for size.
7661 // For size optimization, also splat v2f64 and v2i64, and for size opt
7793 if (InsertIndices.size() > 1)
8422 // If either input vector is not the same size as the build vector,
8423 // extract/insert the low bits to the correct size.
8691 // Adjust IndicesVec to match VT size.
8693 "Illegal variable permute mask size");
8695 // Narrow/widen the indices vector to the correct size.
9244 if (Values.size() == 1) {
9277 if (Subtarget.hasAVX2() && EVTBits == 32 && Values.size() == 2) {
9383 assert(Values.size() > 1 && "Expected non-undef and non-splat vector");
9427 Mask.append(NumElems - Mask.size(), SM_SentinelUndef);
9616 for (int i = 0, Size = Mask.size(); i < Size; ++i) {
9634 "Illegal shuffle lane size");
9636 int Size = Mask.size();
9657 "Illegal shuffle lane size");
9658 int NumElts = Mask.size();
9694 int Size = Mask.size();
9745 int Size = Mask.size();
9848 int Size = Mask.size();
9849 if (Size != (int)ExpectedMask.size())
9882 int Size = Mask.size();
9883 if (Size != (int)ExpectedMask.size())
9893 // Don't use V1/V2 if they're not the same size as the shuffle mask type.
9956 // Create 128-bit vector type based on mask size.
9957 MVT EltVT = MVT::getIntegerVT(128 / Mask.size());
9958 MVT VT = MVT::getVectorVT(EltVT, Mask.size());
9980 assert(Mask.size() % 2 == 0 && "Expecting even number of elements in mask");
9981 unsigned HalfSize = Mask.size() / 2;
9998 assert(Mask.size() == 4 && "Only 4-lane shuffle masks");
10027 // Mask elements are assumed to be -1, 0 or 1 to match the SHUFPD lo/hi pattern.
10029 assert((Mask.size() == 2 || Mask.size() == 4 || Mask.size() == 8) &&
10030 "Unexpected SHUFPD mask size");
10037 assert(0 <= FirstIndex && FirstIndex < (int)Mask.size() &&
10044 for (unsigned I = 0, E = Mask.size(); I != E; ++I)
10052 for (unsigned I = 0, E = Mask.size(); I != E; ++I)
10075 for (int i = 0, e = Mask.size(); i < e; i++) {
10101 int Size = Mask.size();
10196 // Attempt to match the target mask against the unpack lo/hi mask patterns.
10216 // If a unary shuffle, attempt to match as an unpack lo/hi with zero.
10325 unsigned NumElts = Mask.size();
10576 int ShuffleModulus = Mask.size() * (IsSingleInput ? 1 : 2);
10578 "We should only be called with masks with a power-of-2 size!");
10588 for (int i = 0, e = Mask.size(); i < e; ++i) {
10595 for (unsigned j = 0; j != std::size(ViableForN); ++j)
10610 for (unsigned j = 0; j != std::size(ViableForN); ++j)
10720 // Repeatedly pack down to the target size.
10754 MaskVT = MVT::getVectorVT(EltVT, Mask.size());
10763 MVT::getVectorVT(EltVT == MVT::f64 ? MVT::i64 : MVT::i32, Mask.size());
10769 SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
10771 for (int i = 0, Size = Mask.size(); i < Size; ++i) {
10806 for (int i = 0, Size = Mask.size(); i < Size; ++i) {
10832 assert(Mask.size() <= 64 && "Shuffle mask too big for blend mask");
10834 int NumElts = Mask.size();
10947 assert(RepeatedMask.size() == 8 && "Repeated mask size doesn't match!");
11022 for (int i = 0, Size = Mask.size(); i < Size; ++i)
11042 // Attempt to lower to a bitmask if we can. Only if not optimizing for size.
11073 SmallVector<int, 32> BlendMask(Mask.size(), -1);
11074 SmallVector<int, 32> PermuteMask(Mask.size(), -1);
11076 for (int i = 0, Size = Mask.size(); i < Size; ++i) {
11109 int NumElts = Mask.size();
11193 int Size = Mask.size();
11194 assert(Mask.size() >= 2 && "Single element masks are invalid.");
11406 return NumUndefs <= Mask.size() / 2 && UniqueElt.has_value();
11419 int NumElts = Mask.size();
11543 // AVX512 only has vXi32/vXi64 rotates, so limit the rotation sub group size.
11550 unsigned NumElts = Mask.size();
11603 int NumElts = Mask.size();
11703 int NumElts = RepeatedMask.size();
11736 assert(Mask.size() <= 16 &&
11787 unsigned NumElts = Mask.size();
11831 unsigned NumElts = Mask.size();
11907 int Size = Mask.size();
11946 // keep doubling the size of the integer elements up to that. We can
11970 int Size = Mask.size();
11971 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
12007 int Size = Mask.size();
12009 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
12063 int Size = Mask.size();
12065 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
12232 assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!");
12327 assert((int)Mask.size() == NumElements && "Unexpected shuffle mask size");
12401 "The input vector size must be divisible by the extended size.");
12442 // If the bitcasts shift the element size, we can't extract an equivalent
12450 // Ensure the scalar operand is the same size as the destination.
12493 find_if(Mask, [&Mask](int M) { return M >= (int)Mask.size(); }) -
12497 for (int i = 0, Size = Mask.size(); i < Size; ++i)
12516 SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(),
12545 } else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
12548 // element size is too small to use VZEXT_MOVL to clear the high bits.
12588 SmallVector<int, 4> V2Shuffle(Mask.size(), 1);
12661 assert(Mask.size() == 4 && "Unsupported mask size!");
12684 int Size = Mask.size();
12771 assert(BroadcastIdx < (int)Mask.size() && "We only expect to be called with "
12850 // is expected to be a win for code size, register pressure, and possibly
12912 "Unexpected vector size");
12931 "Unexpected scalar size");
12962 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
13073 assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
13157 assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
13349 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
13453 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
13594 assert(Mask.size() == 8 && "Shuffle mask length doesn't match!");
13621 int NumHToL = LoInputs.size() - NumLToL;
13623 int NumHToH = HiInputs.size() - NumLToH;
13657 for (int j = 0, e = DWordPairs.size(); j < e; ++j) {
13669 PSHUFDMask[DWord] = DOffset + DWordPairs.size();
13674 if (DWordPairs.size() <= 2) {
13718 assert((AToAInputs.size() == 3 || AToAInputs.size() == 1) &&
13720 assert((BToAInputs.size() == 1 || BToAInputs.size() == 3) &&
13722 assert(AToAInputs.size() + BToAInputs.size() == 4 &&
13725 bool ThreeAInputs = AToAInputs.size() == 3;
13751 if (BToBInputs.size() == 2 && AToBInputs.size() == 2) {
13797 BToAInputs.size() == 3 ? TripleNonInputIdx : OneInput;
13849 if (InPlaceInputs.size() == 1) {
13864 assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
13949 if (IncomingInputs.size() == 1) {
13959 } else if (IncomingInputs.size() == 2) {
14026 llvm_unreachable("Unhandled input size!");
14089 int Size = Mask.size();
14151 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
14355 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
14438 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
14521 bool TargetLo = LoInputs.size() >= HiInputs.size();
14532 for (int i = 0, ie = MovingInputs.size(); i < ie; ++i) {
14832 ArrayRef<int> LoMask = Mask.slice(0, Mask.size() / 2);
14833 ArrayRef<int> HiMask = Mask.slice(Mask.size() / 2);
14960 int Size = Mask.size();
15052 /// Attempts to find a sublane permute with the given size
15157 int Size = Mask.size();
15180 int Size = Mask.size();
15213 computeInLaneShuffleMask(Mask, Mask.size() / 2, InLaneMask);
15277 // TODO: If minimizing size and one of the inputs is a zero vector and the
15362 int NumElts = Mask.size();
15402 assert(M1.size() == M2.size() && "Unexpected mask size");
15403 for (int i = 0, e = M1.size(); i != e; ++i)
15410 assert(Mask.size() == MergedMask.size() && "Unexpected mask size");
15411 for (int i = 0, e = MergedMask.size(); i != e; ++i) {
15529 assert((Mask.size() == HalfMask.size() * 2) &&
15538 unsigned HalfNumElts = HalfMask.size();
15659 assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");
16017 if (Zeroable.countl_one() < (Mask.size() - 8))
16063 size_t Size = Mask.size();
16064 assert(Size % 2 == 0 && "Expected even mask size");
16087 // Limit user size to two for now.
16088 if (Shuffles.size() != 2)
16132 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
16252 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
16365 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
16378 computeInLaneShuffleMask(Mask, Mask.size() / 2, InLaneMask);
16393 assert(RepeatedMask.size() == 4 &&
16487 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
16542 assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
16629 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
16752 assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
16945 "Unexpected element type size for 128bit shuffle.");
16949 assert(VT.is512BitVector() && "Unexpected vector size for 512bit shuffle.");
16955 assert(Widened128Mask.size() == 4 && "Shuffle widening mismatch");
17052 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
17106 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
17112 assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
17173 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
17246 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
17276 assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
17343 assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
17414 assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
17516 int NumElts = Mask.size();
17589 int NumElts = Mask.size();
17623 int Size = Mask.size();
17663 int NumElts = Mask.size();
17715 // Shift left to put the original vector in the MSBs of the new size.
17797 int NumElements = Mask.size();
17835 for (int i = 0, Size = Mask.size(); i < Size; ++i)
17844 for (int i = 0, Size = Mask.size(); i < Size; ++i)
17961 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
18038 assert(NumElements == (int)Mask.size() &&
18040 "shouldn't alter the shuffle mask size");
18203 // SEXT/TRUNC cases where the mask doesn't match the destination size.
18761 // But if optimizing for size and there's a load folding opportunity,
19858 /// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch
20112 // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
20116 // uint4 hi = (v >> 16) | (uint4) 0x53000000;
20118 // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
20154 // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
20168 // uint4 hi = (v >> 16) | (uint4) 0x53000000;
20176 // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
20326 // Build a 64 bit pair (FF, 0) in the constant pool, with FF in the hi bits.
21704 // If the saturation width is smaller than the size of the temporary result,
21786 // For signed conversions where we saturate to the same size as the
22105 /// Depending on uarch and/or optimizing for size, we might prefer to use a
22179 /// Depending on uarch and/or optimizing for size, we might prefer to use a
22241 // the logic op, so it can save (~4 bytes) on code size.
22368 // Also promote i16 to i32 for performance / code size reason.
22616 for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
22736 // If the input vector has vector elements wider than the target test size,
22875 for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1;
23391 // are optimizing for size and the immediate won't fit in a byte.
24287 // encoding size - so it must either already be an i8 or i32 immediate, or it
24467 // we need mask of all zeros or ones with same size of the other
24627 // of 3 logic instructions for size savings and potentially speed.
25020 // MSBs of the new element size.
25096 /// Change a vector store into a pair of half-size vector stores.
25829 assert(MaskVT.bitsLE(Mask.getSimpleValueType()) && "Unexpected mask size!");
25909 "querying registration node size for function without personality");
28599 // into lo/hi nibbles and use the PSHUFB LUT to perform CTLZ on each of them.
28600 // If the hi input nibble is zero then we add both results together, otherwise
28601 // we just take the hi result (by masking the lo result to zero before the
28623 // Merge result back from vXi8 back to VT, working on the lo/hi halves
29195 // Extract the lo/hi parts to any extend to i16.
29223 // Multiply, mask the lower 8bits of the lo/hi results and pack.
29283 // Only multiply lo/hi halves that aren't known to be zero.
29373 // Multiply, lshr the upper 8bits to the lower 8bits of the lo/hi results and
30193 if (UniqueCstAmt.size() == 2 &&
30243 TmpAmtWideElts.resize(AmtWideElts.size() / 2);
30245 for (unsigned SrcI = 0, E = AmtWideElts.size(); SrcI != E; SrcI += 2) {
30285 if (WideEltSizeInBits * AmtWideElts.size() >= 512 &&
30291 if (WideEltSizeInBits * AmtWideElts.size() >= 512 && IsConstantSplat) {
30304 if (WideEltSizeInBits * AmtWideElts.size() >= 256 &&
30308 unsigned WideNumElts = AmtWideElts.size();
30502 if (LUT.size() == NumElts) {
30942 // Expand slow SHLD/SHRD cases if we are not optimizing for size.
31262 // v8i16/v16i16: perform unsigned multiply hi/lo and OR the results.
31909 unsigned size = 0;
31912 case MVT::i8: Reg = X86::AL; size = 1; break;
31913 case MVT::i16: Reg = X86::AX; size = 2; break;
31914 case MVT::i32: Reg = X86::EAX; size = 4; break;
31917 Reg = X86::RAX; size = 8;
31925 DAG.getTargetConstant(size, DL, MVT::i8),
32050 assert(ByteVecVT.getSizeInBits() == VecSize && "Cannot change vector size!");
33109 I = AsmStr.size() - OpNoStr1.size();
33864 // Preserve what we know about the size of the original result. If the
36041 // Set the call frame size on entry to the new basic blocks.
36138 // test rsp size
37131 LPadList.reserve(CallSiteNumToLPad.size());
37196 .addImm(LPadList.size());
38076 // Truncate the width to size to handle illegal types.
38606 unsigned NumOps = Ops.size();
38608 if (Mask.size() == NumElts) {
38674 // PACKSS is just a truncation if the sign bits extend to the packed size.
38775 unsigned NumOps = Ops.size();
38777 if (Mask.size() == NumElts) {
38848 unsigned NumMaskElts = Mask.size();
39004 unsigned NumMaskElts = Mask.size();
39033 ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size());
39035 for (int i = 0, e = Mask.size(); i != e; ++i) {
39152 unsigned NumMaskElts = Mask.size();
39347 unsigned NumMaskElts = Mask.size();
39396 assert(RepeatedMask.size() == 8 &&
39397 "Repeated mask size doesn't match!");
39527 assert((Inputs.size() == 1 || Inputs.size() == 2) &&
39546 bool UnaryShuffle = (Inputs.size() == 1);
39554 (RootSizeInBits % VT2.getSizeInBits()) == 0 && "Vector size mismatch");
39558 unsigned NumBaseMaskElts = BaseMask.size();
39570 // Don't combine if we are an AVX512/EVEX target and the mask element size
39571 // is different from the root element size - this would prevent writemasks
39598 if (Mask.size() <= NumElts &&
39628 assert((BaseMaskEltSizeInBits % 128) == 0 && "Illegal mask size");
39713 // Prefer blends for sequential shuffles unless we are optimizing for size.
39750 assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size");
39760 if (IsMaskedShuffle && NumRootElts > Mask.size()) {
39761 assert((NumRootElts % Mask.size()) == 0 && "Illegal mask size");
39762 int MaskScale = NumRootElts / Mask.size();
39768 unsigned NumMaskElts = Mask.size();
40266 // type size to attempt to combine:
40275 unsigned NumMaskElts = BaseMask.size();
40276 unsigned NumInputs = Inputs.size();
40338 int lo = I * WideMask.size();
40339 int hi = (I + 1) * WideMask.size();
40341 if (lo <= M && M < hi)
40365 if (AdjustedMasks == 0 || WideInputs.size() > 2)
40371 // so it will reduce the size of the mask to the minimal width mask which
40373 while (WideMask.size() > 1) {
40382 if (WideInputs.size() == 2 && canonicalizeShuffleMaskWithCommute(WideMask)) {
40450 unsigned EltSizeInBits = RootSizeInBits / Mask.size();
40453 (isPack || shouldUseHorizontalOp(Ops.size() == 1, DAG, Subtarget))) {
40485 if (Ops.size() >= 2) {
40523 if (2 < Ops.size())
40526 SDValue BC1 = BC[BC.size() - 1];
40527 if (Mask.size() == VT0.getVectorNumElements()) {
40531 if (Ops.size() == 2) {
40578 bool SingleOp = (Ops.size() == 1);
40600 if (Ops.size() == 1 && NumLanes == 2 &&
40628 unsigned NumMaskElts = Mask.size();
40630 unsigned NumOps = Ops.size();
40642 // If we're optimizing for size, only fold if at least one of the constants is
40724 /// equivalent. In most cases, this is just an encoding size win, but
40751 (RootMask.size() > 1 || (RootMask[0] == 0 && SrcOpIndex == 0)) &&
40776 "Can only combine shuffles upto size of the root op.");
40779 APInt OpDemandedElts = APInt::getZero(RootMask.size());
40781 int BaseIdx = RootMask.size() * SrcOpIndex;
40782 if (isInRange(M, BaseIdx, BaseIdx + RootMask.size()))
40788 unsigned NumOpMaskElts = RootMask.size() / Scale;
40789 assert((RootMask.size() % Scale) == 0 && "Root mask size mismatch");
40791 .extractBits(RootMask.size() - NumOpMaskElts, NumOpMaskElts)
40830 unsigned OpMaskSize = OpMask.size();
40831 if (OpInputs.size() > 1) {
40850 bool EmptyRoot = (Depth == 0) && (RootMask.size() == 1);
40856 APInt UsedInputs = APInt::getZero(OpInputs.size());
40857 for (int i = 0, e = OpMask.size(); i != e; ++i) {
40861 UsedInputs.setBit(M / OpMask.size());
40882 for (int i = 0, e = Ops.size(); i < e; ++i)
40892 return Ops.size() - 1;
40900 assert(((RootMask.size() > OpMask.size() &&
40901 RootMask.size() % OpMask.size() == 0) ||
40902 (OpMask.size() > RootMask.size() &&
40903 OpMask.size() % RootMask.size() == 0) ||
40904 OpMask.size() == RootMask.size()) &&
40910 assert(llvm::has_single_bit<uint32_t>(RootMask.size()) &&
40912 assert(llvm::has_single_bit<uint32_t>(OpMask.size()) &&
40914 unsigned RootMaskSizeLog2 = llvm::countr_zero(RootMask.size());
40915 unsigned OpMaskSizeLog2 = llvm::countr_zero(OpMask.size());
40917 unsigned MaskWidth = std::max<unsigned>(OpMask.size(), RootMask.size());
40919 std::max<unsigned>(1, OpMask.size() >> RootMaskSizeLog2);
40920 unsigned OpRatio = std::max<unsigned>(1, RootMask.size() >> OpMaskSizeLog2);
40972 int InputIdx = OpMask[OpIdx] / (int)OpMask.size();
40982 for (unsigned I = 0, E = Ops.size(); I != E; ++I) {
40988 int Lo = I * Mask.size();
40989 int Hi = (I + 1) * Mask.size();
40990 int NewHi = Lo + (Mask.size() / Scale);
40998 // Peek through any free extract_subvector nodes back to root size.
41013 if (Ops.size() == 1 && ISD::isBuildVectorAllOnes(Ops[0].getNode()) &&
41031 if (Ops.size() < (MaxDepth - Depth)) {
41032 for (int i = 0, e = Ops.size(); i < e; ++i) {
41063 unsigned EltSizeInBits = RootSizeInBits / Mask.size();
41084 int Lo = OpIdx * Mask.size();
41085 int Hi = Lo + Mask.size();
41088 APInt OpDemandedElts(Mask.size(), 0);
41099 unsigned NumExpectedVectorElts = Mask.size();
41135 if (Ops.size() <= 2) {
41139 // so it will reduce the size of the mask to the minimal width mask which
41141 while (Mask.size() > 1) {
41150 if (Ops.size() == 2 && canonicalizeShuffleMaskWithCommute(Mask)) {
41165 if (Ops.size() != 2 || !Subtarget.hasAVX2() || RootSizeInBits != 128 ||
41166 (RootSizeInBits / Mask.size()) != 64 ||
41956 // Make sure the shift amount and the load size are divisible by 16.
42149 assert(Ops.size() == 2 && LHS == peekThroughOneUseBitcasts(Ops[0]) &&
42180 if (getTargetShuffleMask(N, false, Ops, Mask) && Ops.size() == 2) {
42188 SubOps.size() == 1 && isUndefOrInRange(SubScaledMask, 0, 4)) {
42233 collectConcatOps(LHS.getNode(), LHSOps, DAG) && LHSOps.size() == 2) {
42238 collectConcatOps(RHS.getNode(), RHSOps, DAG) && RHSOps.size() == 2) {
42275 if (collectConcatOps(Src.getNode(), SubOps, DAG) && SubOps.size() == 2)
42326 assert(Mask.size() == 4);
42485 assert(Mask.size() == NumElts && "Unexpected shuffle mask size");
42492 M = (M < 0 ? M : M & (Mask.size() - 1));
42611 unsigned Size = Mask.size();
42832 /// the destination vector, narrow the shuffle to half its original size.
42848 SmallVector<int, 8> HalfMask(Mask.size() / 2);
43676 "Unsupported vector size");
43707 // Shuffle inputs must be the same size as the result.
43708 if (OpMask.size() != (unsigned)NumElts ||
43719 int NumSrcs = OpInputs.size();
44414 int NumOps = ShuffleOps.size();
44415 if (ShuffleMask.size() == (unsigned)NumElts &&
44465 SmallVector<APInt, 2> DemandedSrcElts(Ops.size(),
44472 assert(0 <= M.value() && M.value() < (int)(Ops.size() * NumElts) &&
44547 // Helper to peek through bitops/trunc/setcc to determine size of source vector.
44548 // Allows combineBitcastvxi1 to determine what size vector generated a <X x i1>.
44703 SubSrcOps.size() >= 2) {
44937 while (Ops.size() > 1) {
44938 unsigned NumOps = Ops.size();
45138 // Don't swap i8/i16 since we don't have fp types that size.
45490 // First, reduce the source down to 128-bit, applying BinOp to lo/hi.
45827 // Note: SrcVec might not have a VecVT type, but it must be the same size.
46000 // Shuffle inputs must be the same size as the result.
46006 // Attempt to narrow/widen the shuffle mask to the correct size.
46007 if (Mask.size() != NumSrcElts) {
46008 if ((NumSrcElts % Mask.size()) == 0) {
46010 int Scale = NumSrcElts / Mask.size();
46013 } else if ((Mask.size() % NumSrcElts) == 0) {
46016 int Scale = Mask.size() / NumSrcElts;
46019 for (int i = 0, e = (int)Mask.size(); i != e; ++i)
46024 while (Mask.size() > NumSrcElts &&
46033 if (Mask.size() == NumSrcElts) {
46037 unsigned Scale = Mask.size() / NumSrcElts;
46038 if ((Mask.size() % NumSrcElts) != 0 || SrcVT.isFloatingPoint())
46045 ExtractVT = EVT::getVectorVT(*DAG.getContext(), ExtractSVT, Mask.size());
46058 SDValue SrcOp = Ops[ExtractIdx / Mask.size()];
46059 ExtractIdx = ExtractIdx % Mask.size();
46269 // vXi8 add reduction - sum lo/hi halves then use PSADBW.
46513 (IsVar || BoolExtracts.size() > 1)) {
46580 assert(NumElts == SclVT.getSizeInBits() && "Unexpected bool vector size");
46584 // If the scalar integer is greater than the vector element size, then we
46605 // If we have register broadcast instructions, use the scalar size as the
46615 // element size (we don't care about the upper bits) and broadcast it to all
46677 // are the same size as the select elements. Ie, the condition operand must
48224 assert(CmpBits == CmpVal.getBitWidth() && "Value size mismatch");
48284 Ops.size() == 2) {
48403 ShuffleInputs.size() == 1 && isCompletePermute(ShuffleMask) &&
48746 /// When the operands of vector mul are extended from smaller size values,
49409 // sexts in X86 are MOVs. The MOVs have the same code size
49410 // as above SHIFTs (only SHIFT on 1 has lower code size).
49501 // transform should reduce code size. It may also enable secondary transforms
49555 if (!isAnyZero(ShuffleMask) && ShuffleOps.size() == 1 &&
50005 assert(EltBits.size() == VT.getVectorNumElements() &&
50010 for (unsigned i = 0, e = EltBits.size(); i != e; ++i) {
50495 // Both MOVMSK operands must be from vectors of the same size and same element
50496 // size, but it's OK for a fp/int diff.
51101 SrcOps.size() == 1) {
51369 for (unsigned i = 0, e = EltBits0.size(); i != e; ++i) {
51909 SrcOps.size() == 1) {
52047 /// XOR(TRUNCATE(SRL(X, size(X)-1)), 1)
52175 // the element size of the destination type.
53202 if (!UseSubVector && SrcOps.size() <= 2 &&
53205 N1 = SrcOps.size() > 1 ? SrcOps[1] : SDValue();
53208 if (UseSubVector && SrcOps.size() == 1 &&
53584 // See if this has been extended from a smaller/equal size to
53585 // the truncation size, allowing a truncation to combine with the extend.
53951 // Make sure the element size doesn't change.
54001 for (unsigned I = 0, E = EltBits.size(); I < E; I++)
54606 // Favor a library call when operating on a scalar and minimizing code size.
55093 // promotion is also good in terms of code-size.
55162 // We can only do this if the vector size is 256 bits or less.
57406 unsigned NumOps = Ops.size();
57480 // concat(extract_subvector(x,lo), extract_subvector(x,hi)) -> x.
57506 SubVT.getVectorElementCount() * Subs.size());
57523 for (unsigned I = 0, E = SubOps.size(); I != E; ++I) {
57674 if (ConcatMask.size() == (NumOps * NumSrcElts)) {
57706 if (ConcatMask.size() == (NumOps * NumSrcElts)) {
58061 if (EltBits.size() == VT.getVectorNumElements()) {
58122 for (unsigned I = 0, E = Ops.size(); I != E; ++I) {
58249 if (SubVectorOps.size() == 2 &&
58852 for (unsigned I = 0, E = LHSBits.size(); I != E; I += 2) {
59075 assert(NumElts == 2 && "Unexpected size");
59115 // The input chain and the size of the memory VT must match.
59555 // size/speed advantages vs. 32-bit ops, but they do have a major
59632 // Don't use `NotAnd` as even though `not` is generally shorter code size than
59749 S = S.substr(Piece.size());
59762 if (AsmPieces.size() == 3 || AsmPieces.size() == 4) {
59767 if (AsmPieces.size() == 3)
59789 switch (AsmPieces.size()) {
59837 if (Constraints.size() >= 2 &&
59838 Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
59839 Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
59889 if (Constraint.size() == 1) {
59927 else if (Constraint.size() == 2) {
60005 if (StringRef(Constraint).size() != 2)
60037 if (StringRef(Constraint).size() != 2)
60358 if (Constraint.size() == 1) {
60567 } else if (Constraint.size() == 2 && Constraint[0] == 'Y') {
60664 } else if (Constraint.size() == 2 && Constraint[0] == 'j') {
60705 if (Constraint.size() == 7 && Constraint[0] == '{' &&
60763 // Get a matching integer of the correct size. i.e. "ax" with MVT::32 should
60859 // for code size, we prefer to use a div instruction, as it is usually smaller
60863 // size, because it will have to be scalarized, while the alternative code
61035 // The default stack probe size is 4096 if the function has no stackprobesize
61037 return MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size",