Lines Matching +defs:hi +defs:size

249     // For slow shld targets we only lower for code size.
3435 // Do not merge to float value size (128 bytes) if no implicit
3713 unsigned NumElts = Mask.size();
3719 unsigned NumElts = Mask.size();
3768 unsigned NumElts = Mask.size();
3808 unsigned NumElts = Mask.size();
3827 WidenedMask.assign(Mask.size() / 2, 0);
3828 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
3869 assert(WidenedMask.size() == Mask.size() / 2 &&
3870 "Incorrect size of mask after widening the elements!");
3884 for (int i = 0, Size = Mask.size(); i != Size; ++i)
3900 unsigned NumSrcElts = Mask.size();
3911 // We have to repeat the widening until we reach the target size, but we can
3914 while (ScaledMask.size() > NumDstElts) {
3971 assert(Bits.size() == Undefs.getBitWidth() &&
3985 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
4012 APInt Undefs = APInt::getZero(Bits.size());
4107 Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
4114 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
4150 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
4154 /// Widen a vector to a larger size with the same scalar type, with the new
4179 /// Widen a vector to a larger size with the same scalar type, with the new
4239 // insert_subvector(insert_subvector(undef, x, lo), y, hi)
4249 LoOps.size() == HiOps.size()) {
4258 // insert_subvector(x, extract_subvector(x, lo), hi)
4264 // insert_subvector(undef, x, hi)
4285 unsigned NumSubOps = SubOps.size();
4375 // Helper for splitting operands of an operation to legal target size and
4392 assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size");
4397 assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size");
4402 assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size");
4689 "Expected VTs to be the same size!");
4746 SmallVector<SDValue> Ops(Mask.size(), DAG.getUNDEF(VT.getScalarType()));
4747 for (int I = 0, NumElts = Mask.size(); I != NumElts; ++I) {
4910 // Bitcast a source array of element bits to the target size.
4922 // If we're already the right size, don't bother bitcasting.
5030 APInt UndefSrcElts = APInt::getZero(SrcEltBits.size());
5031 for (unsigned I = 0, E = SrcEltBits.size(); I != E; ++I)
5151 for (unsigned i = 0, e = EltSubBits.size(); i != e; ++i)
5234 for (int i = 0, e = EltBits.size(); i != e; ++i) {
5282 for (unsigned I = 0, E = EltBits.size(); I != E; ++I)
5717 if (M >= (int)Mask.size())
5718 M -= Mask.size();
5750 int Size = Mask.size();
5761 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
5841 int Size = Mask.size();
5927 "Different mask size from vector size!");
5936 unsigned NumElts = Mask.size();
5938 KnownZero.getBitWidth() == NumElts && "Shuffle mask size mismatch");
5952 unsigned NumElts = Mask.size();
6018 assert(NumElts == DemandedElts.getBitWidth() && "Unexpected vector size");
6051 for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
6078 size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
6080 narrowShuffleMaskElts(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
6081 narrowShuffleMaskElts(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
6168 if (SubMask.size() != NumSubElts) {
6169 assert(((SubMask.size() % NumSubElts) == 0 ||
6170 (NumSubElts % SubMask.size()) == 0) && "Illegal submask scale");
6171 if ((NumSubElts % SubMask.size()) == 0) {
6172 int Scale = NumSubElts / SubMask.size();
6177 int Scale = SubMask.size() / NumSubElts;
6178 NumSubElts = SubMask.size();
6502 int MaskWidth = Mask.size();
6504 for (int i = 0, e = Inputs.size(); i < e; ++i) {
6505 int lo = UsedInputs.size() * MaskWidth;
6506 int hi = lo + MaskWidth;
6511 if ((lo <= M) && (M < hi))
6515 if (none_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
6524 for (int j = 0, ue = UsedInputs.size(); j != ue; ++j) {
6529 M = (M < hi) ? ((M - lo) + (j * MaskWidth)) : (M - MaskWidth);
6888 assert(Zeroable.size() - Zeroable.count() > 1 &&
7127 unsigned NumElems = Elts.size();
7183 "Register/Memory size mismatch");
7414 assert(Elts.size() == VT.getVectorNumElements());
7430 assert(ScalarSize == 64 && "Unsupported floating point scalar size");
7437 for (unsigned I = 0, E = Bits.size(); I != E; ++I)
7454 assert(ScalarSize == 64 && "Unsupported floating point scalar size");
7523 assert((NumElts % Sequence.size()) == 0 && "Sequence doesn't fit.");
7524 if (Sequence.size() == 1)
7536 unsigned SeqLen = Sequence.size();
7643 // When optimizing for size, generate up to 5 extra bytes for a broadcast
7646 // it may be detrimental to overall size. There needs to be a way to detect
7647 // that condition to know if this is truly a size win.
7654 // But override that restriction when optimizing for size.
7661 // For size optimization, also splat v2f64 and v2i64, and for size opt
7793 if (InsertIndices.size() > 1)
8422 // If either input vector is not the same size as the build vector,
8423 // extract/insert the low bits to the correct size.
8691 // Adjust IndicesVec to match VT size.
8693 "Illegal variable permute mask size");
8695 // Narrow/widen the indices vector to the correct size.
9244 if (Values.size() == 1) {
9277 if (Subtarget.hasAVX2() && EVTBits == 32 && Values.size() == 2) {
9383 assert(Values.size() > 1 && "Expected non-undef and non-splat vector");
9427 Mask.append(NumElems - Mask.size(), SM_SentinelUndef);
9616 for (int i = 0, Size = Mask.size(); i < Size; ++i) {
9634 "Illegal shuffle lane size");
9636 int Size = Mask.size();
9657 "Illegal shuffle lane size");
9658 int NumElts = Mask.size();
9694 int Size = Mask.size();
9745 int Size = Mask.size();
9848 int Size = Mask.size();
9849 if (Size != (int)ExpectedMask.size())
9882 int Size = Mask.size();
9883 if (Size != (int)ExpectedMask.size())
9893 // Don't use V1/V2 if they're not the same size as the shuffle mask type.
9956 // Create 128-bit vector type based on mask size.
9957 MVT EltVT = MVT::getIntegerVT(128 / Mask.size());
9958 MVT VT = MVT::getVectorVT(EltVT, Mask.size());
9980 assert(Mask.size() % 2 == 0 && "Expecting even number of elements in mask");
9981 unsigned HalfSize = Mask.size() / 2;
9998 assert(Mask.size() == 4 && "Only 4-lane shuffle masks");
10027 // Mask elements are assumed to be -1, 0 or 1 to match the SHUFPD lo/hi pattern.
10029 assert((Mask.size() == 2 || Mask.size() == 4 || Mask.size() == 8) &&
10030 "Unexpected SHUFPD mask size");
10037 assert(0 <= FirstIndex && FirstIndex < (int)Mask.size() &&
10044 for (unsigned I = 0, E = Mask.size(); I != E; ++I)
10052 for (unsigned I = 0, E = Mask.size(); I != E; ++I)
10075 for (int i = 0, e = Mask.size(); i < e; i++) {
10101 int Size = Mask.size();
10196 // Attempt to match the target mask against the unpack lo/hi mask patterns.
10216 // If a unary shuffle, attempt to match as an unpack lo/hi with zero.
10325 unsigned NumElts = Mask.size();
10576 int ShuffleModulus = Mask.size() * (IsSingleInput ? 1 : 2);
10578 "We should only be called with masks with a power-of-2 size!");
10588 for (int i = 0, e = Mask.size(); i < e; ++i) {
10595 for (unsigned j = 0; j != std::size(ViableForN); ++j)
10610 for (unsigned j = 0; j != std::size(ViableForN); ++j)
10720 // Repeatedly pack down to the target size.
10754 MaskVT = MVT::getVectorVT(EltVT, Mask.size());
10763 MVT::getVectorVT(EltVT == MVT::f64 ? MVT::i64 : MVT::i32, Mask.size());
10769 SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
10771 for (int i = 0, Size = Mask.size(); i < Size; ++i) {
10806 for (int i = 0, Size = Mask.size(); i < Size; ++i) {
10832 assert(Mask.size() <= 64 && "Shuffle mask too big for blend mask");
10834 int NumElts = Mask.size();
10947 assert(RepeatedMask.size() == 8 && "Repeated mask size doesn't match!");
11022 for (int i = 0, Size = Mask.size(); i < Size; ++i)
11042 // Attempt to lower to a bitmask if we can. Only if not optimizing for size.
11073 SmallVector<int, 32> BlendMask(Mask.size(), -1);
11074 SmallVector<int, 32> PermuteMask(Mask.size(), -1);
11076 for (int i = 0, Size = Mask.size(); i < Size; ++i) {
11109 int NumElts = Mask.size();
11193 int Size = Mask.size();
11194 assert(Mask.size() >= 2 && "Single element masks are invalid.");
11406 return NumUndefs <= Mask.size() / 2 && UniqueElt.has_value();
11419 int NumElts = Mask.size();
11543 // AVX512 only has vXi32/vXi64 rotates, so limit the rotation sub group size.
11550 unsigned NumElts = Mask.size();
11603 int NumElts = Mask.size();
11703 int NumElts = RepeatedMask.size();
11736 assert(Mask.size() <= 16 &&
11787 unsigned NumElts = Mask.size();
11831 unsigned NumElts = Mask.size();
11907 int Size = Mask.size();
11946 // keep doubling the size of the integer elements up to that. We can
11970 int Size = Mask.size();
11971 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
12007 int Size = Mask.size();
12009 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
12063 int Size = Mask.size();
12065 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
12232 assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!");
12327 assert((int)Mask.size() == NumElements && "Unexpected shuffle mask size");
12401 "The input vector size must be divisible by the extended size.");
12442 // If the bitcasts shift the element size, we can't extract an equivalent
12450 // Ensure the scalar operand is the same size as the destination.
12493 find_if(Mask, [&Mask](int M) { return M >= (int)Mask.size(); }) -
12497 for (int i = 0, Size = Mask.size(); i < Size; ++i)
12516 SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(),
12545 } else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
12548 // element size is too small to use VZEXT_MOVL to clear the high bits.
12588 SmallVector<int, 4> V2Shuffle(Mask.size(), 1);
12661 assert(Mask.size() == 4 && "Unsupported mask size!");
12684 int Size = Mask.size();
12771 assert(BroadcastIdx < (int)Mask.size() && "We only expect to be called with "
12850 // is expected to be a win for code size, register pressure, and possibly
12912 "Unexpected vector size");
12931 "Unexpected scalar size");
12962 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
13073 assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
13157 assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
13349 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
13453 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
13594 assert(Mask.size() == 8 && "Shuffle mask length doesn't match!");
13621 int NumHToL = LoInputs.size() - NumLToL;
13623 int NumHToH = HiInputs.size() - NumLToH;
13657 for (int j = 0, e = DWordPairs.size(); j < e; ++j) {
13669 PSHUFDMask[DWord] = DOffset + DWordPairs.size();
13674 if (DWordPairs.size() <= 2) {
13718 assert((AToAInputs.size() == 3 || AToAInputs.size() == 1) &&
13720 assert((BToAInputs.size() == 1 || BToAInputs.size() == 3) &&
13722 assert(AToAInputs.size() + BToAInputs.size() == 4 &&
13725 bool ThreeAInputs = AToAInputs.size() == 3;
13751 if (BToBInputs.size() == 2 && AToBInputs.size() == 2) {
13797 BToAInputs.size() == 3 ? TripleNonInputIdx : OneInput;
13849 if (InPlaceInputs.size() == 1) {
13864 assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
13949 if (IncomingInputs.size() == 1) {
13959 } else if (IncomingInputs.size() == 2) {
14026 llvm_unreachable("Unhandled input size!");
14089 int Size = Mask.size();
14151 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
14355 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
14438 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
14521 bool TargetLo = LoInputs.size() >= HiInputs.size();
14532 for (int i = 0, ie = MovingInputs.size(); i < ie; ++i) {
14832 ArrayRef<int> LoMask = Mask.slice(0, Mask.size() / 2);
14833 ArrayRef<int> HiMask = Mask.slice(Mask.size() / 2);
14960 int Size = Mask.size();
15052 /// Attempts to find a sublane permute with the given size
15157 int Size = Mask.size();
15180 int Size = Mask.size();
15213 computeInLaneShuffleMask(Mask, Mask.size() / 2, InLaneMask);
15277 // TODO: If minimizing size and one of the inputs is a zero vector and the
15362 int NumElts = Mask.size();
15402 assert(M1.size() == M2.size() && "Unexpected mask size");
15403 for (int i = 0, e = M1.size(); i != e; ++i)
15410 assert(Mask.size() == MergedMask.size() && "Unexpected mask size");
15411 for (int i = 0, e = MergedMask.size(); i != e; ++i) {
15529 assert((Mask.size() == HalfMask.size() * 2) &&
15538 unsigned HalfNumElts = HalfMask.size();
15659 assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");
16017 if (Zeroable.countl_one() < (Mask.size() - 8))
16063 size_t Size = Mask.size();
16064 assert(Size % 2 == 0 && "Expected even mask size");
16087 // Limit user size to two for now.
16088 if (Shuffles.size() != 2)
16132 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
16252 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
16365 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
16378 computeInLaneShuffleMask(Mask, Mask.size() / 2, InLaneMask);
16393 assert(RepeatedMask.size() == 4 &&
16487 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
16542 assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
16629 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
16752 assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
16945 "Unexpected element type size for 128bit shuffle.");
16949 assert(VT.is512BitVector() && "Unexpected vector size for 512bit shuffle.");
16955 assert(Widened128Mask.size() == 4 && "Shuffle widening mismatch");
17052 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
17106 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
17112 assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
17173 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
17246 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
17276 assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
17343 assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
17414 assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
17516 int NumElts = Mask.size();
17589 int NumElts = Mask.size();
17623 int Size = Mask.size();
17663 int NumElts = Mask.size();
17715 // Shift left to put the original vector in the MSBs of the new size.
17797 int NumElements = Mask.size();
17835 for (int i = 0, Size = Mask.size(); i < Size; ++i)
17844 for (int i = 0, Size = Mask.size(); i < Size; ++i)
17961 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
18038 assert(NumElements == (int)Mask.size() &&
18040 "shouldn't alter the shuffle mask size");
18203 // SEXT/TRUNC cases where the mask doesn't match the destination size.
18761 // But if optimizing for size and there's a load folding opportunity,
19858 /// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch
20112 // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
20116 // uint4 hi = (v >> 16) | (uint4) 0x53000000;
20118 // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
20154 // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
20168 // uint4 hi = (v >> 16) | (uint4) 0x53000000;
20176 // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
20326 // Build a 64 bit pair (FF, 0) in the constant pool, with FF in the hi bits.
21704 // If the saturation width is smaller than the size of the temporary result,
21786 // For signed conversions where we saturate to the same size as the
22105 /// Depending on uarch and/or optimizing for size, we might prefer to use a
22179 /// Depending on uarch and/or optimizing for size, we might prefer to use a
22241 // the logic op, so it can save (~4 bytes) on code size.
22368 // Also promote i16 to i32 for performance / code size reason.
22616 for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
22736 // If the input vector has vector elements wider than the target test size,
22875 for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1;
23391 // are optimizing for size and the immediate won't fit in a byte.
24287 // encoding size - so it must either already be an i8 or i32 immediate, or it
24467 // we need mask of all zeros or ones with same size of the other
24627 // of 3 logic instructions for size savings and potentially speed.
25020 // MSBs of the new element size.
25096 /// Change a vector store into a pair of half-size vector stores.
25829 assert(MaskVT.bitsLE(Mask.getSimpleValueType()) && "Unexpected mask size!");
25909 "querying registration node size for function without personality");
28599 // into lo/hi nibbles and use the PSHUFB LUT to perform CTLZ on each of them.
28600 // If the hi input nibble is zero then we add both results together, otherwise
28601 // we just take the hi result (by masking the lo result to zero before the
28623 // Merge result back from vXi8 back to VT, working on the lo/hi halves
29195 // Extract the lo/hi parts to any extend to i16.
29223 // Multiply, mask the lower 8bits of the lo/hi results and pack.
29283 // Only multiply lo/hi halves that aren't known to be zero.
29373 // Multiply, lshr the upper 8bits to the lower 8bits of the lo/hi results and
30193 if (UniqueCstAmt.size() == 2 &&
30243 TmpAmtWideElts.resize(AmtWideElts.size() / 2);
30245 for (unsigned SrcI = 0, E = AmtWideElts.size(); SrcI != E; SrcI += 2) {
30285 if (WideEltSizeInBits * AmtWideElts.size() >= 512 &&
30291 if (WideEltSizeInBits * AmtWideElts.size() >= 512 && IsConstantSplat) {
30304 if (WideEltSizeInBits * AmtWideElts.size() >= 256 &&
30308 unsigned WideNumElts = AmtWideElts.size();
30502 if (LUT.size() == NumElts) {
30942 // Expand slow SHLD/SHRD cases if we are not optimizing for size.
31262 // v8i16/v16i16: perform unsigned multiply hi/lo and OR the results.
31909 unsigned size = 0;
31912 case MVT::i8: Reg = X86::AL; size = 1; break;
31913 case MVT::i16: Reg = X86::AX; size = 2; break;
31914 case MVT::i32: Reg = X86::EAX; size = 4; break;
31917 Reg = X86::RAX; size = 8;
31925 DAG.getTargetConstant(size, DL, MVT::i8),
32050 assert(ByteVecVT.getSizeInBits() == VecSize && "Cannot change vector size!");
33109 I = AsmStr.size() - OpNoStr1.size();
33864 // Preserve what we know about the size of the original result. If the
36041 // Set the call frame size on entry to the new basic blocks.
36138 // test rsp size
37131 LPadList.reserve(CallSiteNumToLPad.size());
37196 .addImm(LPadList.size());
38076 // Truncate the width to size to handle illegal types.
38606 unsigned NumOps = Ops.size();
38608 if (Mask.size() == NumElts) {
38674 // PACKSS is just a truncation if the sign bits extend to the packed size.
38775 unsigned NumOps = Ops.size();
38777 if (Mask.size() == NumElts) {
38848 unsigned NumMaskElts = Mask.size();
39004 unsigned NumMaskElts = Mask.size();
39033 ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size());
39035 for (int i = 0, e = Mask.size(); i != e; ++i) {
39152 unsigned NumMaskElts = Mask.size();
39347 unsigned NumMaskElts = Mask.size();
39396 assert(RepeatedMask.size() == 8 &&
39397 "Repeated mask size doesn't match!");
39527 assert((Inputs.size() == 1 || Inputs.size() == 2) &&
39546 bool UnaryShuffle = (Inputs.size() == 1);
39554 (RootSizeInBits % VT2.getSizeInBits()) == 0 && "Vector size mismatch");
39558 unsigned NumBaseMaskElts = BaseMask.size();
39570 // Don't combine if we are an AVX512/EVEX target and the mask element size
39571 // is different from the root element size - this would prevent writemasks
39598 if (Mask.size() <= NumElts &&
39628 assert((BaseMaskEltSizeInBits % 128) == 0 && "Illegal mask size");
39713 // Prefer blends for sequential shuffles unless we are optimizing for size.
39750 assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size");
39760 if (IsMaskedShuffle && NumRootElts > Mask.size()) {
39761 assert((NumRootElts % Mask.size()) == 0 && "Illegal mask size");
39762 int MaskScale = NumRootElts / Mask.size();
39768 unsigned NumMaskElts = Mask.size();
40266 // type size to attempt to combine:
40275 unsigned NumMaskElts = BaseMask.size();
40276 unsigned NumInputs = Inputs.size();
40338 int lo = I * WideMask.size();
40339 int hi = (I + 1) * WideMask.size();
40341 if (lo <= M && M < hi)
40365 if (AdjustedMasks == 0 || WideInputs.size() > 2)
40371 // so it will reduce the size of the mask to the minimal width mask which
40373 while (WideMask.size() > 1) {
40382 if (WideInputs.size() == 2 && canonicalizeShuffleMaskWithCommute(WideMask)) {
40450 unsigned EltSizeInBits = RootSizeInBits / Mask.size();
40453 (isPack || shouldUseHorizontalOp(Ops.size() == 1, DAG, Subtarget))) {
40485 if (Ops.size() >= 2) {
40523 if (2 < Ops.size())
40526 SDValue BC1 = BC[BC.size() - 1];
40527 if (Mask.size() == VT0.getVectorNumElements()) {
40531 if (Ops.size() == 2) {
40578 bool SingleOp = (Ops.size() == 1);
40600 if (Ops.size() == 1 && NumLanes == 2 &&
40628 unsigned NumMaskElts = Mask.size();
40630 unsigned NumOps = Ops.size();
40642 // If we're optimizing for size, only fold if at least one of the constants is
40724 /// equivalent. In most cases, this is just an encoding size win, but
40751 (RootMask.size() > 1 || (RootMask[0] == 0 && SrcOpIndex == 0)) &&
40776 "Can only combine shuffles upto size of the root op.");
40779 APInt OpDemandedElts = APInt::getZero(RootMask.size());
40781 int BaseIdx = RootMask.size() * SrcOpIndex;
40782 if (isInRange(M, BaseIdx, BaseIdx + RootMask.size()))
40788 unsigned NumOpMaskElts = RootMask.size() / Scale;
40789 assert((RootMask.size() % Scale) == 0 && "Root mask size mismatch");
40791 .extractBits(RootMask.size() - NumOpMaskElts, NumOpMaskElts)
40830 unsigned OpMaskSize = OpMask.size();
40831 if (OpInputs.size() > 1) {
40850 bool EmptyRoot = (Depth == 0) && (RootMask.size() == 1);
40856 APInt UsedInputs = APInt::getZero(OpInputs.size());
40857 for (int i = 0, e = OpMask.size(); i != e; ++i) {
40861 UsedInputs.setBit(M / OpMask.size());
40882 for (int i = 0, e = Ops.size(); i < e; ++i)
40892 return Ops.size() - 1;
40900 assert(((RootMask.size() > OpMask.size() &&
40901 RootMask.size() % OpMask.size() == 0) ||
40902 (OpMask.size() > RootMask.size() &&
40903 OpMask.size() % RootMask.size() == 0) ||
40904 OpMask.size() == RootMask.size()) &&
40910 assert(llvm::has_single_bit<uint32_t>(RootMask.size()) &&
40912 assert(llvm::has_single_bit<uint32_t>(OpMask.size()) &&
40914 unsigned RootMaskSizeLog2 = llvm::countr_zero(RootMask.size());
40915 unsigned OpMaskSizeLog2 = llvm::countr_zero(OpMask.size());
40917 unsigned MaskWidth = std::max<unsigned>(OpMask.size(), RootMask.size());
40919 std::max<unsigned>(1, OpMask.size() >> RootMaskSizeLog2);
40920 unsigned OpRatio = std::max<unsigned>(1, RootMask.size() >> OpMaskSizeLog2);
40972 int InputIdx = OpMask[OpIdx] / (int)OpMask.size();
40982 for (unsigned I = 0, E = Ops.size(); I != E; ++I) {
40988 int Lo = I * Mask.size();
40989 int Hi = (I + 1) * Mask.size();
40990 int NewHi = Lo + (Mask.size() / Scale);
40998 // Peek through any free extract_subvector nodes back to root size.
41013 if (Ops.size() == 1 && ISD::isBuildVectorAllOnes(Ops[0].getNode()) &&
41031 if (Ops.size() < (MaxDepth - Depth)) {
41032 for (int i = 0, e = Ops.size(); i < e; ++i) {
41063 unsigned EltSizeInBits = RootSizeInBits / Mask.size();
41084 int Lo = OpIdx * Mask.size();
41085 int Hi = Lo + Mask.size();
41088 APInt OpDemandedElts(Mask.size(), 0);
41099 unsigned NumExpectedVectorElts = Mask.size();
41135 if (Ops.size() <= 2) {
41139 // so it will reduce the size of the mask to the minimal width mask which
41141 while (Mask.size() > 1) {
41150 if (Ops.size() == 2 && canonicalizeShuffleMaskWithCommute(Mask)) {
41165 if (Ops.size() != 2 || !Subtarget.hasAVX2() || RootSizeInBits != 128 ||
41166 (RootSizeInBits / Mask.size()) != 64 ||
41956 // Make sure the shift amount and the load size are divisible by 16.
42149 assert(Ops.size() == 2 && LHS == peekThroughOneUseBitcasts(Ops[0]) &&
42180 if (getTargetShuffleMask(N, false, Ops, Mask) && Ops.size() == 2) {
42188 SubOps.size() == 1 && isUndefOrInRange(SubScaledMask, 0, 4)) {
42233 collectConcatOps(LHS.getNode(), LHSOps, DAG) && LHSOps.size() == 2) {
42238 collectConcatOps(RHS.getNode(), RHSOps, DAG) && RHSOps.size() == 2) {
42275 if (collectConcatOps(Src.getNode(), SubOps, DAG) && SubOps.size() == 2)
42326 assert(Mask.size() == 4);
42485 assert(Mask.size() == NumElts && "Unexpected shuffle mask size");
42492 M = (M < 0 ? M : M & (Mask.size() - 1));
42611 unsigned Size = Mask.size();
42832 /// the destination vector, narrow the shuffle to half its original size.
42848 SmallVector<int, 8> HalfMask(Mask.size() / 2);
43676 "Unsupported vector size");
43707 // Shuffle inputs must be the same size as the result.
43708 if (OpMask.size() != (unsigned)NumElts ||
43719 int NumSrcs = OpInputs.size();
44414 int NumOps = ShuffleOps.size();
44415 if (ShuffleMask.size() == (unsigned)NumElts &&
44465 SmallVector<APInt, 2> DemandedSrcElts(Ops.size(),
44472 assert(0 <= M.value() && M.value() < (int)(Ops.size() * NumElts) &&
44547 // Helper to peek through bitops/trunc/setcc to determine size of source vector.
44548 // Allows combineBitcastvxi1 to determine what size vector generated a <X x i1>.
44703 SubSrcOps.size() >= 2) {
44937 while (Ops.size() > 1) {
44938 unsigned NumOps = Ops.size();
45138 // Don't swap i8/i16 since we don't have fp types that size.
45490 // First, reduce the source down to 128-bit, applying BinOp to lo/hi.
45827 // Note: SrcVec might not have a VecVT type, but it must be the same size.
46000 // Shuffle inputs must be the same size as the result.
46006 // Attempt to narrow/widen the shuffle mask to the correct size.
46007 if (Mask.size() != NumSrcElts) {
46008 if ((NumSrcElts % Mask.size()) == 0) {
46010 int Scale = NumSrcElts / Mask.size();
46013 } else if ((Mask.size() % NumSrcElts) == 0) {
46016 int Scale = Mask.size() / NumSrcElts;
46019 for (int i = 0, e = (int)Mask.size(); i != e; ++i)
46024 while (Mask.size() > NumSrcElts &&
46033 if (Mask.size() == NumSrcElts) {
46037 unsigned Scale = Mask.size() / NumSrcElts;
46038 if ((Mask.size() % NumSrcElts) != 0 || SrcVT.isFloatingPoint())
46045 ExtractVT = EVT::getVectorVT(*DAG.getContext(), ExtractSVT, Mask.size());
46058 SDValue SrcOp = Ops[ExtractIdx / Mask.size()];
46059 ExtractIdx = ExtractIdx % Mask.size();
46269 // vXi8 add reduction - sum lo/hi halves then use PSADBW.
46513 (IsVar || BoolExtracts.size() > 1)) {
46580 assert(NumElts == SclVT.getSizeInBits() && "Unexpected bool vector size");
46584 // If the scalar integer is greater than the vector element size, then we
46605 // If we have register broadcast instructions, use the scalar size as the
46615 // element size (we don't care about the upper bits) and broadcast it to all
46677 // are the same size as the select elements. Ie, the condition operand must
48224 assert(CmpBits == CmpVal.getBitWidth() && "Value size mismatch");
48284 Ops.size() == 2) {
48403 ShuffleInputs.size() == 1 && isCompletePermute(ShuffleMask) &&
48746 /// When the operands of vector mul are extended from smaller size values,
49409 // sexts in X86 are MOVs. The MOVs have the same code size
49410 // as above SHIFTs (only SHIFT on 1 has lower code size).
49501 // transform should reduce code size. It may also enable secondary transforms
49555 if (!isAnyZero(ShuffleMask) && ShuffleOps.size() == 1 &&
50005 assert(EltBits.size() == VT.getVectorNumElements() &&
50010 for (unsigned i = 0, e = EltBits.size(); i != e; ++i) {
50495 // Both MOVMSK operands must be from vectors of the same size and same element
50496 // size, but it's OK for a fp/int diff.
51101 SrcOps.size() == 1) {
51369 for (unsigned i = 0, e = EltBits0.size(); i != e; ++i) {
51909 SrcOps.size() == 1) {
52047 /// XOR(TRUNCATE(SRL(X, size(X)-1)), 1)
52175 // the element size of the destination type.
53202 if (!UseSubVector && SrcOps.size() <= 2 &&
53205 N1 = SrcOps.size() > 1 ? SrcOps[1] : SDValue();
53208 if (UseSubVector && SrcOps.size() == 1 &&
53584 // See if this has been extended from a smaller/equal size to
53585 // the truncation size, allowing a truncation to combine with the extend.
53951 // Make sure the element size doesn't change.
54001 for (unsigned I = 0, E = EltBits.size(); I < E; I++)
54606 // Favor a library call when operating on a scalar and minimizing code size.
55093 // promotion is also good in terms of code-size.
55162 // We can only do this if the vector size is 256 bits or less.
57406 unsigned NumOps = Ops.size();
57480 // concat(extract_subvector(x,lo), extract_subvector(x,hi)) -> x.
57506 SubVT.getVectorElementCount() * Subs.size());
57523 for (unsigned I = 0, E = SubOps.size(); I != E; ++I) {
57674 if (ConcatMask.size() == (NumOps * NumSrcElts)) {
57706 if (ConcatMask.size() == (NumOps * NumSrcElts)) {
58061 if (EltBits.size() == VT.getVectorNumElements()) {
58122 for (unsigned I = 0, E = Ops.size(); I != E; ++I) {
58249 if (SubVectorOps.size() == 2 &&
58852 for (unsigned I = 0, E = LHSBits.size(); I != E; I += 2) {
59075 assert(NumElts == 2 && "Unexpected size");
59115 // The input chain and the size of the memory VT must match.
59555 // size/speed advantages vs. 32-bit ops, but they do have a major
59632 // Don't use `NotAnd` as even though `not` is generally shorter code size than
59749 S = S.substr(Piece.size());
59762 if (AsmPieces.size() == 3 || AsmPieces.size() == 4) {
59767 if (AsmPieces.size() == 3)
59789 switch (AsmPieces.size()) {
59837 if (Constraints.size() >= 2 &&
59838 Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
59839 Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
59889 if (Constraint.size() == 1) {
59927 else if (Constraint.size() == 2) {
60005 if (StringRef(Constraint).size() != 2)
60037 if (StringRef(Constraint).size() != 2)
60358 if (Constraint.size() == 1) {
60567 } else if (Constraint.size() == 2 && Constraint[0] == 'Y') {
60664 } else if (Constraint.size() == 2 && Constraint[0] == 'j') {
60705 if (Constraint.size() == 7 && Constraint[0] == '{' &&
60763 // Get a matching integer of the correct size. i.e. "ax" with MVT::32 should
60859 // for code size, we prefer to use a div instruction, as it is usually smaller
60863 // size, because it will have to be scalarized, while the alternative code
61035 // The default stack probe size is 4096 if the function has no stackprobesize
61037 return MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size",