Lines Matching full:blend

1613       // when we have a 256-bit-wide blend with immediate.
3612 /// Return true if every element in Mask is an in-place blend/select mask or is
5885 // loops converting between OR and BLEND shuffles due to
6693 // See if this build_vector can be lowered as a blend with zero.
6713 // Let the shuffle legalizer deal with blend operations.
8091 // Convert to blend(fsub,fadd).
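The addsub pattern referenced here (subtract in even lanes, add in odd lanes) is exactly a lane blend of an `fsub` result and an `fadd` result. A minimal Python sketch of that equivalence, using plain lists to model vector lanes (the helper name `addsub` is illustrative, not from the source):

```python
def addsub(a, b):
    # Alternating op: even lanes subtract, odd lanes add.
    return [a[i] + b[i] if i % 2 else a[i] - b[i] for i in range(len(a))]

a = [1.0, 2.0, 3.0, 4.0]
b = [0.5, 0.5, 0.5, 0.5]
fsub = [ai - bi for ai, bi in zip(a, b)]
fadd = [ai + bi for ai, bi in zip(a, b)]
# blend(fsub, fadd) with an odd-lane mask reproduces addsub exactly.
blended = [fadd[i] if i % 2 else fsub[i] for i in range(len(a))]
assert addsub(a, b) == blended
```

This is why the lowering can convert the pattern to `blend(fsub, fadd)`: one blend with a fixed alternating mask replaces the per-lane op selection.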
8839 // and blend the FREEZE-UNDEF operands back in.
9459 /// that it is also not lane-crossing. It may however involve a blend from the
9958 // Don't bother if we can blend instead.
10004 // X86 has dedicated unpack instructions that can handle specific blend
10480 /// This handles cases where we can model a blend exactly as a bitmask due to
10515 return SDValue(); // Not a blend.
10533 /// Try to emit a blend instruction for a shuffle using bit math.
10535 /// This is used as a fallback approach when first class blend instructions are
10572 assert(Mask.size() <= 64 && "Shuffle mask too big for blend mask");
10580 // then ensure the blend mask part for that lane just references that input.
10584 // Attempt to generate the binary blend mask. If an input is zero then
10628 // If we only used V2 then splat the lane blend mask to avoid any demanded
10630 // blend mask bit).
10639 /// Try to emit a blend instruction for a shuffle.
10644 /// that the shuffle mask is a blend, or convertible into a blend with zero.
10695 // Use PBLENDW for lower/upper lanes and then blend lanes.
10728 // If we have VPTERNLOG, we can use that as a bit blend.
10734 // Scale the blend by the number of bytes per element.
10737 // This form of blend is always done on bytes. Compute the byte vector
10801 /// Try to lower as a blend of elements from two inputs followed by
10804 /// This matches the pattern where we can blend elements from two inputs and
10811 // We build up the blend mask while checking whether a blend is a viable way
10825 return SDValue(); // Can't blend in the needed input!
10830 // If only immediate blends, then bail if the blend mask can't be widened to
11149 /// Generic routine to decompose a shuffle and blend into independent
11153 /// shuffle+blend operations on newer X86 ISAs where we have very fast blend
11155 /// blends. For vXi8/vXi16 shuffles we may use unpack instead of blend.
11164 // unpack/blend them together.
11204 // Currently, we may need to produce one shuffle per input, and blend results.
11214 // Try to lower with the simpler initial blend/unpack/rotate strategies unless
11248 // If the final mask is an alternating blend of vXi8/vXi16, convert to an
12844 // blend patterns if a zero-blend above didn't work.
12855 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
12857 return Blend;
12928 // We have different paths for blend lowering, but they all must use the
12932 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
12934 return Blend;
12997 // To make this work, blend them together as the first step.
13003 // Now proceed to reconstruct the final blend as we have the necessary
13030 // trying to place elements directly, just blend them and set up the final
13033 // The first two blend mask elements are for V1, the second two are for
13043 // a blend.
13075 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
13077 return Blend;
13169 /// blends we use the floating point domain blend instructions.
13239 // We have different paths for blend lowering, but they all must use the
13243 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
13245 return Blend;
13284 // We implement this with SHUFPS because it can blend from two vectors.
13803 /// Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding the
13804 /// blend if only one input is used.
13845 // If we need shuffled inputs from both, blend the two.
13865 /// the two inputs, try to interleave them. Otherwise, blend the low and high
13948 // We have different paths for blend lowering, but they all must use the
13952 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
13954 return Blend;
14057 // If we can't directly blend but can use PSHUFB, that will be better as it
14058 // can both shuffle and set up the inefficient blend.
14065 // We can always bit-blend if we have to so the fallback strategy is to
14344 // If both V1 and V2 are in use and we can use a direct blend or an unpack,
14349 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i8, V1, V2, Mask,
14351 return Blend;
14356 // the complexity of the shuffles goes away when we do the final blend as
14392 if (SDValue Blend = lowerShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG))
14393 return Blend;
14470 // This will be a single vector shuffle instead of a blend so nuke VHiHalf.
14482 // VHiHalf so that we can blend them as i16s.
14618 // manually combine these blend masks as much as possible so that we create
14634 // We only use half of V1 so map the usage down into the final blend mask.
14643 // We only use half of V2 so map the usage down into the final blend mask.
14661 /// blend/unpack.
14676 // If this can be modeled as a broadcast of two elements followed by a blend,
14714 // Otherwise, just fall back to decomposed shuffles and a blend/unpack. This
14988 if (SDValue Blend = lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable,
14990 return Blend;
15883 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
15885 return Blend;
15887 // Check if the blend happens to exactly fit that of SHUFPD.
15898 // canonicalize to a blend of splat which isn't necessary for this combine.
15906 // blend the result.
15932 // If we have AVX2 then we always want to lower with a blend because at v4 we
15960 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
15962 return Blend;
16026 // blend the result.
16051 // Otherwise fall back on generic blend lowering.
16068 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
16070 return Blend;
16167 // If we have AVX2 then we always want to lower with a blend because at v8 we
16215 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
16217 return Blend;
16315 // Otherwise fall back on generic blend lowering.
16345 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
16347 return Blend;
16468 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
16470 return Blend;
16784 // Check if the blend happens to exactly fit that of SHUFPD.
16793 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask,
16795 return Blend;
16829 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask,
16831 return Blend;
16837 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask,
16839 return Blend;
16933 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask,
16935 return Blend;
17030 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask,
17032 return Blend;
17090 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask,
17092 return Blend;
17163 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask,
17165 return Blend;
17174 // If we can't directly blend but can use PSHUFB, that will be better as it
17175 // can both shuffle and set up the inefficient blend.
17699 // Choose indices that are blend-friendly.
17798 // Try to lower this to a blend-style vector shuffle. This can handle all
17822 // into an i1 condition so that we can use the mask-based 512-bit blend
17863 // VSELECT-matching blend, return Op, but if we need to expand, return
18262 // Lower insertion of v16i8/v32i8/v64i8 -1 elts as an 'OR' blend.
18274 // See if we can do this more efficiently with a blend shuffle with a
18291 // using a blend if we have AVX or AVX2 and the right data type.
18309 // then prefer the broadcast+blend sequence.
18389 // a vector, we prefer to generate a blend with immediate rather
24086 // case, so that sequence would be faster than a variable blend.
29559 // Only perform this blend if we can perform it without loading a mask.
29755 // to a masked blend which selects bytes based just on the sign bit
32042 // Emit a blend.
38230 // Attempt to match against an OR if we're performing a blend shuffle and the
40374 // Attempt to fold BLEND(PERMUTE(X),PERMUTE(Y)) -> PERMUTE(BLEND(X,Y))
40380 assert(isBlendOrUndef(BlendMask) && "Blend shuffle expected");
40389 // to the same width as the blend mask.
40417 // Use the permute demanded elts masks as the new blend mask.
40418 // Create the new permute mask as a blend of the 2 original permute masks.
40440 assert(isBlendOrUndef(NewBlendMask) && "Bad blend");
40444 // the blend mask is the same in the 128-bit subvectors (or can widen to
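The BLEND(PERMUTE(X),PERMUTE(Y)) -> PERMUTE(BLEND(X,Y)) fold above can be checked with a small Python model. This is a sketch under simplifying assumptions (4 lanes, conflict-free permute masks so the demanded-elements check trivially passes); `permute`, `blend`, `newp`, and `newsel` are illustrative names, not identifiers from the source:

```python
def permute(v, p):
    # out[i] = v[p[i]]
    return [v[j] for j in p]

def blend(a, b, sel):
    # Lanewise select: take b where sel is set, else a.
    return [b[i] if sel[i] else a[i] for i in range(len(a))]

x = [10, 11, 12, 13]               # hypothetical 4-lane vectors
y = [20, 21, 22, 23]
p1, p2 = [0, 0, 1, 1], [2, 2, 3, 3]
sel = [0, 1, 0, 1]

lhs = blend(permute(x, p1), permute(y, p2), sel)

# New permute mask: a blend of the two original permute masks.
newp = blend(p1, p2, sel)
# New blend mask: each demanded element must come from the source
# the original blend mask selected for the lane that reads it.
newsel = [0] * len(x)
for i in range(len(x)):
    newsel[newp[i]] = sel[i]
rhs = permute(blend(x, y, newsel), newp)
assert lhs == rhs
```

The payoff of the fold is that the blend moves below the permutes, so the two permutes collapse into one.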
41042 // blend(bitcast(x),bitcast(y)) -> bitcast(blend(x,y)) to narrower types.
41043 // TODO: Handle MVT::v16i16 repeated blend mask.
41060 // blend(pshufb(x,m1),pshufb(y,m2))
41061 // --> m3 = blend(m1,m2)
41062 // blend(pshufb(x,m3),pshufb(y,m3))
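The identity sketched in the three comment lines above (blend the two PSHUFB control masks, then reuse the single blended mask on both inputs) can be verified in a few lines of Python. A minimal model, assuming 16-byte vectors; `pshufb` mimics the instruction's semantics (index low nibble, high bit zeroes the byte) and `byte_blend` models a per-byte blend:

```python
def pshufb(src, mask):
    # Byte shuffle: each mask byte indexes src; a set high bit (0x80) yields 0.
    return [0 if m & 0x80 else src[m & 0x0F] for m in mask]

def byte_blend(a, b, sel):
    # Per-byte blend: take b[i] where sel[i] is set, else a[i].
    return [b[i] if sel[i] else a[i] for i in range(len(a))]

x = list(range(0, 16))              # hypothetical byte vectors
y = list(range(100, 116))
m1 = [15 - i for i in range(16)]    # arbitrary shuffle controls
m2 = [(i * 3) % 16 for i in range(16)]
sel = [i % 2 for i in range(16)]    # alternating blend mask

lhs = byte_blend(pshufb(x, m1), pshufb(y, m2), sel)
m3 = byte_blend(m1, m2, sel)        # m3 = blend(m1, m2)
rhs = byte_blend(pshufb(x, m3), pshufb(y, m3), sel)
assert lhs == rhs
```

Since the blend picks lane i from only one side, only that side's mask byte matters in lane i, so both PSHUFBs can share the pre-blended mask m3.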
45664 /// this node with one of the variable blend instructions, restructure the
45686 // cases where a *dynamic* blend will fail even though a constant-condition
45687 // blend could be custom lowered.
45696 // rather than just the high bit and using an i8-element blend.
45705 // There are no 512-bit blend instructions that use sign bits.
51445 SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd,
51447 return DCI.CombineTo(ML, Blend, VecLd.getValue(1), true);
51468 SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML,
51471 return DCI.CombineTo(ML, Blend, NewML.getValue(1), true);
51490 if (SDValue Blend = combineMaskedLoadConstantMask(Mld, DAG, DCI))
51491 return Blend;
56616 // MVT::v16i16 has repeated blend mask.