; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -mtriple=x86_64-apple-macosx -mcpu=core-avx2 -S | FileCheck %s

; InstCombine of the x86 variable-blend intrinsics (SSE4.1 blendvpd/blendvps/pblendvb,
; AVX blendv.pd/ps.256, AVX2 pblendvb):
;  - a constant sign-pattern mask folds to a shufflevector
;  - an all-zero mask folds to the first (pass-through) operand
;  - identical blend operands fold to that operand regardless of the mask

define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) {
; CHECK-LABEL: @constant_blendvpd(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[AB:%.*]], <2 x double> [[XY:%.*]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> <double 0xFFFFFFFFE0000000, double 0.000000e+00>)
  ret <2 x double> %1
}

define <2 x double> @constant_blendvpd_zero(<2 x double> %xy, <2 x double> %ab) {
; CHECK-LABEL: @constant_blendvpd_zero(
; CHECK-NEXT:    ret <2 x double> [[XY:%.*]]
;
  %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> zeroinitializer)
  ret <2 x double> %1
}

define <2 x double> @constant_blendvpd_dup(<2 x double> %xy, <2 x double> %sel) {
; CHECK-LABEL: @constant_blendvpd_dup(
; CHECK-NEXT:    ret <2 x double> [[XY:%.*]]
;
  %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %xy, <2 x double> %sel)
  ret <2 x double> %1
}

define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) {
; CHECK-LABEL: @constant_blendvps(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[XYZW:%.*]], <4 x float> [[ABCD:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000>)
  ret <4 x float> %1
}

define <4 x float> @constant_blendvps_zero(<4 x float> %xyzw, <4 x float> %abcd) {
; CHECK-LABEL: @constant_blendvps_zero(
; CHECK-NEXT:    ret <4 x float> [[XYZW:%.*]]
;
  %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> zeroinitializer)
  ret <4 x float> %1
}

define <4 x float> @constant_blendvps_dup(<4 x float> %xyzw, <4 x float> %sel) {
; CHECK-LABEL: @constant_blendvps_dup(
; CHECK-NEXT:    ret <4 x float> [[XYZW:%.*]]
;
  %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %xyzw, <4 x float> %sel)
  ret <4 x float> %1
}

define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) {
; CHECK-LABEL: @constant_pblendvb(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[XYZW:%.*]], <16 x i8> [[ABCD:%.*]], <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 21, i32 22, i32 7, i32 8, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 15>
; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> <i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0>)
  ret <16 x i8> %1
}

define <16 x i8> @constant_pblendvb_zero(<16 x i8> %xyzw, <16 x i8> %abcd) {
; CHECK-LABEL: @constant_pblendvb_zero(
; CHECK-NEXT:    ret <16 x i8> [[XYZW:%.*]]
;
  %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> zeroinitializer)
  ret <16 x i8> %1
}

define <16 x i8> @constant_pblendvb_dup(<16 x i8> %xyzw, <16 x i8> %sel) {
; CHECK-LABEL: @constant_pblendvb_dup(
; CHECK-NEXT:    ret <16 x i8> [[XYZW:%.*]]
;
  %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %xyzw, <16 x i8> %sel)
  ret <16 x i8> %1
}

define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
; CHECK-LABEL: @constant_blendvpd_avx(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[AB:%.*]], <4 x double> [[XY:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x double> [[TMP1]]
;
  %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> <double 0xFFFFFFFFE0000000, double 0.000000e+00, double 0xFFFFFFFFE0000000, double 0.000000e+00>)
  ret <4 x double> %1
}

define <4 x double> @constant_blendvpd_avx_zero(<4 x double> %xy, <4 x double> %ab) {
; CHECK-LABEL: @constant_blendvpd_avx_zero(
; CHECK-NEXT:    ret <4 x double> [[XY:%.*]]
;
  %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> zeroinitializer)
  ret <4 x double> %1
}

define <4 x double> @constant_blendvpd_avx_dup(<4 x double> %xy, <4 x double> %sel) {
; CHECK-LABEL: @constant_blendvpd_avx_dup(
; CHECK-NEXT:    ret <4 x double> [[XY:%.*]]
;
  %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %xy, <4 x double> %sel)
  ret <4 x double> %1
}

define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
; CHECK-LABEL: @constant_blendvps_avx(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[XYZW:%.*]], <8 x float> [[ABCD:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 4, i32 5, i32 6, i32 15>
; CHECK-NEXT:    ret <8 x float> [[TMP1]]
;
  %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000>)
  ret <8 x float> %1
}

define <8 x float> @constant_blendvps_avx_zero(<8 x float> %xyzw, <8 x float> %abcd) {
; CHECK-LABEL: @constant_blendvps_avx_zero(
; CHECK-NEXT:    ret <8 x float> [[XYZW:%.*]]
;
  %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> zeroinitializer)
  ret <8 x float> %1
}

define <8 x float> @constant_blendvps_avx_dup(<8 x float> %xyzw, <8 x float> %sel) {
; CHECK-LABEL: @constant_blendvps_avx_dup(
; CHECK-NEXT:    ret <8 x float> [[XYZW:%.*]]
;
  %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %xyzw, <8 x float> %sel)
  ret <8 x float> %1
}

define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
; CHECK-LABEL: @constant_pblendvb_avx2(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[XYZW:%.*]], <32 x i8> [[ABCD:%.*]], <32 x i32> <i32 0, i32 1, i32 34, i32 3, i32 36, i32 37, i32 38, i32 7, i32 8, i32 9, i32 42, i32 11, i32 44, i32 45, i32 46, i32 15, i32 16, i32 17, i32 50, i32 19, i32 52, i32 53, i32 54, i32 23, i32 24, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 31>
; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd,
                <32 x i8> <i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
                           i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
                           i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
                           i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0>)
  ret <32 x i8> %1
}

define <32 x i8> @constant_pblendvb_avx2_zero(<32 x i8> %xyzw, <32 x i8> %abcd) {
; CHECK-LABEL: @constant_pblendvb_avx2_zero(
; CHECK-NEXT:    ret <32 x i8> [[XYZW:%.*]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd, <32 x i8> zeroinitializer)
  ret <32 x i8> %1
}

define <32 x i8> @constant_pblendvb_avx2_dup(<32 x i8> %xyzw, <32 x i8> %sel) {
; CHECK-LABEL: @constant_pblendvb_avx2_dup(
; CHECK-NEXT:    ret <32 x i8> [[XYZW:%.*]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %xyzw, <32 x i8> %sel)
  ret <32 x i8> %1
}

; A sign-extended i1 condition folds back to a plain vector select.

define <4 x float> @sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i1> %cond) {
; CHECK-LABEL: @sel_v4f32(
; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[COND:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[X:%.*]]
; CHECK-NEXT:    ret <4 x float> [[R]]
;
  %s = sext <4 x i1> %cond to <4 x i32>
  %b = bitcast <4 x i32> %s to <4 x float>
  %r = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %x, <4 x float> %y, <4 x float> %b)
  ret <4 x float> %r
}

define <2 x double> @sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i1> %cond) {
; CHECK-LABEL: @sel_v2f64(
; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[COND:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[X:%.*]]
; CHECK-NEXT:    ret <2 x double> [[R]]
;
  %s = sext <2 x i1> %cond to <2 x i64>
  %b = bitcast <2 x i64> %s to <2 x double>
  %r = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %x, <2 x double> %y, <2 x double> %b)
  ret <2 x double> %r
}

; Bitcast X, Y, and the select and remove the intrinsic.

define <16 x i8> @sel_v4i32(<16 x i8> %x, <16 x i8> %y, <4 x i1> %cond) {
; CHECK-LABEL: @sel_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[X:%.*]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[Y:%.*]] to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[TMP2]], <4 x i32> [[TMP1]]
; CHECK-NEXT:    [[R:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
; CHECK-NEXT:    ret <16 x i8> [[R]]
;
  %s = sext <4 x i1> %cond to <4 x i32>
  %b = bitcast <4 x i32> %s to <16 x i8>
  %r = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b)
  ret <16 x i8> %r
}

define <16 x i8> @sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i1> %cond) {
; CHECK-LABEL: @sel_v16i8(
; CHECK-NEXT:    [[R:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i8> [[Y:%.*]], <16 x i8> [[X:%.*]]
; CHECK-NEXT:    ret <16 x i8> [[R]]
;
  %s = sext <16 x i1> %cond to <16 x i8>
  %r = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %x, <16 x i8> %y, <16 x i8> %s)
  ret <16 x i8> %r
}

; PR38814: https://bugs.llvm.org/show_bug.cgi?id=38814
; Repeat the tests above using the minimal form that we expect when using C intrinsics in code.
; This verifies that nothing is interfering with the blend transform. This also tests the
; expected IR when 1 of the blend operands is a constant 0 vector. Potentially, this could
; be transformed to bitwise logic in IR, but currently that transform is left to the backend.

define <4 x float> @sel_v4f32_sse_reality(ptr %x, <4 x float> %y, <4 x float> %z) {
; CHECK-LABEL: @sel_v4f32_sse_reality(
; CHECK-NEXT:    [[LD:%.*]] = load <4 x float>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <4 x float> [[Z:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[CMP]], <4 x float> zeroinitializer, <4 x float> [[LD]]
; CHECK-NEXT:    ret <4 x float> [[R]]
;
  %ld = load <4 x float>, ptr %x, align 16
  %cmp = fcmp olt <4 x float> %z, %y
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cond = bitcast <4 x i32> %sext to <4 x float>
  %r = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %ld, <4 x float> zeroinitializer, <4 x float> %cond)
  ret <4 x float> %r
}

define <2 x double> @sel_v2f64_sse_reality(ptr nocapture readonly %x, <2 x double> %y, <2 x double> %z) {
; CHECK-LABEL: @sel_v2f64_sse_reality(
; CHECK-NEXT:    [[LD:%.*]] = load <2 x double>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <2 x double> [[Z:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[CMP]], <2 x double> zeroinitializer, <2 x double> [[LD]]
; CHECK-NEXT:    ret <2 x double> [[R]]
;
  %ld = load <2 x double>, ptr %x, align 16
  %cmp = fcmp olt <2 x double> %z, %y
  %sext = sext <2 x i1> %cmp to <2 x i64>
  %cond = bitcast <2 x i64> %sext to <2 x double>
  %r = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %ld, <2 x double> zeroinitializer, <2 x double> %cond)
  ret <2 x double> %r
}

; Bitcast the inputs and the result and remove the intrinsic.

define <2 x i64> @sel_v4i32_sse_reality(ptr nocapture readonly %x, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: @sel_v4i32_sse_reality(
; CHECK-NEXT:    [[LD1:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[YCAST:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x i32>
; CHECK-NEXT:    [[ZCAST:%.*]] = bitcast <2 x i64> [[Z:%.*]] to <4 x i32>
; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <4 x i32> [[YCAST]], [[ZCAST]]
; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> [[LD1]]
; CHECK-NEXT:    [[RCAST:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[RCAST]]
;
  %ld = load <16 x i8>, ptr %x, align 16
  %ycast = bitcast <2 x i64> %y to <4 x i32>
  %zcast = bitcast <2 x i64> %z to <4 x i32>
  %cmp = icmp sgt <4 x i32> %ycast, %zcast
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cond = bitcast <4 x i32> %sext to <16 x i8>
  %r = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %ld, <16 x i8> zeroinitializer, <16 x i8> %cond)
  %rcast = bitcast <16 x i8> %r to <2 x i64>
  ret <2 x i64> %rcast
}

define <2 x i64> @sel_v16i8_sse_reality(ptr nocapture readonly %x, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: @sel_v16i8_sse_reality(
; CHECK-NEXT:    [[LD:%.*]] = load <16 x i8>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[YCAST:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <16 x i8>
; CHECK-NEXT:    [[ZCAST:%.*]] = bitcast <2 x i64> [[Z:%.*]] to <16 x i8>
; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <16 x i8> [[YCAST]], [[ZCAST]]
; CHECK-NEXT:    [[R:%.*]] = select <16 x i1> [[CMP]], <16 x i8> zeroinitializer, <16 x i8> [[LD]]
; CHECK-NEXT:    [[RCAST:%.*]] = bitcast <16 x i8> [[R]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[RCAST]]
;
  %ld = load <16 x i8>, ptr %x, align 16
  %ycast = bitcast <2 x i64> %y to <16 x i8>
  %zcast = bitcast <2 x i64> %z to <16 x i8>
  %cmp = icmp sgt <16 x i8> %ycast, %zcast
  %sext = sext <16 x i1> %cmp to <16 x i8>
  %r = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %ld, <16 x i8> zeroinitializer, <16 x i8> %sext)
  %rcast = bitcast <16 x i8> %r to <2 x i64>
  ret <2 x i64> %rcast
}

define <4 x float> @sel_v16i8_bitcast_shuffle_bitcast_cmp(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
; CHECK-LABEL: @sel_v16i8_bitcast_shuffle_bitcast_cmp(
; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[A_BC:%.*]] = bitcast <8 x float> [[A]] to <8 x i32>
; CHECK-NEXT:    [[B_BC:%.*]] = bitcast <8 x float> [[B]] to <8 x i32>
; CHECK-NEXT:    [[A_LO:%.*]] = shufflevector <8 x i32> [[A_BC]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[B_LO:%.*]] = shufflevector <8 x i32> [[B_BC]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[B_LO]], <4 x i32> [[A_LO]]
; CHECK-NEXT:    [[RES:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %cmp = fcmp olt <8 x float> %a, %b
  %sext = sext <8 x i1> %cmp to <8 x i32>
  %a.bc = bitcast <8 x float> %a to <8 x i32>
  %b.bc = bitcast <8 x float> %b to <8 x i32>
  %sext.lo = shufflevector <8 x i32> %sext, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %a.lo = shufflevector <8 x i32> %a.bc, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b.lo = shufflevector <8 x i32> %b.bc, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %a.lo.bc = bitcast <4 x i32> %a.lo to <16 x i8>
  %b.lo.bc = bitcast <4 x i32> %b.lo to <16 x i8>
  %sext.lo.bc = bitcast <4 x i32> %sext.lo to <16 x i8>
  %blendv = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a.lo.bc, <16 x i8> %b.lo.bc, <16 x i8> %sext.lo.bc)
  %res = bitcast <16 x i8> %blendv to <4 x float>
  ret <4 x float> %res
}

declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)

declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>)
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)