; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512VL

define <2 x i64> @select_cast_cond_multiuse_v2i64(<2 x i64> %x, <2 x i64> %y, i2 %m, ptr %o) {
; SSE2-LABEL: select_cast_cond_multiuse_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1,2]
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
; SSE2-NEXT:    pand %xmm2, %xmm3
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    movdqa %xmm3, (%rsi)
; SSE2-NEXT:    pandn %xmm1, %xmm3
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: select_cast_cond_multiuse_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movapd %xmm0, %xmm2
; SSE42-NEXT:    movd %edi, %xmm0
; SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE42-NEXT:    pmovsxbq {{.*#+}} xmm3 = [1,2]
; SSE42-NEXT:    pand %xmm3, %xmm0
; SSE42-NEXT:    pcmpeqq %xmm3, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE42-NEXT:    movdqa %xmm0, (%rsi)
; SSE42-NEXT:    movapd %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: select_cast_cond_multiuse_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX2-NEXT:    vpmovsxbq {{.*#+}} xmm3 = [1,2]
; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: select_cast_cond_multiuse_v2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq {{.*#+}} zmm2 {%k1} {z} = -1
; AVX512F-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: select_cast_cond_multiuse_v2i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    kmovw %edi, %k1
; AVX512VL-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512VL-NEXT:    vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX512VL-NEXT:    retq
  %z = bitcast i2 %m to <2 x i1>
  %s = sext <2 x i1> %z to <2 x i64>
  %v = select <2 x i1> %z, <2 x i64> %x, <2 x i64> %y
  store <2 x i64> %s, ptr %o
  ret <2 x i64> %v
}

define <4 x i32> @select_cast_cond_multiuse_v4i32(<4 x i32> %x, <4 x i32> %y, i4 %m, ptr %o) {
; SSE2-LABEL: select_cast_cond_multiuse_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1,2,4,8]
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm2, (%rsi)
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: select_cast_cond_multiuse_v4i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movaps %xmm0, %xmm2
; SSE42-NEXT:    movd %edi, %xmm0
; SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE42-NEXT:    pmovsxbd {{.*#+}} xmm3 = [1,2,4,8]
; SSE42-NEXT:    pand %xmm3, %xmm0
; SSE42-NEXT:    pcmpeqd %xmm3, %xmm0
; SSE42-NEXT:    blendvps %xmm0, %xmm2, %xmm1
; SSE42-NEXT:    movdqa %xmm0, (%rsi)
; SSE42-NEXT:    movaps %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: select_cast_cond_multiuse_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX2-NEXT:    vpmovsxbd {{.*#+}} xmm3 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: select_cast_cond_multiuse_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd {{.*#+}} zmm2 {%k1} {z} = -1
; AVX512F-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: select_cast_cond_multiuse_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    kmovw %edi, %k1
; AVX512VL-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512VL-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX512VL-NEXT:    retq
  %z = bitcast i4 %m to <4 x i1>
  %s = sext <4 x i1> %z to <4 x i32>
  %v = select <4 x i1> %z, <4 x i32> %x, <4 x i32> %y
  store <4 x i32> %s, ptr %o
  ret <4 x i32> %v
}

define <8 x i16> @select_cast_cond_multiuse_v8i16(<8 x i16> %x, <8 x i16> %y, i8 %m, ptr %o) {
; SSE2-LABEL: select_cast_cond_multiuse_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm2
; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    pcmpeqw %xmm3, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm2, (%rsi)
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: select_cast_cond_multiuse_v8i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    movd %edi, %xmm0
; SSE42-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE42-NEXT:    pmovzxbw {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128]
; SSE42-NEXT:    pand %xmm3, %xmm0
; SSE42-NEXT:    pcmpeqw %xmm3, %xmm0
; SSE42-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
; SSE42-NEXT:    movdqa %xmm0, (%rsi)
; SSE42-NEXT:    movdqa %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: select_cast_cond_multiuse_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastb %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqw %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: select_cast_cond_multiuse_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd {{.*#+}} zmm2 {%k1} {z} = -1
; AVX512F-NEXT:    vpmovdw %zmm2, %ymm2
; AVX512F-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: select_cast_cond_multiuse_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    kmovw %edi, %k1
; AVX512VL-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512VL-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512VL-NEXT:    vpmovdw %ymm2, %xmm2
; AVX512VL-NEXT:    vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm2 & (xmm0 ^ xmm1))
; AVX512VL-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
  %z = bitcast i8 %m to <8 x i1>
  %s = sext <8 x i1> %z to <8 x i16>
  %v = select <8 x i1> %z, <8 x i16> %x, <8 x i16> %y
  store <8 x i16> %s, ptr %o
  ret <8 x i16> %v
}

define <16 x i8> @select_cast_cond_multiuse_v16i8(<16 x i8> %x, <16 x i8> %y, i16 %m, ptr %o) {
; SSE2-LABEL: select_cast_cond_multiuse_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    pcmpeqb %xmm3, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm2, (%rsi)
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: select_cast_cond_multiuse_v16i8:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    movd %edi, %xmm0
; SSE42-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE42-NEXT:    pand %xmm3, %xmm0
; SSE42-NEXT:    pcmpeqb %xmm3, %xmm0
; SSE42-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
; SSE42-NEXT:    movdqa %xmm0, (%rsi)
; SSE42-NEXT:    movdqa %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: select_cast_cond_multiuse_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqb %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: select_cast_cond_multiuse_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd {{.*#+}} zmm2 {%k1} {z} = -1
; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512F-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: select_cast_cond_multiuse_v16i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    kmovw %edi, %k1
; AVX512VL-NEXT:    vpternlogd {{.*#+}} zmm2 {%k1} {z} = -1
; AVX512VL-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512VL-NEXT:    vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm2 & (xmm0 ^ xmm1))
; AVX512VL-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
  %z = bitcast i16 %m to <16 x i1>
  %s = sext <16 x i1> %z to <16 x i8>
  %v = select <16 x i1> %z, <16 x i8> %x, <16 x i8> %y
  store <16 x i8> %s, ptr %o
  ret <16 x i8> %v
}

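; Check the case where the condition is used at two different element widths:
; the select operates on <8 x float> while the stored sign-extension is
; <8 x i16>, so the pre-AVX512 lowerings materialize the mask at both widths.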
define <8 x float> @select_cast_cond_multiuse_v8i16_v8f32(<8 x float> %x, <8 x float> %y, i8 %m, ptr %o) {
; SSE2-LABEL: select_cast_cond_multiuse_v8i16_v8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm4
; SSE2-NEXT:    pshuflw {{.*#+}} xmm5 = xmm4[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[0,0,0,0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm6 = [1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm6, %xmm5
; SSE2-NEXT:    pcmpeqw %xmm6, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,0,0,0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm6 = [1,2,4,8]
; SSE2-NEXT:    movdqa %xmm4, %xmm7
; SSE2-NEXT:    pand %xmm6, %xmm7
; SSE2-NEXT:    pcmpeqd %xmm6, %xmm7
; SSE2-NEXT:    pand %xmm7, %xmm0
; SSE2-NEXT:    pandn %xmm2, %xmm7
; SSE2-NEXT:    por %xmm7, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-NEXT:    pand %xmm2, %xmm4
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pandn %xmm3, %xmm4
; SSE2-NEXT:    por %xmm4, %xmm1
; SSE2-NEXT:    movdqa %xmm5, (%rsi)
; SSE2-NEXT:    retq
;
; SSE42-LABEL: select_cast_cond_multiuse_v8i16_v8f32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movaps %xmm0, %xmm4
; SSE42-NEXT:    movd %edi, %xmm0
; SSE42-NEXT:    pshuflw {{.*#+}} xmm5 = xmm0[0,0,0,0,4,5,6,7]
; SSE42-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,0,0]
; SSE42-NEXT:    pmovzxbw {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128]
; SSE42-NEXT:    pand %xmm5, %xmm6
; SSE42-NEXT:    pcmpeqw %xmm5, %xmm6
; SSE42-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[0,0,0,0]
; SSE42-NEXT:    pmovsxbd {{.*#+}} xmm7 = [1,2,4,8]
; SSE42-NEXT:    movdqa %xmm5, %xmm0
; SSE42-NEXT:    pand %xmm7, %xmm0
; SSE42-NEXT:    pcmpeqd %xmm7, %xmm0
; SSE42-NEXT:    blendvps %xmm0, %xmm4, %xmm2
; SSE42-NEXT:    pmovzxbd {{.*#+}} xmm0 = [16,32,64,128]
; SSE42-NEXT:    pand %xmm0, %xmm5
; SSE42-NEXT:    pcmpeqd %xmm0, %xmm5
; SSE42-NEXT:    movdqa %xmm5, %xmm0
; SSE42-NEXT:    blendvps %xmm0, %xmm1, %xmm3
; SSE42-NEXT:    movdqa %xmm6, (%rsi)
; SSE42-NEXT:    movaps %xmm2, %xmm0
; SSE42-NEXT:    movaps %xmm3, %xmm1
; SSE42-NEXT:    retq
;
; AVX2-LABEL: select_cast_cond_multiuse_v8i16_v8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm4
; AVX2-NEXT:    vpcmpeqw %xmm3, %xmm4, %xmm3
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm2, %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vmovdqa %xmm3, (%rsi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: select_cast_cond_multiuse_v8i16_v8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd {{.*#+}} zmm2 {%k1} {z} = -1
; AVX512F-NEXT:    vpmovdw %zmm2, %ymm2
; AVX512F-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: select_cast_cond_multiuse_v8i16_v8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    kmovw %edi, %k1
; AVX512VL-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512VL-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512VL-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    vpmovdw %ymm2, (%rsi)
; AVX512VL-NEXT:    retq
  %z = bitcast i8 %m to <8 x i1>
  %s = sext <8 x i1> %z to <8 x i16>
  %v = select <8 x i1> %z, <8 x float> %x, <8 x float> %y
  store <8 x i16> %s, ptr %o
  ret <8 x float> %v
}