; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2             | FileCheck %s --check-prefixes=SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2           | FileCheck %s --check-prefixes=SSE42
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2             | FileCheck %s --check-prefixes=AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f          | FileCheck %s --check-prefixes=AVX512F
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512VL

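; PR62014: in each test below the select condition is bitcast from a scalar
; mask and has a second use (it is sign-extended and stored to %o), so codegen
; must keep the materialized mask vector alive alongside the blend.

; i2 mask: <2 x i1> condition selects <2 x i64>; sext stored as <2 x i64>.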
define <2 x i64> @select_cast_cond_multiuse_v2i64(<2 x i64> %x, <2 x i64> %y, i2 %m, ptr %o) {
; SSE2-LABEL: select_cast_cond_multiuse_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1,2]
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
; SSE2-NEXT:    pand %xmm2, %xmm3
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    movdqa %xmm3, (%rsi)
; SSE2-NEXT:    pandn %xmm1, %xmm3
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: select_cast_cond_multiuse_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movapd %xmm0, %xmm2
; SSE42-NEXT:    movd %edi, %xmm0
; SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE42-NEXT:    pmovsxbq {{.*#+}} xmm3 = [1,2]
; SSE42-NEXT:    pand %xmm3, %xmm0
; SSE42-NEXT:    pcmpeqq %xmm3, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE42-NEXT:    movdqa %xmm0, (%rsi)
; SSE42-NEXT:    movapd %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: select_cast_cond_multiuse_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX2-NEXT:    vpmovsxbq {{.*#+}} xmm3 = [1,2]
; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: select_cast_cond_multiuse_v2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq {{.*#+}} zmm2 {%k1} {z} = -1
; AVX512F-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: select_cast_cond_multiuse_v2i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    kmovw %edi, %k1
; AVX512VL-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512VL-NEXT:    vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX512VL-NEXT:    retq
  %z = bitcast i2 %m to <2 x i1>
  %s = sext <2 x i1> %z to <2 x i64>
  %v = select <2 x i1> %z, <2 x i64> %x, <2 x i64> %y
  store <2 x i64> %s, ptr %o
  ret <2 x i64> %v
}

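; i4 mask: <4 x i1> condition selects <4 x i32>; sext stored as <4 x i32>.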
define <4 x i32> @select_cast_cond_multiuse_v4i32(<4 x i32> %x, <4 x i32> %y, i4 %m, ptr %o) {
; SSE2-LABEL: select_cast_cond_multiuse_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1,2,4,8]
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm2, (%rsi)
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: select_cast_cond_multiuse_v4i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movaps %xmm0, %xmm2
; SSE42-NEXT:    movd %edi, %xmm0
; SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE42-NEXT:    pmovsxbd {{.*#+}} xmm3 = [1,2,4,8]
; SSE42-NEXT:    pand %xmm3, %xmm0
; SSE42-NEXT:    pcmpeqd %xmm3, %xmm0
; SSE42-NEXT:    blendvps %xmm0, %xmm2, %xmm1
; SSE42-NEXT:    movdqa %xmm0, (%rsi)
; SSE42-NEXT:    movaps %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: select_cast_cond_multiuse_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX2-NEXT:    vpmovsxbd {{.*#+}} xmm3 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: select_cast_cond_multiuse_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd {{.*#+}} zmm2 {%k1} {z} = -1
; AVX512F-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: select_cast_cond_multiuse_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    kmovw %edi, %k1
; AVX512VL-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512VL-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX512VL-NEXT:    retq
  %z = bitcast i4 %m to <4 x i1>
  %s = sext <4 x i1> %z to <4 x i32>
  %v = select <4 x i1> %z, <4 x i32> %x, <4 x i32> %y
  store <4 x i32> %s, ptr %o
  ret <4 x i32> %v
}

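; i8 mask: <8 x i1> condition selects <8 x i16>; sext stored as <8 x i16>.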
define <8 x i16> @select_cast_cond_multiuse_v8i16(<8 x i16> %x, <8 x i16> %y, i8 %m, ptr %o) {
; SSE2-LABEL: select_cast_cond_multiuse_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm2
; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    pcmpeqw %xmm3, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm2, (%rsi)
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: select_cast_cond_multiuse_v8i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    movd %edi, %xmm0
; SSE42-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE42-NEXT:    pmovzxbw {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128]
; SSE42-NEXT:    pand %xmm3, %xmm0
; SSE42-NEXT:    pcmpeqw %xmm3, %xmm0
; SSE42-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
; SSE42-NEXT:    movdqa %xmm0, (%rsi)
; SSE42-NEXT:    movdqa %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: select_cast_cond_multiuse_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastb %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqw %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: select_cast_cond_multiuse_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd {{.*#+}} zmm2 {%k1} {z} = -1
; AVX512F-NEXT:    vpmovdw %zmm2, %ymm2
; AVX512F-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: select_cast_cond_multiuse_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    kmovw %edi, %k1
; AVX512VL-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512VL-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512VL-NEXT:    vpmovdw %ymm2, %xmm2
; AVX512VL-NEXT:    vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm2 & (xmm0 ^ xmm1))
; AVX512VL-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
  %z = bitcast i8 %m to <8 x i1>
  %s = sext <8 x i1> %z to <8 x i16>
  %v = select <8 x i1> %z, <8 x i16> %x, <8 x i16> %y
  store <8 x i16> %s, ptr %o
  ret <8 x i16> %v
}

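; i16 mask: <16 x i1> condition selects <16 x i8>; sext stored as <16 x i8>.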
define <16 x i8> @select_cast_cond_multiuse_v16i8(<16 x i8> %x, <16 x i8> %y, i16 %m, ptr %o) {
; SSE2-LABEL: select_cast_cond_multiuse_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    pcmpeqb %xmm3, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm2, (%rsi)
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: select_cast_cond_multiuse_v16i8:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    movd %edi, %xmm0
; SSE42-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE42-NEXT:    pand %xmm3, %xmm0
; SSE42-NEXT:    pcmpeqb %xmm3, %xmm0
; SSE42-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
; SSE42-NEXT:    movdqa %xmm0, (%rsi)
; SSE42-NEXT:    movdqa %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: select_cast_cond_multiuse_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqb %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: select_cast_cond_multiuse_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd {{.*#+}} zmm2 {%k1} {z} = -1
; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512F-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: select_cast_cond_multiuse_v16i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    kmovw %edi, %k1
; AVX512VL-NEXT:    vpternlogd {{.*#+}} zmm2 {%k1} {z} = -1
; AVX512VL-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512VL-NEXT:    vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm2 & (xmm0 ^ xmm1))
; AVX512VL-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
  %z = bitcast i16 %m to <16 x i1>
  %s = sext <16 x i1> %z to <16 x i8>
  %v = select <16 x i1> %z, <16 x i8> %x, <16 x i8> %y
  store <16 x i8> %s, ptr %o
  ret <16 x i8> %v
}

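; i8 mask with mismatched element widths: the <8 x i1> condition selects
; <8 x float>, while its sext is stored as <8 x i16>, so the stored mask and
; the blend mask differ in type.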
define <8 x float> @select_cast_cond_multiuse_v8i16_v8f32(<8 x float> %x, <8 x float> %y, i8 %m, ptr %o) {
; SSE2-LABEL: select_cast_cond_multiuse_v8i16_v8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm4
; SSE2-NEXT:    pshuflw {{.*#+}} xmm5 = xmm4[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[0,0,0,0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm6 = [1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm6, %xmm5
; SSE2-NEXT:    pcmpeqw %xmm6, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,0,0,0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm6 = [1,2,4,8]
; SSE2-NEXT:    movdqa %xmm4, %xmm7
; SSE2-NEXT:    pand %xmm6, %xmm7
; SSE2-NEXT:    pcmpeqd %xmm6, %xmm7
; SSE2-NEXT:    pand %xmm7, %xmm0
; SSE2-NEXT:    pandn %xmm2, %xmm7
; SSE2-NEXT:    por %xmm7, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-NEXT:    pand %xmm2, %xmm4
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pandn %xmm3, %xmm4
; SSE2-NEXT:    por %xmm4, %xmm1
; SSE2-NEXT:    movdqa %xmm5, (%rsi)
; SSE2-NEXT:    retq
;
; SSE42-LABEL: select_cast_cond_multiuse_v8i16_v8f32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movaps %xmm0, %xmm4
; SSE42-NEXT:    movd %edi, %xmm0
; SSE42-NEXT:    pshuflw {{.*#+}} xmm5 = xmm0[0,0,0,0,4,5,6,7]
; SSE42-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,0,0]
; SSE42-NEXT:    pmovzxbw {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128]
; SSE42-NEXT:    pand %xmm5, %xmm6
; SSE42-NEXT:    pcmpeqw %xmm5, %xmm6
; SSE42-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[0,0,0,0]
; SSE42-NEXT:    pmovsxbd {{.*#+}} xmm7 = [1,2,4,8]
; SSE42-NEXT:    movdqa %xmm5, %xmm0
; SSE42-NEXT:    pand %xmm7, %xmm0
; SSE42-NEXT:    pcmpeqd %xmm7, %xmm0
; SSE42-NEXT:    blendvps %xmm0, %xmm4, %xmm2
; SSE42-NEXT:    pmovzxbd {{.*#+}} xmm0 = [16,32,64,128]
; SSE42-NEXT:    pand %xmm0, %xmm5
; SSE42-NEXT:    pcmpeqd %xmm0, %xmm5
; SSE42-NEXT:    movdqa %xmm5, %xmm0
; SSE42-NEXT:    blendvps %xmm0, %xmm1, %xmm3
; SSE42-NEXT:    movdqa %xmm6, (%rsi)
; SSE42-NEXT:    movaps %xmm2, %xmm0
; SSE42-NEXT:    movaps %xmm3, %xmm1
; SSE42-NEXT:    retq
;
; AVX2-LABEL: select_cast_cond_multiuse_v8i16_v8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm4
; AVX2-NEXT:    vpcmpeqw %xmm3, %xmm4, %xmm3
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm2, %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vmovdqa %xmm3, (%rsi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: select_cast_cond_multiuse_v8i16_v8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd {{.*#+}} zmm2 {%k1} {z} = -1
; AVX512F-NEXT:    vpmovdw %zmm2, %ymm2
; AVX512F-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    vmovdqa %xmm2, (%rsi)
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: select_cast_cond_multiuse_v8i16_v8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    kmovw %edi, %k1
; AVX512VL-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512VL-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512VL-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    vpmovdw %ymm2, (%rsi)
; AVX512VL-NEXT:    retq
  %z = bitcast i8 %m to <8 x i1>
  %s = sext <8 x i1> %z to <8 x i16>
  %v = select <8 x i1> %z, <8 x float> %x, <8 x float> %y
  store <8 x i16> %s, ptr %o
  ret <8 x float> %v
}