xref: /llvm-project/llvm/test/CodeGen/X86/avx512-select.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X86,X86-AVX512F
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX512F
4; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefixes=X86,X86-AVX512BW
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefixes=X64,X64-AVX512BW
6
; select00: a scalar i1 condition (%a == 255) picks between an all-zero
; <16 x i32> vector and %b; the chosen value is then xor'ed with %b.
; The assertions expect a compare-and-branch around a 512-bit register move
; followed by a vpxord.
7define <16 x i32> @select00(i32 %a, <16 x i32> %b) nounwind {
8; X86-LABEL: select00:
9; X86:       # %bb.0:
10; X86-NEXT:    cmpl $255, {{[0-9]+}}(%esp)
11; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
12; X86-NEXT:    je .LBB0_2
13; X86-NEXT:  # %bb.1:
14; X86-NEXT:    vmovdqa64 %zmm0, %zmm1
15; X86-NEXT:  .LBB0_2:
16; X86-NEXT:    vpxord %zmm1, %zmm0, %zmm0
17; X86-NEXT:    retl
18;
19; X64-LABEL: select00:
20; X64:       # %bb.0:
21; X64-NEXT:    cmpl $255, %edi
22; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
23; X64-NEXT:    je .LBB0_2
24; X64-NEXT:  # %bb.1:
25; X64-NEXT:    vmovdqa64 %zmm0, %zmm1
26; X64-NEXT:  .LBB0_2:
27; X64-NEXT:    vpxord %zmm1, %zmm0, %zmm0
28; X64-NEXT:    retq
29  %cmpres = icmp eq i32 %a, 255
30  %selres = select i1 %cmpres, <16 x i32> zeroinitializer, <16 x i32> %b
31  %res = xor <16 x i32> %b, %selres
32  ret <16 x i32> %res
33}
34
; select01: same shape as select00 but with <8 x i64> elements: a scalar
; condition (%a == 255) picks zero or %b, and the result is xor'ed with %b.
; The assertions expect the 64-bit-element form vpxorq for the final xor.
35define <8 x i64> @select01(i32 %a, <8 x i64> %b) nounwind {
36; X86-LABEL: select01:
37; X86:       # %bb.0:
38; X86-NEXT:    cmpl $255, {{[0-9]+}}(%esp)
39; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
40; X86-NEXT:    je .LBB1_2
41; X86-NEXT:  # %bb.1:
42; X86-NEXT:    vmovdqa64 %zmm0, %zmm1
43; X86-NEXT:  .LBB1_2:
44; X86-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
45; X86-NEXT:    retl
46;
47; X64-LABEL: select01:
48; X64:       # %bb.0:
49; X64-NEXT:    cmpl $255, %edi
50; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
51; X64-NEXT:    je .LBB1_2
52; X64-NEXT:  # %bb.1:
53; X64-NEXT:    vmovdqa64 %zmm0, %zmm1
54; X64-NEXT:  .LBB1_2:
55; X64-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
56; X64-NEXT:    retq
57  %cmpres = icmp eq i32 %a, 255
58  %selres = select i1 %cmpres, <8 x i64> zeroinitializer, <8 x i64> %b
59  %res = xor <8 x i64> %b, %selres
60  ret <8 x i64> %res
61}
62
; select02: scalar float select driven by an ordered >= compare of %a and
; %eps; yields %c when the compare is true and %b otherwise.
; The 64-bit assertions expect a mask-register compare (vcmpless) feeding a
; masked vmovss; the 32-bit assertions expect vucomiss plus a cmov on the
; addresses of the stack arguments.
63define float @select02(float %a, float %b, float %c, float %eps) {
64; X86-LABEL: select02:
65; X86:       # %bb.0:
66; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
67; X86-NEXT:    vucomiss {{[0-9]+}}(%esp), %xmm0
68; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
69; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
70; X86-NEXT:    cmovael %eax, %ecx
71; X86-NEXT:    flds (%ecx)
72; X86-NEXT:    retl
73;
74; X64-LABEL: select02:
75; X64:       # %bb.0:
76; X64-NEXT:    vcmpless %xmm0, %xmm3, %k1
77; X64-NEXT:    vmovss %xmm2, %xmm1, %xmm1 {%k1}
78; X64-NEXT:    vmovaps %xmm1, %xmm0
79; X64-NEXT:    retq
80  %cmp = fcmp oge float %a, %eps
81  %cond = select i1 %cmp, float %c, float %b
82  ret float %cond
83}
84
; select03: double-precision counterpart of select02: an ordered >= compare
; of %a and %eps selects %c (true) or %b (false).
; The 64-bit assertions expect vcmplesd into a mask register and a masked
; vmovsd.
85define double @select03(double %a, double %b, double %c, double %eps) {
86; X86-LABEL: select03:
87; X86:       # %bb.0:
88; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
89; X86-NEXT:    vucomisd {{[0-9]+}}(%esp), %xmm0
90; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
91; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
92; X86-NEXT:    cmovael %eax, %ecx
93; X86-NEXT:    fldl (%ecx)
94; X86-NEXT:    retl
95;
96; X64-LABEL: select03:
97; X64:       # %bb.0:
98; X64-NEXT:    vcmplesd %xmm0, %xmm3, %k1
99; X64-NEXT:    vmovsd %xmm2, %xmm1, %xmm1 {%k1}
100; X64-NEXT:    vmovapd %xmm1, %xmm0
101; X64-NEXT:    retq
102  %cmp = fcmp oge double %a, %eps
103  %cond = select i1 %cmp, double %c, double %b
104  ret double %cond
105}
106
; select04: vector select over <16 x double> with a constant mask whose
; first eight lanes are true and last eight are false, i.e. lanes 0-7 come
; from %a and lanes 8-15 from %b.
; The assertions expect no blending at all: only the second 512-bit half of
; the result is copied (from a register on 64-bit, from the stack on 32-bit).
107define <16 x double> @select04(<16 x double> %a, <16 x double> %b) {
108; X86-LABEL: select04:
109; X86:       # %bb.0:
110; X86-NEXT:    pushl %ebp
111; X86-NEXT:    .cfi_def_cfa_offset 8
112; X86-NEXT:    .cfi_offset %ebp, -8
113; X86-NEXT:    movl %esp, %ebp
114; X86-NEXT:    .cfi_def_cfa_register %ebp
115; X86-NEXT:    andl $-64, %esp
116; X86-NEXT:    subl $64, %esp
117; X86-NEXT:    vmovaps 8(%ebp), %zmm1
118; X86-NEXT:    movl %ebp, %esp
119; X86-NEXT:    popl %ebp
120; X86-NEXT:    .cfi_def_cfa %esp, 4
121; X86-NEXT:    retl
122;
123; X64-LABEL: select04:
124; X64:       # %bb.0:
125; X64-NEXT:    vmovaps %zmm3, %zmm1
126; X64-NEXT:    retq
127  %sel = select <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x double> %a, <16 x double> %b
128  ret <16 x double> %sel
129}
130
; select05: <8 x i1> select whose true operand is all-ones and whose false
; operand is %a, with mask and data both bitcast from i8 arguments.
; This is equivalent to (%m | %a.0); the assertions expect a plain scalar OR
; with no mask-register traffic.
131define i8 @select05(i8 %a.0, i8 %m) {
132; X86-LABEL: select05:
133; X86:       # %bb.0:
134; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
135; X86-NEXT:    orb {{[0-9]+}}(%esp), %al
136; X86-NEXT:    retl
137;
138; X64-LABEL: select05:
139; X64:       # %bb.0:
140; X64-NEXT:    movl %edi, %eax
141; X64-NEXT:    orl %esi, %eax
142; X64-NEXT:    # kill: def $al killed $al killed $eax
143; X64-NEXT:    retq
144  %mask = bitcast i8 %m to <8 x i1>
145  %a = bitcast i8 %a.0 to <8 x i1>
146  %r = select <8 x i1> %mask, <8 x i1> <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>, <8 x i1> %a
147  %res = bitcast <8 x i1> %r to i8
148  ret i8 %res;
149}
150
; select05_mem: same select as select05 (true operand all-ones, false
; operand %a), but the <8 x i1> mask and data are loaded from memory
; instead of being bitcast from i8 arguments.
; The assertions expect the bytes to be zero-extended, moved into mask
; registers and combined with korw; the mask-move mnemonic differs between
; the avx512f runs (kmovw) and the avx512bw runs (kmovd).
151define i8 @select05_mem(ptr %a.0, ptr %m) {
152; X86-AVX512F-LABEL: select05_mem:
153; X86-AVX512F:       # %bb.0:
154; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
155; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %ecx
156; X86-AVX512F-NEXT:    movzbl (%ecx), %ecx
157; X86-AVX512F-NEXT:    kmovw %ecx, %k0
158; X86-AVX512F-NEXT:    movzbl (%eax), %eax
159; X86-AVX512F-NEXT:    kmovw %eax, %k1
160; X86-AVX512F-NEXT:    korw %k1, %k0, %k0
161; X86-AVX512F-NEXT:    kmovw %k0, %eax
162; X86-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
163; X86-AVX512F-NEXT:    retl
164;
165; X64-AVX512F-LABEL: select05_mem:
166; X64-AVX512F:       # %bb.0:
167; X64-AVX512F-NEXT:    movzbl (%rsi), %eax
168; X64-AVX512F-NEXT:    kmovw %eax, %k0
169; X64-AVX512F-NEXT:    movzbl (%rdi), %eax
170; X64-AVX512F-NEXT:    kmovw %eax, %k1
171; X64-AVX512F-NEXT:    korw %k1, %k0, %k0
172; X64-AVX512F-NEXT:    kmovw %k0, %eax
173; X64-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
174; X64-AVX512F-NEXT:    retq
175;
176; X86-AVX512BW-LABEL: select05_mem:
177; X86-AVX512BW:       # %bb.0:
178; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %eax
179; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
180; X86-AVX512BW-NEXT:    movzbl (%ecx), %ecx
181; X86-AVX512BW-NEXT:    kmovd %ecx, %k0
182; X86-AVX512BW-NEXT:    movzbl (%eax), %eax
183; X86-AVX512BW-NEXT:    kmovd %eax, %k1
184; X86-AVX512BW-NEXT:    korw %k1, %k0, %k0
185; X86-AVX512BW-NEXT:    kmovd %k0, %eax
186; X86-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
187; X86-AVX512BW-NEXT:    retl
188;
189; X64-AVX512BW-LABEL: select05_mem:
190; X64-AVX512BW:       # %bb.0:
191; X64-AVX512BW-NEXT:    movzbl (%rsi), %eax
192; X64-AVX512BW-NEXT:    kmovd %eax, %k0
193; X64-AVX512BW-NEXT:    movzbl (%rdi), %eax
194; X64-AVX512BW-NEXT:    kmovd %eax, %k1
195; X64-AVX512BW-NEXT:    korw %k1, %k0, %k0
196; X64-AVX512BW-NEXT:    kmovd %k0, %eax
197; X64-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
198; X64-AVX512BW-NEXT:    retq
199  %mask = load <8 x i1> , ptr %m
200  %a = load <8 x i1> , ptr %a.0
201  %r = select <8 x i1> %mask, <8 x i1> <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>, <8 x i1> %a
202  %res = bitcast <8 x i1> %r to i8
203  ret i8 %res;
204}
205
; select06: <8 x i1> select whose true operand is %a and whose false operand
; is all-zero, with mask and data both bitcast from i8 arguments.
; This is equivalent to (%m & %a.0); the assertions expect a plain scalar
; AND with no mask-register traffic.
206define i8 @select06(i8 %a.0, i8 %m) {
207; X86-LABEL: select06:
208; X86:       # %bb.0:
209; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
210; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
211; X86-NEXT:    retl
212;
213; X64-LABEL: select06:
214; X64:       # %bb.0:
215; X64-NEXT:    movl %edi, %eax
216; X64-NEXT:    andl %esi, %eax
217; X64-NEXT:    # kill: def $al killed $al killed $eax
218; X64-NEXT:    retq
219  %mask = bitcast i8 %m to <8 x i1>
220  %a = bitcast i8 %a.0 to <8 x i1>
221  %r = select <8 x i1> %mask, <8 x i1> %a, <8 x i1> zeroinitializer
222  %res = bitcast <8 x i1> %r to i8
223  ret i8 %res;
224}
225
; select06_mem: same select as select06 (true operand %a, false operand
; all-zero), but the <8 x i1> mask and data are loaded from memory.
; The assertions expect the two bytes to be moved into mask registers and
; combined with kandw; the mask-move mnemonic differs between the avx512f
; runs (kmovw) and the avx512bw runs (kmovd).
226define i8 @select06_mem(ptr %a.0, ptr %m) {
227; X86-AVX512F-LABEL: select06_mem:
228; X86-AVX512F:       # %bb.0:
229; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
230; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %ecx
231; X86-AVX512F-NEXT:    movzbl (%ecx), %ecx
232; X86-AVX512F-NEXT:    kmovw %ecx, %k0
233; X86-AVX512F-NEXT:    movzbl (%eax), %eax
234; X86-AVX512F-NEXT:    kmovw %eax, %k1
235; X86-AVX512F-NEXT:    kandw %k1, %k0, %k0
236; X86-AVX512F-NEXT:    kmovw %k0, %eax
237; X86-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
238; X86-AVX512F-NEXT:    retl
239;
240; X64-AVX512F-LABEL: select06_mem:
241; X64-AVX512F:       # %bb.0:
242; X64-AVX512F-NEXT:    movzbl (%rsi), %eax
243; X64-AVX512F-NEXT:    kmovw %eax, %k0
244; X64-AVX512F-NEXT:    movzbl (%rdi), %eax
245; X64-AVX512F-NEXT:    kmovw %eax, %k1
246; X64-AVX512F-NEXT:    kandw %k1, %k0, %k0
247; X64-AVX512F-NEXT:    kmovw %k0, %eax
248; X64-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
249; X64-AVX512F-NEXT:    retq
250;
251; X86-AVX512BW-LABEL: select06_mem:
252; X86-AVX512BW:       # %bb.0:
253; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %eax
254; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
255; X86-AVX512BW-NEXT:    movzbl (%ecx), %ecx
256; X86-AVX512BW-NEXT:    kmovd %ecx, %k0
257; X86-AVX512BW-NEXT:    movzbl (%eax), %eax
258; X86-AVX512BW-NEXT:    kmovd %eax, %k1
259; X86-AVX512BW-NEXT:    kandw %k1, %k0, %k0
260; X86-AVX512BW-NEXT:    kmovd %k0, %eax
261; X86-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
262; X86-AVX512BW-NEXT:    retl
263;
264; X64-AVX512BW-LABEL: select06_mem:
265; X64-AVX512BW:       # %bb.0:
266; X64-AVX512BW-NEXT:    movzbl (%rsi), %eax
267; X64-AVX512BW-NEXT:    kmovd %eax, %k0
268; X64-AVX512BW-NEXT:    movzbl (%rdi), %eax
269; X64-AVX512BW-NEXT:    kmovd %eax, %k1
270; X64-AVX512BW-NEXT:    kandw %k1, %k0, %k0
271; X64-AVX512BW-NEXT:    kmovd %k0, %eax
272; X64-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
273; X64-AVX512BW-NEXT:    retq
274  %mask = load <8 x i1> , ptr %m
275  %a = load <8 x i1> , ptr %a.0
276  %r = select <8 x i1> %mask, <8 x i1> %a, <8 x i1> zeroinitializer
277  %res = bitcast <8 x i1> %r to i8
278  ret i8 %res;
279}
; select07: general <8 x i1> select between two non-constant operands
; (%a and %b) under %mask, with all three bitcast from i8 arguments.
; The assertions expect the select to be expanded in mask registers as
; (mask & a) | (~mask & b) using kandw / kandnw / korw.
280define i8 @select07(i8 %a.0, i8 %b.0, i8 %m) {
281; X86-AVX512F-LABEL: select07:
282; X86-AVX512F:       # %bb.0:
283; X86-AVX512F-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
284; X86-AVX512F-NEXT:    kmovw %eax, %k0
285; X86-AVX512F-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
286; X86-AVX512F-NEXT:    kmovw %eax, %k1
287; X86-AVX512F-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
288; X86-AVX512F-NEXT:    kmovw %eax, %k2
289; X86-AVX512F-NEXT:    kandnw %k2, %k0, %k2
290; X86-AVX512F-NEXT:    kandw %k0, %k1, %k0
291; X86-AVX512F-NEXT:    korw %k2, %k0, %k0
292; X86-AVX512F-NEXT:    kmovw %k0, %eax
293; X86-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
294; X86-AVX512F-NEXT:    retl
295;
296; X64-AVX512F-LABEL: select07:
297; X64-AVX512F:       # %bb.0:
298; X64-AVX512F-NEXT:    kmovw %edx, %k0
299; X64-AVX512F-NEXT:    kmovw %edi, %k1
300; X64-AVX512F-NEXT:    kmovw %esi, %k2
301; X64-AVX512F-NEXT:    kandnw %k2, %k0, %k2
302; X64-AVX512F-NEXT:    kandw %k0, %k1, %k0
303; X64-AVX512F-NEXT:    korw %k2, %k0, %k0
304; X64-AVX512F-NEXT:    kmovw %k0, %eax
305; X64-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
306; X64-AVX512F-NEXT:    retq
307;
308; X86-AVX512BW-LABEL: select07:
309; X86-AVX512BW:       # %bb.0:
310; X86-AVX512BW-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
311; X86-AVX512BW-NEXT:    kmovd %eax, %k0
312; X86-AVX512BW-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
313; X86-AVX512BW-NEXT:    kmovd %eax, %k1
314; X86-AVX512BW-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
315; X86-AVX512BW-NEXT:    kmovd %eax, %k2
316; X86-AVX512BW-NEXT:    kandnw %k2, %k0, %k2
317; X86-AVX512BW-NEXT:    kandw %k0, %k1, %k0
318; X86-AVX512BW-NEXT:    korw %k2, %k0, %k0
319; X86-AVX512BW-NEXT:    kmovd %k0, %eax
320; X86-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
321; X86-AVX512BW-NEXT:    retl
322;
323; X64-AVX512BW-LABEL: select07:
324; X64-AVX512BW:       # %bb.0:
325; X64-AVX512BW-NEXT:    kmovd %edx, %k0
326; X64-AVX512BW-NEXT:    kmovd %edi, %k1
327; X64-AVX512BW-NEXT:    kmovd %esi, %k2
328; X64-AVX512BW-NEXT:    kandnw %k2, %k0, %k2
329; X64-AVX512BW-NEXT:    kandw %k0, %k1, %k0
330; X64-AVX512BW-NEXT:    korw %k2, %k0, %k0
331; X64-AVX512BW-NEXT:    kmovd %k0, %eax
332; X64-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
333; X64-AVX512BW-NEXT:    retq
334  %mask = bitcast i8 %m to <8 x i1>
335  %a = bitcast i8 %a.0 to <8 x i1>
336  %b = bitcast i8 %b.0 to <8 x i1>
337  %r = select <8 x i1> %mask, <8 x i1> %a, <8 x i1> %b
338  %res = bitcast <8 x i1> %r to i8
339  ret i8 %res;
340}
341
; pr30249: scalar select with an undef condition between the constants 1
; and 2. The assertions expect the select to fold away and the constant 1
; to be returned directly.
; NOTE(review): the name suggests a regression test for LLVM PR30249 -
; confirm against the bug tracker.
342define i64 @pr30249() {
343; X86-LABEL: pr30249:
344; X86:       # %bb.0:
345; X86-NEXT:    movl $1, %eax
346; X86-NEXT:    xorl %edx, %edx
347; X86-NEXT:    retl
348;
349; X64-LABEL: pr30249:
350; X64:       # %bb.0:
351; X64-NEXT:    movl $1, %eax
352; X64-NEXT:    retq
353  %v = select i1 undef , i64 1, i64 2
354  ret i64 %v
355}
356
; pr30561_f64: scalar double select on an i1 argument: returns %a when %c
; is true and %b otherwise.
; The 64-bit assertions expect %c to be moved into a mask register and the
; select to become a single masked vmovsd; the 32-bit assertions expect a
; test of bit 0 plus a cmov on the stack-argument addresses.
; NOTE(review): the name suggests a regression test for LLVM PR30561 -
; confirm against the bug tracker.
357define double @pr30561_f64(double %b, double %a, i1 %c) {
358; X86-LABEL: pr30561_f64:
359; X86:       # %bb.0:
360; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
361; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
362; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
363; X86-NEXT:    cmovnel %eax, %ecx
364; X86-NEXT:    fldl (%ecx)
365; X86-NEXT:    retl
366;
367; X64-AVX512F-LABEL: pr30561_f64:
368; X64-AVX512F:       # %bb.0:
369; X64-AVX512F-NEXT:    kmovw %edi, %k1
370; X64-AVX512F-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
371; X64-AVX512F-NEXT:    retq
372;
373; X64-AVX512BW-LABEL: pr30561_f64:
374; X64-AVX512BW:       # %bb.0:
375; X64-AVX512BW-NEXT:    kmovd %edi, %k1
376; X64-AVX512BW-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
377; X64-AVX512BW-NEXT:    retq
378  %cond = select i1 %c, double %a, double %b
379  ret double %cond
380}
381
; pr30561_f32: single-precision counterpart of pr30561_f64: returns %a when
; the i1 argument %c is true and %b otherwise.
; The 64-bit assertions expect %c in a mask register and a single masked
; vmovss.
382define float @pr30561_f32(float %b, float %a, i1 %c) {
383; X86-LABEL: pr30561_f32:
384; X86:       # %bb.0:
385; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
386; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
387; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
388; X86-NEXT:    cmovnel %eax, %ecx
389; X86-NEXT:    flds (%ecx)
390; X86-NEXT:    retl
391;
392; X64-AVX512F-LABEL: pr30561_f32:
393; X64-AVX512F:       # %bb.0:
394; X64-AVX512F-NEXT:    kmovw %edi, %k1
395; X64-AVX512F-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
396; X64-AVX512F-NEXT:    retq
397;
398; X64-AVX512BW-LABEL: pr30561_f32:
399; X64-AVX512BW:       # %bb.0:
400; X64-AVX512BW-NEXT:    kmovd %edi, %k1
401; X64-AVX512BW-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
402; X64-AVX512BW-NEXT:    retq
403  %cond = select i1 %c, float %a, float %b
404  ret float %cond
405}
406
; pr31515: the select mask is the AND of two <16 x i1> arguments; lanes of
; the <16 x i16> value %c are replaced by zero where the mask is set and
; kept otherwise.
; The avx512f assertions expect a vector expansion (sign-fill of the mask
; followed by vpandn); the avx512bw assertions expect a mask register built
; with vpmovb2m, inverted with knotw, and a zero-masked vmovdqu16.
; NOTE(review): the name suggests a regression test for LLVM PR31515 -
; confirm against the bug tracker.
407define <16 x i16> @pr31515(<16 x i1> %a, <16 x i1> %b, <16 x i16> %c) nounwind {
408; X86-AVX512F-LABEL: pr31515:
409; X86-AVX512F:       # %bb.0:
410; X86-AVX512F-NEXT:    vpand %xmm1, %xmm0, %xmm0
411; X86-AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
412; X86-AVX512F-NEXT:    vpsllw $15, %ymm0, %ymm0
413; X86-AVX512F-NEXT:    vpsraw $15, %ymm0, %ymm0
414; X86-AVX512F-NEXT:    vpandn %ymm2, %ymm0, %ymm0
415; X86-AVX512F-NEXT:    retl
416;
417; X64-AVX512F-LABEL: pr31515:
418; X64-AVX512F:       # %bb.0:
419; X64-AVX512F-NEXT:    vpand %xmm1, %xmm0, %xmm0
420; X64-AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
421; X64-AVX512F-NEXT:    vpsllw $15, %ymm0, %ymm0
422; X64-AVX512F-NEXT:    vpsraw $15, %ymm0, %ymm0
423; X64-AVX512F-NEXT:    vpandn %ymm2, %ymm0, %ymm0
424; X64-AVX512F-NEXT:    retq
425;
426; X86-AVX512BW-LABEL: pr31515:
427; X86-AVX512BW:       # %bb.0:
428; X86-AVX512BW-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
429; X86-AVX512BW-NEXT:    vpand %xmm1, %xmm0, %xmm0
430; X86-AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
431; X86-AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
432; X86-AVX512BW-NEXT:    knotw %k0, %k1
433; X86-AVX512BW-NEXT:    vmovdqu16 %zmm2, %zmm0 {%k1} {z}
434; X86-AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
435; X86-AVX512BW-NEXT:    retl
436;
437; X64-AVX512BW-LABEL: pr31515:
438; X64-AVX512BW:       # %bb.0:
439; X64-AVX512BW-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
440; X64-AVX512BW-NEXT:    vpand %xmm1, %xmm0, %xmm0
441; X64-AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
442; X64-AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
443; X64-AVX512BW-NEXT:    knotw %k0, %k1
444; X64-AVX512BW-NEXT:    vmovdqu16 %zmm2, %zmm0 {%k1} {z}
445; X64-AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
446; X64-AVX512BW-NEXT:    retq
447  %mask = and <16 x i1> %a, %b
448  %res = select <16 x i1> %mask, <16 x i16> zeroinitializer, <16 x i16> %c
449  ret <16 x i16> %res
450}
451
; pr42355_v32i16: scalar i1 condition selecting between two whole
; <32 x i16> vectors (%x when %c is true, %y otherwise).
; The assertions are shared by the avx512f and avx512bw runs and expect a
; test of bit 0 and a branch around a 512-bit vmovaps.
; NOTE(review): the name suggests a regression test for LLVM PR42355 -
; confirm against the bug tracker.
452define <32 x i16> @pr42355_v32i16(i1 %c, <32 x i16> %x, <32 x i16> %y) {
453; X86-LABEL: pr42355_v32i16:
454; X86:       # %bb.0:
455; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
456; X86-NEXT:    jne .LBB14_2
457; X86-NEXT:  # %bb.1:
458; X86-NEXT:    vmovaps %zmm1, %zmm0
459; X86-NEXT:  .LBB14_2:
460; X86-NEXT:    retl
461;
462; X64-LABEL: pr42355_v32i16:
463; X64:       # %bb.0:
464; X64-NEXT:    testb $1, %dil
465; X64-NEXT:    jne .LBB14_2
466; X64-NEXT:  # %bb.1:
467; X64-NEXT:    vmovaps %zmm1, %zmm0
468; X64-NEXT:  .LBB14_2:
469; X64-NEXT:    retq
470  %a = select i1 %c, <32 x i16> %x, <32 x i16> %y
471  ret <32 x i16> %a
472}
473
; pr42355_v64i8: byte-element counterpart of pr42355_v32i16: a scalar i1
; condition selects between two whole <64 x i8> vectors (%x when %c is
; true, %y otherwise).
; The assertions expect the same test-and-branch around a 512-bit vmovaps.
474define <64 x i8> @pr42355_v64i8(i1 %c, <64 x i8> %x, <64 x i8> %y) {
475; X86-LABEL: pr42355_v64i8:
476; X86:       # %bb.0:
477; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
478; X86-NEXT:    jne .LBB15_2
479; X86-NEXT:  # %bb.1:
480; X86-NEXT:    vmovaps %zmm1, %zmm0
481; X86-NEXT:  .LBB15_2:
482; X86-NEXT:    retl
483;
484; X64-LABEL: pr42355_v64i8:
485; X64:       # %bb.0:
486; X64-NEXT:    testb $1, %dil
487; X64-NEXT:    jne .LBB15_2
488; X64-NEXT:  # %bb.1:
489; X64-NEXT:    vmovaps %zmm1, %zmm0
490; X64-NEXT:  .LBB15_2:
491; X64-NEXT:    retq
492  %a = select i1 %c, <64 x i8> %x, <64 x i8> %y
493  ret <64 x i8> %a
494}
495
496; This would crash because AVX512 has legal vector select
497; condition values that are not 256/512-bit vectors.
498
; narrowExtractedVectorSelect_crash: compares each lane of the <16 x i64>
; argument against zero, uses the resulting <16 x i1> to keep or zero the
; matching lane of the <16 x i16> argument, and zero-extends the result
; back to <16 x i64>.
; The assertions expect two vptestmq results to be joined with kunpckbw
; into one 16-bit mask before the select is applied.
; NOTE(review): the function references attribute group #0, but no
; "attributes #0" definition is visible in this part of the file - confirm
; whether that is intentional.
499define <16 x i64> @narrowExtractedVectorSelect_crash(<16 x i64> %arg, <16 x i16> %arg1) #0 {
500; X86-AVX512F-LABEL: narrowExtractedVectorSelect_crash:
501; X86-AVX512F:       # %bb.0:
502; X86-AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
503; X86-AVX512F-NEXT:    vptestmq %zmm1, %zmm1, %k1
504; X86-AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
505; X86-AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
506; X86-AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
507; X86-AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm1
508; X86-AVX512F-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
509; X86-AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm1
510; X86-AVX512F-NEXT:    vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
511; X86-AVX512F-NEXT:    retl
512;
513; X64-AVX512F-LABEL: narrowExtractedVectorSelect_crash:
514; X64-AVX512F:       # %bb.0:
515; X64-AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
516; X64-AVX512F-NEXT:    vptestmq %zmm1, %zmm1, %k1
517; X64-AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
518; X64-AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
519; X64-AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
520; X64-AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm1
521; X64-AVX512F-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
522; X64-AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm1
523; X64-AVX512F-NEXT:    vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
524; X64-AVX512F-NEXT:    retq
525;
526; X86-AVX512BW-LABEL: narrowExtractedVectorSelect_crash:
527; X86-AVX512BW:       # %bb.0:
528; X86-AVX512BW-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
529; X86-AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
530; X86-AVX512BW-NEXT:    vptestmq %zmm1, %zmm1, %k1
531; X86-AVX512BW-NEXT:    kunpckbw %k0, %k1, %k1
532; X86-AVX512BW-NEXT:    vmovdqu16 %zmm2, %zmm1 {%k1} {z}
533; X86-AVX512BW-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
534; X86-AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm1
535; X86-AVX512BW-NEXT:    vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
536; X86-AVX512BW-NEXT:    retl
537;
538; X64-AVX512BW-LABEL: narrowExtractedVectorSelect_crash:
539; X64-AVX512BW:       # %bb.0:
540; X64-AVX512BW-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
541; X64-AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
542; X64-AVX512BW-NEXT:    vptestmq %zmm1, %zmm1, %k1
543; X64-AVX512BW-NEXT:    kunpckbw %k0, %k1, %k1
544; X64-AVX512BW-NEXT:    vmovdqu16 %zmm2, %zmm1 {%k1} {z}
545; X64-AVX512BW-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
546; X64-AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm1
547; X64-AVX512BW-NEXT:    vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
548; X64-AVX512BW-NEXT:    retq
549  %tmp = icmp ne <16 x i64> %arg, zeroinitializer
550  %tmp2 = select <16 x i1> %tmp, <16 x i16> %arg1, <16 x i16> zeroinitializer
551  %tmp3 = zext <16 x i16> %tmp2 to <16 x i64>
552  ret <16 x i64> %tmp3
553}
554
; vselect_v1i1: vector select on single-element <1 x i1> values. The
; condition is loaded from %w, the true operand from %x and the false
; operand from %y; the result is stored back to %x.
; The assertions expect the select to be expanded in mask registers
; (kandnw / kandw / korw), then reduced to bit 0 with a kshiftlw/kshiftrw
; pair before the byte store.
555define void @vselect_v1i1(ptr %w, ptr %x, ptr %y) nounwind {
556; X86-AVX512F-LABEL: vselect_v1i1:
557; X86-AVX512F:       # %bb.0:
558; X86-AVX512F-NEXT:    pushl %esi
559; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %ecx
560; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %edx
561; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
562; X86-AVX512F-NEXT:    movzbl (%eax), %esi
563; X86-AVX512F-NEXT:    kmovw %esi, %k0
564; X86-AVX512F-NEXT:    movzbl (%edx), %edx
565; X86-AVX512F-NEXT:    kmovw %edx, %k1
566; X86-AVX512F-NEXT:    movzbl (%ecx), %ecx
567; X86-AVX512F-NEXT:    kmovw %ecx, %k2
568; X86-AVX512F-NEXT:    kandnw %k1, %k2, %k1
569; X86-AVX512F-NEXT:    kandw %k2, %k0, %k0
570; X86-AVX512F-NEXT:    korw %k1, %k0, %k0
571; X86-AVX512F-NEXT:    kshiftlw $15, %k0, %k0
572; X86-AVX512F-NEXT:    kshiftrw $15, %k0, %k0
573; X86-AVX512F-NEXT:    kmovw %k0, %ecx
574; X86-AVX512F-NEXT:    movb %cl, (%eax)
575; X86-AVX512F-NEXT:    popl %esi
576; X86-AVX512F-NEXT:    retl
577;
578; X64-AVX512F-LABEL: vselect_v1i1:
579; X64-AVX512F:       # %bb.0:
580; X64-AVX512F-NEXT:    movzbl (%rsi), %eax
581; X64-AVX512F-NEXT:    kmovw %eax, %k0
582; X64-AVX512F-NEXT:    movzbl (%rdx), %eax
583; X64-AVX512F-NEXT:    kmovw %eax, %k1
584; X64-AVX512F-NEXT:    movzbl (%rdi), %eax
585; X64-AVX512F-NEXT:    kmovw %eax, %k2
586; X64-AVX512F-NEXT:    kandnw %k1, %k2, %k1
587; X64-AVX512F-NEXT:    kandw %k2, %k0, %k0
588; X64-AVX512F-NEXT:    korw %k1, %k0, %k0
589; X64-AVX512F-NEXT:    kshiftlw $15, %k0, %k0
590; X64-AVX512F-NEXT:    kshiftrw $15, %k0, %k0
591; X64-AVX512F-NEXT:    kmovw %k0, %eax
592; X64-AVX512F-NEXT:    movb %al, (%rsi)
593; X64-AVX512F-NEXT:    retq
594;
595; X86-AVX512BW-LABEL: vselect_v1i1:
596; X86-AVX512BW:       # %bb.0:
597; X86-AVX512BW-NEXT:    pushl %esi
598; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
599; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %edx
600; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %eax
601; X86-AVX512BW-NEXT:    movzbl (%eax), %esi
602; X86-AVX512BW-NEXT:    kmovd %esi, %k0
603; X86-AVX512BW-NEXT:    movzbl (%edx), %edx
604; X86-AVX512BW-NEXT:    kmovd %edx, %k1
605; X86-AVX512BW-NEXT:    movzbl (%ecx), %ecx
606; X86-AVX512BW-NEXT:    kmovd %ecx, %k2
607; X86-AVX512BW-NEXT:    kandnw %k1, %k2, %k1
608; X86-AVX512BW-NEXT:    kandw %k2, %k0, %k0
609; X86-AVX512BW-NEXT:    korw %k1, %k0, %k0
610; X86-AVX512BW-NEXT:    kshiftlw $15, %k0, %k0
611; X86-AVX512BW-NEXT:    kshiftrw $15, %k0, %k0
612; X86-AVX512BW-NEXT:    kmovd %k0, %ecx
613; X86-AVX512BW-NEXT:    movb %cl, (%eax)
614; X86-AVX512BW-NEXT:    popl %esi
615; X86-AVX512BW-NEXT:    retl
616;
617; X64-AVX512BW-LABEL: vselect_v1i1:
618; X64-AVX512BW:       # %bb.0:
619; X64-AVX512BW-NEXT:    movzbl (%rsi), %eax
620; X64-AVX512BW-NEXT:    kmovd %eax, %k0
621; X64-AVX512BW-NEXT:    movzbl (%rdx), %eax
622; X64-AVX512BW-NEXT:    kmovd %eax, %k1
623; X64-AVX512BW-NEXT:    movzbl (%rdi), %eax
624; X64-AVX512BW-NEXT:    kmovd %eax, %k2
625; X64-AVX512BW-NEXT:    kandnw %k1, %k2, %k1
626; X64-AVX512BW-NEXT:    kandw %k2, %k0, %k0
627; X64-AVX512BW-NEXT:    korw %k1, %k0, %k0
628; X64-AVX512BW-NEXT:    kshiftlw $15, %k0, %k0
629; X64-AVX512BW-NEXT:    kshiftrw $15, %k0, %k0
630; X64-AVX512BW-NEXT:    kmovd %k0, %eax
631; X64-AVX512BW-NEXT:    movb %al, (%rsi)
632; X64-AVX512BW-NEXT:    retq
633  %a = load <1 x i1>, ptr %x
634  %b = load <1 x i1>, ptr %y
635  %b2 = load <1 x i1>, ptr %w
636  %c = select <1 x i1> %b2, <1 x i1> %a, <1 x i1> %b
637  store <1 x i1> %c, ptr %x
638  ret void
639}
640
641; Scalar condition with v1i1 operands
; select_v1i1: the i1 argument %z chooses between the <1 x i1> loaded from
; %x (true) and the xor of the values loaded from %y and %w (false); the
; result is stored back to %x.
; The assertions expect a real branch on bit 0 of %z, with the two loads
; and the kxorw only on the false path, followed by the same
; kshiftlw/kshiftrw reduction to bit 0 before the byte store.
642define void @select_v1i1(ptr %w, ptr %x, ptr %y, i1 %z) nounwind {
643; X86-AVX512F-LABEL: select_v1i1:
644; X86-AVX512F:       # %bb.0:
645; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
646; X86-AVX512F-NEXT:    testb $1, {{[0-9]+}}(%esp)
647; X86-AVX512F-NEXT:    jne .LBB18_1
648; X86-AVX512F-NEXT:  # %bb.2:
649; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %ecx
650; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %edx
651; X86-AVX512F-NEXT:    movzbl (%edx), %edx
652; X86-AVX512F-NEXT:    kmovw %edx, %k0
653; X86-AVX512F-NEXT:    movzbl (%ecx), %ecx
654; X86-AVX512F-NEXT:    kmovw %ecx, %k1
655; X86-AVX512F-NEXT:    kxorw %k1, %k0, %k0
656; X86-AVX512F-NEXT:    jmp .LBB18_3
657; X86-AVX512F-NEXT:  .LBB18_1:
658; X86-AVX512F-NEXT:    movzbl (%eax), %ecx
659; X86-AVX512F-NEXT:    kmovw %ecx, %k0
660; X86-AVX512F-NEXT:  .LBB18_3:
661; X86-AVX512F-NEXT:    kshiftlw $15, %k0, %k0
662; X86-AVX512F-NEXT:    kshiftrw $15, %k0, %k0
663; X86-AVX512F-NEXT:    kmovw %k0, %ecx
664; X86-AVX512F-NEXT:    movb %cl, (%eax)
665; X86-AVX512F-NEXT:    retl
666;
667; X64-AVX512F-LABEL: select_v1i1:
668; X64-AVX512F:       # %bb.0:
669; X64-AVX512F-NEXT:    testb $1, %cl
670; X64-AVX512F-NEXT:    jne .LBB18_1
671; X64-AVX512F-NEXT:  # %bb.2:
672; X64-AVX512F-NEXT:    movzbl (%rdx), %eax
673; X64-AVX512F-NEXT:    kmovw %eax, %k0
674; X64-AVX512F-NEXT:    movzbl (%rdi), %eax
675; X64-AVX512F-NEXT:    kmovw %eax, %k1
676; X64-AVX512F-NEXT:    kxorw %k1, %k0, %k0
677; X64-AVX512F-NEXT:    jmp .LBB18_3
678; X64-AVX512F-NEXT:  .LBB18_1:
679; X64-AVX512F-NEXT:    movzbl (%rsi), %eax
680; X64-AVX512F-NEXT:    kmovw %eax, %k0
681; X64-AVX512F-NEXT:  .LBB18_3:
682; X64-AVX512F-NEXT:    kshiftlw $15, %k0, %k0
683; X64-AVX512F-NEXT:    kshiftrw $15, %k0, %k0
684; X64-AVX512F-NEXT:    kmovw %k0, %eax
685; X64-AVX512F-NEXT:    movb %al, (%rsi)
686; X64-AVX512F-NEXT:    retq
687;
688; X86-AVX512BW-LABEL: select_v1i1:
689; X86-AVX512BW:       # %bb.0:
690; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %eax
691; X86-AVX512BW-NEXT:    testb $1, {{[0-9]+}}(%esp)
692; X86-AVX512BW-NEXT:    jne .LBB18_1
693; X86-AVX512BW-NEXT:  # %bb.2:
694; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
695; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %edx
696; X86-AVX512BW-NEXT:    movzbl (%edx), %edx
697; X86-AVX512BW-NEXT:    kmovd %edx, %k0
698; X86-AVX512BW-NEXT:    movzbl (%ecx), %ecx
699; X86-AVX512BW-NEXT:    kmovd %ecx, %k1
700; X86-AVX512BW-NEXT:    kxorw %k1, %k0, %k0
701; X86-AVX512BW-NEXT:    jmp .LBB18_3
702; X86-AVX512BW-NEXT:  .LBB18_1:
703; X86-AVX512BW-NEXT:    movzbl (%eax), %ecx
704; X86-AVX512BW-NEXT:    kmovd %ecx, %k0
705; X86-AVX512BW-NEXT:  .LBB18_3:
706; X86-AVX512BW-NEXT:    kshiftlw $15, %k0, %k0
707; X86-AVX512BW-NEXT:    kshiftrw $15, %k0, %k0
708; X86-AVX512BW-NEXT:    kmovd %k0, %ecx
709; X86-AVX512BW-NEXT:    movb %cl, (%eax)
710; X86-AVX512BW-NEXT:    retl
711;
712; X64-AVX512BW-LABEL: select_v1i1:
713; X64-AVX512BW:       # %bb.0:
714; X64-AVX512BW-NEXT:    testb $1, %cl
715; X64-AVX512BW-NEXT:    jne .LBB18_1
716; X64-AVX512BW-NEXT:  # %bb.2:
717; X64-AVX512BW-NEXT:    movzbl (%rdx), %eax
718; X64-AVX512BW-NEXT:    kmovd %eax, %k0
719; X64-AVX512BW-NEXT:    movzbl (%rdi), %eax
720; X64-AVX512BW-NEXT:    kmovd %eax, %k1
721; X64-AVX512BW-NEXT:    kxorw %k1, %k0, %k0
722; X64-AVX512BW-NEXT:    jmp .LBB18_3
723; X64-AVX512BW-NEXT:  .LBB18_1:
724; X64-AVX512BW-NEXT:    movzbl (%rsi), %eax
725; X64-AVX512BW-NEXT:    kmovd %eax, %k0
726; X64-AVX512BW-NEXT:  .LBB18_3:
727; X64-AVX512BW-NEXT:    kshiftlw $15, %k0, %k0
728; X64-AVX512BW-NEXT:    kshiftrw $15, %k0, %k0
729; X64-AVX512BW-NEXT:    kmovd %k0, %eax
730; X64-AVX512BW-NEXT:    movb %al, (%rsi)
731; X64-AVX512BW-NEXT:    retq
732  %a = load <1 x i1>, ptr %x
733  %b = load <1 x i1>, ptr %y
734  %b2 = load <1 x i1>, ptr %w
735  %b3 = xor <1 x i1> %b, %b2
736  %c = select i1 %z, <1 x i1> %a, <1 x i1> %b3
737  store <1 x i1> %c, ptr %x
738  ret void
739}
740
741; Regression test from https://github.com/JuliaLang/julia/issues/36955
; julia_issue36955: two chained <8 x i1> selects keyed on an unordered >
; compare of %a against zero and on that compare's bitwise inverse.
; Where the compare is true the inner select yields 0 and the outer select
; keeps it; where it is false the outer select yields 1. The result is
; therefore the inverse of the compare and %mask never reaches the output.
; The assertions expect exactly that: a single vcmplepd against a zeroed
; register, with the mask register copied to the return value.
742define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) {
743; X86-AVX512F-LABEL: julia_issue36955:
744; X86-AVX512F:       # %bb.0:
745; X86-AVX512F-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
746; X86-AVX512F-NEXT:    vcmplepd %zmm0, %zmm1, %k0
747; X86-AVX512F-NEXT:    kmovw %k0, %eax
748; X86-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
749; X86-AVX512F-NEXT:    vzeroupper
750; X86-AVX512F-NEXT:    retl
751;
752; X64-AVX512F-LABEL: julia_issue36955:
753; X64-AVX512F:       # %bb.0:
754; X64-AVX512F-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
755; X64-AVX512F-NEXT:    vcmplepd %zmm0, %zmm1, %k0
756; X64-AVX512F-NEXT:    kmovw %k0, %eax
757; X64-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
758; X64-AVX512F-NEXT:    vzeroupper
759; X64-AVX512F-NEXT:    retq
760;
761; X86-AVX512BW-LABEL: julia_issue36955:
762; X86-AVX512BW:       # %bb.0:
763; X86-AVX512BW-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
764; X86-AVX512BW-NEXT:    vcmplepd %zmm0, %zmm1, %k0
765; X86-AVX512BW-NEXT:    kmovd %k0, %eax
766; X86-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
767; X86-AVX512BW-NEXT:    vzeroupper
768; X86-AVX512BW-NEXT:    retl
769;
770; X64-AVX512BW-LABEL: julia_issue36955:
771; X64-AVX512BW:       # %bb.0:
772; X64-AVX512BW-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
773; X64-AVX512BW-NEXT:    vcmplepd %zmm0, %zmm1, %k0
774; X64-AVX512BW-NEXT:    kmovd %k0, %eax
775; X64-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
776; X64-AVX512BW-NEXT:    vzeroupper
777; X64-AVX512BW-NEXT:    retq
778  %fcmp = fcmp ugt <8 x double> %a, zeroinitializer
779  %xor = xor <8 x i1> %fcmp, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
780  %select1 = select <8 x i1> %fcmp, <8 x i1> zeroinitializer, <8 x i1> %mask
781  %select2 = select <8 x i1> %xor, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i1> %select1
782  %ret = bitcast <8 x i1> %select2 to i8
783  ret i8 %ret
784}
785