xref: /llvm-project/llvm/test/CodeGen/X86/avx512-vec-cmp.ll (revision e088249b74586590c9e143d85b97a175acc9465e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=KNL
3; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
4; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
5
; test1: ordered less-or-equal compare (fcmp ole) of two <16 x float> vectors,
; then select %x where the mask is set and %y elsewhere. The expected output is
; one compare into mask register %k1 followed by a masked blend.
6define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
7; CHECK-LABEL: test1:
8; CHECK:       ## %bb.0:
9; CHECK-NEXT:    vcmpleps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x02]
10; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
11; CHECK-NEXT:    retq ## encoding: [0xc3]
12  %mask = fcmp ole <16 x float> %x, %y
13  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
14  ret <16 x float> %max
15}
16
; test2: same shape as a compare-and-select on <8 x double>: fcmp ole produces
; the mask, select picks %x for set lanes and %y otherwise. Expected output is
; vcmplepd into %k1 plus a masked vblendmpd.
17define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
18; CHECK-LABEL: test2:
19; CHECK:       ## %bb.0:
20; CHECK-NEXT:    vcmplepd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x02]
21; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
22; CHECK-NEXT:    retq ## encoding: [0xc3]
23  %mask = fcmp ole <8 x double> %x, %y
24  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
25  ret <8 x double> %max
26}
27
; test3: integer equality compare of %x against a <16 x i32> loaded from %yp
; (align 4), selecting %x or %x1. The expected output uses the load directly as
; the memory operand of vpcmpeqd instead of a separate vector load.
28define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, ptr %yp) nounwind {
29; CHECK-LABEL: test3:
30; CHECK:       ## %bb.0:
31; CHECK-NEXT:    vpcmpeqd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x0f]
32; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
33; CHECK-NEXT:    retq ## encoding: [0xc3]
34  %y = load <16 x i32>, ptr %yp, align 4
35  %mask = icmp eq <16 x i32> %x, %y
36  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
37  ret <16 x i32> %max
38}
39
; test4_unsigned: unsigned greater-or-equal compare (icmp uge) on <16 x i32>;
; the select returns %x1 for set lanes and %y otherwise. Expected output is the
; unsigned compare vpcmpnltud (predicate immediate 0x05) and a masked blend.
40define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
41; CHECK-LABEL: test4_unsigned:
42; CHECK:       ## %bb.0:
43; CHECK-NEXT:    vpcmpnltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x05]
44; CHECK-NEXT:    vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
45; CHECK-NEXT:    retq ## encoding: [0xc3]
46  %mask = icmp uge <16 x i32> %x, %y
47  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
48  ret <16 x i32> %max
49}
50
; test5: equality compare on <8 x i64> with a select between the two compared
; operands. Expected output is vpcmpeqq into %k1 plus a masked vpblendmq.
51define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
52; CHECK-LABEL: test5:
53; CHECK:       ## %bb.0:
54; CHECK-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc9]
55; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
56; CHECK-NEXT:    retq ## encoding: [0xc3]
57  %mask = icmp eq <8 x i64> %x, %y
58  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
59  ret <8 x i64> %max
60}
61
; test6_unsigned: unsigned greater-than compare (icmp ugt) on <8 x i64>; the
; select returns %x1 for set lanes and %y otherwise. Expected output is
; vpcmpnleuq (predicate immediate 0x06) and a masked vpblendmq.
62define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
63; CHECK-LABEL: test6_unsigned:
64; CHECK:       ## %bb.0:
65; CHECK-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x06]
66; CHECK-NEXT:    vpblendmq %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc2]
67; CHECK-NEXT:    retq ## encoding: [0xc3]
68  %mask = icmp ugt <8 x i64> %x, %y
69  %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
70  ret <8 x i64> %max
71}
72
; test7: fcmp olt of a 128-bit <4 x float> vector against zero, then select
; between %a and %b. In the runs without +avx512vl the expected compare and
; blend operate on zmm registers (note the kill annotations and the trailing
; vzeroupper); in the run with +avx512vl they operate on xmm registers.
73define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
74; AVX512-LABEL: test7:
75; AVX512:       ## %bb.0:
76; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
77; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
78; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
79; AVX512-NEXT:    vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01]
80; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
81; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
82; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
83; AVX512-NEXT:    retq ## encoding: [0xc3]
84;
85; SKX-LABEL: test7:
86; SKX:       ## %bb.0:
87; SKX-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x57,0xd2]
88; SKX-NEXT:    vcmpltps %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xca,0x01]
89; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
90; SKX-NEXT:    retq ## encoding: [0xc3]
91
92  %mask = fcmp olt <4 x float> %a, zeroinitializer
93  %c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
94  ret <4 x float>%c
95}
96
; test8: fcmp olt of a 128-bit <2 x double> vector against zero, then select
; between %a and %b. As with the <4 x float> variant, the runs without
; +avx512vl are expected to use zmm registers for the compare and blend, while
; the +avx512vl run uses xmm registers.
97define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
98; AVX512-LABEL: test8:
99; AVX512:       ## %bb.0:
100; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
101; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
102; AVX512-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x57,0xd2]
103; AVX512-NEXT:    vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01]
104; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
105; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
106; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
107; AVX512-NEXT:    retq ## encoding: [0xc3]
108;
109; SKX-LABEL: test8:
110; SKX:       ## %bb.0:
111; SKX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x57,0xd2]
112; SKX-NEXT:    vcmpltpd %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xca,0x01]
113; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
114; SKX-NEXT:    retq ## encoding: [0xc3]
115  %mask = fcmp olt <2 x double> %a, zeroinitializer
116  %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
117  ret <2 x double>%c
118}
119
; test9: equality compare and select on a 256-bit <8 x i32> vector. The runs
; without +avx512vl are expected to perform the compare and blend on zmm
; registers (ymm inputs are marked as defining zmm); the +avx512vl run uses
; ymm registers directly.
120define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
121; AVX512-LABEL: test9:
122; AVX512:       ## %bb.0:
123; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
124; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
125; AVX512-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
126; AVX512-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
127; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
128; AVX512-NEXT:    retq ## encoding: [0xc3]
129;
130; SKX-LABEL: test9:
131; SKX:       ## %bb.0:
132; SKX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc9]
133; SKX-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x64,0xc0]
134; SKX-NEXT:    retq ## encoding: [0xc3]
135  %mask = icmp eq <8 x i32> %x, %y
136  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
137  ret <8 x i32> %max
138}
139
; test10: ordered-equal compare (fcmp oeq) and select on a 256-bit
; <8 x float> vector. Expected output uses zmm registers in the runs without
; +avx512vl and ymm registers in the +avx512vl run.
140define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
141; AVX512-LABEL: test10:
142; AVX512:       ## %bb.0:
143; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
144; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
145; AVX512-NEXT:    vcmpeqps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x00]
146; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
147; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
148; AVX512-NEXT:    retq ## encoding: [0xc3]
149;
150; SKX-LABEL: test10:
151; SKX:       ## %bb.0:
152; SKX-NEXT:    vcmpeqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x00]
153; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
154; SKX-NEXT:    retq ## encoding: [0xc3]
155
156  %mask = fcmp oeq <8 x float> %x, %y
157  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
158  ret <8 x float> %max
159}
160
; test11_unsigned: icmp ugt followed by a select of the same two <8 x i32>
; operands, i.e. an unsigned maximum. Every run is expected to emit a single
; vpmaxud with no mask compare or blend.
161define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
162; AVX512-LABEL: test11_unsigned:
163; AVX512:       ## %bb.0:
164; AVX512-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
165; AVX512-NEXT:    retq ## encoding: [0xc3]
166;
167; SKX-LABEL: test11_unsigned:
168; SKX:       ## %bb.0:
169; SKX-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
170; SKX-NEXT:    retq ## encoding: [0xc3]
171  %mask = icmp ugt <8 x i32> %x, %y
172  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
173  ret <8 x i32> %max
174}
175
; test12: equality compare of two <16 x i64> values (two zmm registers per
; operand) with the <16 x i1> result bitcast to i16. Expected output is two
; 8-lane vpcmpeqq compares whose masks are joined with kunpckbw; the mask is
; moved to %eax with kmovw in the AVX512F-only run and kmovd in the others.
176define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
177; KNL-LABEL: test12:
178; KNL:       ## %bb.0:
179; KNL-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc2]
180; KNL-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x29,0xcb]
181; KNL-NEXT:    kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
182; KNL-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
183; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
184; KNL-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
185; KNL-NEXT:    retq ## encoding: [0xc3]
186;
187; AVX512BW-LABEL: test12:
188; AVX512BW:       ## %bb.0:
189; AVX512BW-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc2]
190; AVX512BW-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x29,0xcb]
191; AVX512BW-NEXT:    kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
192; AVX512BW-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
193; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
194; AVX512BW-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
195; AVX512BW-NEXT:    retq ## encoding: [0xc3]
196;
197; SKX-LABEL: test12:
198; SKX:       ## %bb.0:
199; SKX-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc2]
200; SKX-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x29,0xcb]
201; SKX-NEXT:    kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
202; SKX-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
203; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
204; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
205; SKX-NEXT:    retq ## encoding: [0xc3]
206  %res = icmp eq <16 x i64> %a, %b
207  %res1 = bitcast <16 x i1> %res to i16
208  ret i16 %res1
209}
210
; test12_v32i32: equality compare of two <32 x i32> values with the <32 x i1>
; result bitcast to i32. In the AVX512F-only run each 16-bit mask is moved to
; a GPR and the halves are merged with shll/orl; in the runs with +avx512bw
; the two masks are merged in mask registers with kunpckwd before one kmovd.
211define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
212; KNL-LABEL: test12_v32i32:
213; KNL:       ## %bb.0:
214; KNL-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc2]
215; KNL-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
216; KNL-NEXT:    vpcmpeqd %zmm3, %zmm1, %k0 ## encoding: [0x62,0xf1,0x75,0x48,0x76,0xc3]
217; KNL-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
218; KNL-NEXT:    shll $16, %eax ## encoding: [0xc1,0xe0,0x10]
219; KNL-NEXT:    orl %ecx, %eax ## encoding: [0x09,0xc8]
220; KNL-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
221; KNL-NEXT:    retq ## encoding: [0xc3]
222;
223; AVX512BW-LABEL: test12_v32i32:
224; AVX512BW:       ## %bb.0:
225; AVX512BW-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc2]
226; AVX512BW-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x76,0xcb]
227; AVX512BW-NEXT:    kunpckwd %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x4b,0xc0]
228; AVX512BW-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
229; AVX512BW-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
230; AVX512BW-NEXT:    retq ## encoding: [0xc3]
231;
232; SKX-LABEL: test12_v32i32:
233; SKX:       ## %bb.0:
234; SKX-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc2]
235; SKX-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x76,0xcb]
236; SKX-NEXT:    kunpckwd %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x4b,0xc0]
237; SKX-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
238; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
239; SKX-NEXT:    retq ## encoding: [0xc3]
240  %res = icmp eq <32 x i32> %a, %b
241  %res1 = bitcast <32 x i1> %res to i32
242  ret i32 %res1
243}
244
; test12_v64i16: equality compare of two <64 x i16> values with the <64 x i1>
; result bitcast to i64. In the AVX512F-only run the expected output compares
; four 256-bit pieces with vpcmpeqw, sign-extends each result to dwords,
; converts it to a 16-bit mask with vptestmd, and assembles the i64 with
; shifts and ors. In the runs with +avx512bw two 512-bit vpcmpeqw compares
; write mask registers directly and are merged with kunpckdq before kmovq.
245define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
246; KNL-LABEL: test12_v64i16:
247; KNL:       ## %bb.0:
248; KNL-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm4 ## encoding: [0xc5,0xfd,0x75,0xe2]
249; KNL-NEXT:    vpmovsxwd %ymm4, %zmm4 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xe4]
250; KNL-NEXT:    vptestmd %zmm4, %zmm4, %k0 ## encoding: [0x62,0xf2,0x5d,0x48,0x27,0xc4]
251; KNL-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
252; KNL-NEXT:    vextracti64x4 $1, %zmm2, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xd2,0x01]
253; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xc0,0x01]
254; KNL-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x75,0xc2]
255; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xc0]
256; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
257; KNL-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
258; KNL-NEXT:    shll $16, %ecx ## encoding: [0xc1,0xe1,0x10]
259; KNL-NEXT:    orl %eax, %ecx ## encoding: [0x09,0xc1]
260; KNL-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0x75,0xc3]
261; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xc0]
262; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
263; KNL-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
264; KNL-NEXT:    vextracti64x4 $1, %zmm3, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xd8,0x01]
265; KNL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xc9,0x01]
266; KNL-NEXT:    vpcmpeqw %ymm0, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0x75,0xc0]
267; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xc0]
268; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
269; KNL-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
270; KNL-NEXT:    shll $16, %eax ## encoding: [0xc1,0xe0,0x10]
271; KNL-NEXT:    orl %edx, %eax ## encoding: [0x09,0xd0]
272; KNL-NEXT:    shlq $32, %rax ## encoding: [0x48,0xc1,0xe0,0x20]
273; KNL-NEXT:    orq %rcx, %rax ## encoding: [0x48,0x09,0xc8]
274; KNL-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
275; KNL-NEXT:    retq ## encoding: [0xc3]
276;
277; AVX512BW-LABEL: test12_v64i16:
278; AVX512BW:       ## %bb.0:
279; AVX512BW-NEXT:    vpcmpeqw %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc2]
280; AVX512BW-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x75,0xcb]
281; AVX512BW-NEXT:    kunpckdq %k0, %k1, %k0 ## encoding: [0xc4,0xe1,0xf4,0x4b,0xc0]
282; AVX512BW-NEXT:    kmovq %k0, %rax ## encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
283; AVX512BW-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
284; AVX512BW-NEXT:    retq ## encoding: [0xc3]
285;
286; SKX-LABEL: test12_v64i16:
287; SKX:       ## %bb.0:
288; SKX-NEXT:    vpcmpeqw %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc2]
289; SKX-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x75,0xcb]
290; SKX-NEXT:    kunpckdq %k0, %k1, %k0 ## encoding: [0xc4,0xe1,0xf4,0x4b,0xc0]
291; SKX-NEXT:    kmovq %k0, %rax ## encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
292; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
293; SKX-NEXT:    retq ## encoding: [0xc3]
294  %res = icmp eq <64 x i16> %a, %b
295  %res1 = bitcast <64 x i1> %res to i64
296  ret i64 %res1
297}
298
; test13: fcmp oeq on <16 x float> with the <16 x i1> result zero-extended to
; <16 x i32>. Expected output materializes an all-ones-per-set-lane vector
; (zero-masked vpternlogd in the AVX512 runs, vpmovm2d in the run that enables
; +avx512dq) and then shifts right by 31 to leave 0 or 1 in each lane.
; Note that the opening brace of the body follows the expected-output lines.
299define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
300; AVX512-LABEL: test13:
301; AVX512:       ## %bb.0:
302; AVX512-NEXT:    vcmpeqps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x00]
303; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
304; AVX512-NEXT:    ## zmm0 {%k1} {z} = -1
305; AVX512-NEXT:    vpsrld $31, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0x1f]
306; AVX512-NEXT:    retq ## encoding: [0xc3]
307;
308; SKX-LABEL: test13:
309; SKX:       ## %bb.0:
310; SKX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc1,0x00]
311; SKX-NEXT:    vpmovm2d %k0, %zmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x38,0xc0]
312; SKX-NEXT:    vpsrld $31, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0x1f]
313; SKX-NEXT:    retq ## encoding: [0xc3]
314{
315  %cmpvector_i = fcmp oeq <16 x float> %a, %b
316  %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
317  ret <16 x i32> %conv
318}
319
; test14: computes %a - %b, compares the difference (signed greater-than)
; against %a, sign-extends that mask, compares the extension with zero, and
; selects zero or the difference. The expected output shows the sext/compare
; round trip collapsed: vpsubd, one vpcmpgtd into %k1, and a zero-masked
; vmovdqa32 of the difference.
320define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
321; CHECK-LABEL: test14:
322; CHECK:       ## %bb.0:
323; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc9]
324; CHECK-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
325; CHECK-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xc1]
326; CHECK-NEXT:    retq ## encoding: [0xc3]
327  %sub_r = sub <16 x i32> %a, %b
328  %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
329  %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
330  %mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
331  %res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
332  ret <16 x i32>%res
333}
334
; test15: <8 x i64> version of the subtract / signed-compare / sext /
; compare-with-zero / select pattern. Expected output is vpsubq, one vpcmpgtq
; into %k1, and a zero-masked vmovdqa64 of the difference.
335define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
336; CHECK-LABEL: test15:
337; CHECK:       ## %bb.0:
338; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc9]
339; CHECK-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc8]
340; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xc1]
341; CHECK-NEXT:    retq ## encoding: [0xc3]
342  %sub_r = sub <8 x i64> %a, %b
343  %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
344  %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
345  %mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
346  %res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
347  ret <8 x i64>%res
348}
349
; test16: signed greater-or-equal compare (icmp sge) on <16 x i32>; the select
; returns %x1 for set lanes and %y otherwise. Expected output is vpcmpnltd
; (predicate immediate 0x05) and a masked vpblendmd.
350define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
351; CHECK-LABEL: test16:
352; CHECK:       ## %bb.0:
353; CHECK-NEXT:    vpcmpnltd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x05]
354; CHECK-NEXT:    vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
355; CHECK-NEXT:    retq ## encoding: [0xc3]
356  %mask = icmp sge <16 x i32> %x, %y
357  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
358  ret <16 x i32> %max
359}
360
; test17: signed greater-than compare of %x against a <16 x i32> loaded from
; %y.ptr (align 4). Expected output uses the load as the memory operand of
; vpcmpgtd, followed by a masked vpblendmd between %x and %x1.
361define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
362; CHECK-LABEL: test17:
363; CHECK:       ## %bb.0:
364; CHECK-NEXT:    vpcmpgtd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0x0f]
365; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
366; CHECK-NEXT:    retq ## encoding: [0xc3]
367  %y = load <16 x i32>, ptr %y.ptr, align 4
368  %mask = icmp sgt <16 x i32> %x, %y
369  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
370  ret <16 x i32> %max
371}
372
; test18: signed less-or-equal compare of %x against a <16 x i32> loaded from
; %y.ptr. Expected output is vpcmpled (predicate immediate 0x02) with the load
; as its memory operand, followed by a masked vpblendmd.
373define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
374; CHECK-LABEL: test18:
375; CHECK:       ## %bb.0:
376; CHECK-NEXT:    vpcmpled (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x0f,0x02]
377; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
378; CHECK-NEXT:    retq ## encoding: [0xc3]
379  %y = load <16 x i32>, ptr %y.ptr, align 4
380  %mask = icmp sle <16 x i32> %x, %y
381  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
382  ret <16 x i32> %max
383}
384
; test19: unsigned less-or-equal compare of %x against a <16 x i32> loaded
; from %y.ptr. Expected output is the unsigned form vpcmpleud (predicate
; immediate 0x02) with the load as its memory operand, then a masked blend.
385define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
386; CHECK-LABEL: test19:
387; CHECK:       ## %bb.0:
388; CHECK-NEXT:    vpcmpleud (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0x0f,0x02]
389; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
390; CHECK-NEXT:    retq ## encoding: [0xc3]
391  %y = load <16 x i32>, ptr %y.ptr, align 4
392  %mask = icmp ule <16 x i32> %x, %y
393  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
394  ret <16 x i32> %max
395}
396
; test20: two equality compares on <16 x i32> combined lane-wise with
; select(%mask0, %mask1, zero), which keeps a lane only when both compares are
; true. Expected output writes the first compare to %k1 and uses %k1 as the
; write mask of the second compare rather than emitting a separate mask AND.
397define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
398; CHECK-LABEL: test20:
399; CHECK:       ## %bb.0:
400; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
401; CHECK-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0x6d,0x49,0x76,0xcb]
402; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
403; CHECK-NEXT:    retq ## encoding: [0xc3]
404  %mask1 = icmp eq <16 x i32> %x1, %y1
405  %mask0 = icmp eq <16 x i32> %x, %y
406  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
407  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
408  ret <16 x i32> %max
409}
410
; test21: a signed less-or-equal compare of %x,%y and a signed
; greater-or-equal compare of %x1,%y1 on <8 x i64>, combined with
; select(%mask0, %mask1, zero). Expected output is vpcmpleq into %k1, then
; vpcmpnltq write-masked by %k1, then a masked vpblendmq of %x and %x1.
411define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
412; CHECK-LABEL: test21:
413; CHECK:       ## %bb.0:
414; CHECK-NEXT:    vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
415; CHECK-NEXT:    vpcmpnltq %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf3,0xed,0x49,0x1f,0xcb,0x05]
416; CHECK-NEXT:    vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x64,0xc0]
417; CHECK-NEXT:    retq ## encoding: [0xc3]
418  %mask1 = icmp sge <8 x i64> %x1, %y1
419  %mask0 = icmp sle <8 x i64> %x, %y
420  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
421  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
422  ret <8 x i64> %max
423}
424
; test22: signed greater-than compare of %x1,%y1 combined (via
; select(%mask0, %mask1, zero)) with a signed greater-than compare of %x
; against an <8 x i64> loaded from %y.ptr. Expected output does the register
; compare first into %k1, then a %k1-masked vpcmpgtq with the load as its
; memory operand.
425define <8 x i64> @test22(<8 x i64> %x, ptr %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
426; CHECK-LABEL: test22:
427; CHECK:       ## %bb.0:
428; CHECK-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xca]
429; CHECK-NEXT:    vpcmpgtq (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0x0f]
430; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
431; CHECK-NEXT:    retq ## encoding: [0xc3]
432  %mask1 = icmp sgt <8 x i64> %x1, %y1
433  %y = load <8 x i64>, ptr %y.ptr, align 4
434  %mask0 = icmp sgt <8 x i64> %x, %y
435  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
436  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
437  ret <8 x i64> %max
438}
439
; test23: signed greater-or-equal compare of %x1,%y1 combined with an unsigned
; less-or-equal compare of %x against a <16 x i32> loaded from %y.ptr.
; Expected output is vpcmpnltd into %k1, then a %k1-masked vpcmpleud with the
; load as its memory operand, then a masked vpblendmd.
440define <16 x i32> @test23(<16 x i32> %x, ptr %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
441; CHECK-LABEL: test23:
442; CHECK:       ## %bb.0:
443; CHECK-NEXT:    vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
444; CHECK-NEXT:    vpcmpleud (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0x0f,0x02]
445; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
446; CHECK-NEXT:    retq ## encoding: [0xc3]
447  %mask1 = icmp sge <16 x i32> %x1, %y1
448  %y = load <16 x i32>, ptr %y.ptr, align 4
449  %mask0 = icmp ule <16 x i32> %x, %y
450  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
451  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
452  ret <16 x i32> %max
453}
454
; test24: loads one i64 from %yb.ptr, splats it to <8 x i64> with
; insertelement + shufflevector (all-zero shuffle mask), and compares %x for
; equality against the splat. Expected output uses an embedded-broadcast
; memory operand, (%rdi){1to8}, on vpcmpeqq instead of a separate broadcast.
455define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, ptr %yb.ptr) nounwind {
456; CHECK-LABEL: test24:
457; CHECK:       ## %bb.0:
458; CHECK-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x58,0x29,0x0f]
459; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
460; CHECK-NEXT:    retq ## encoding: [0xc3]
461  %yb = load i64, ptr %yb.ptr, align 4
462  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
463  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
464  %mask = icmp eq <8 x i64> %x, %y
465  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
466  ret <8 x i64> %max
467}
468
; test25: loads one i32 from %yb.ptr, splats it to <16 x i32>, and performs a
; signed less-or-equal compare of %x against the splat. Expected output is
; vpcmpled with the embedded-broadcast operand (%rdi){1to16}, followed by a
; masked vpblendmd between %x and %x1.
469define <16 x i32> @test25(<16 x i32> %x, ptr %yb.ptr, <16 x i32> %x1) nounwind {
470; CHECK-LABEL: test25:
471; CHECK:       ## %bb.0:
472; CHECK-NEXT:    vpcmpled (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x58,0x1f,0x0f,0x02]
473; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
474; CHECK-NEXT:    retq ## encoding: [0xc3]
475  %yb = load i32, ptr %yb.ptr, align 4
476  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
477  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
478  %mask = icmp sle <16 x i32> %x, %y
479  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
480  ret <16 x i32> %max
481}
482
; test26: combines a register compare (icmp sge of %x1,%y1) with a signed
; greater-than compare of %x against an i32 splat loaded from %yb.ptr, using
; select(%mask0, %mask1, zero). Expected output is vpcmpnltd into %k1, then a
; %k1-masked vpcmpgtd with the embedded-broadcast operand (%rdi){1to16}.
483define <16 x i32> @test26(<16 x i32> %x, ptr %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
484; CHECK-LABEL: test26:
485; CHECK:       ## %bb.0:
486; CHECK-NEXT:    vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
487; CHECK-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x66,0x0f]
488; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
489; CHECK-NEXT:    retq ## encoding: [0xc3]
490  %mask1 = icmp sge <16 x i32> %x1, %y1
491  %yb = load i32, ptr %yb.ptr, align 4
492  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
493  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
494  %mask0 = icmp sgt <16 x i32> %x, %y
495  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
496  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
497  ret <16 x i32> %max
498}
499
; test27: <8 x i64> counterpart of the masked broadcast compare: icmp sge of
; %x1,%y1 combined with a signed less-or-equal compare of %x against an i64
; splat loaded from %yb.ptr. Expected output is vpcmpnltq into %k1, then a
; %k1-masked vpcmpleq with the embedded-broadcast operand (%rdi){1to8}.
500define <8 x i64> @test27(<8 x i64> %x, ptr %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
501; CHECK-LABEL: test27:
502; CHECK:       ## %bb.0:
503; CHECK-NEXT:    vpcmpnltq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x05]
504; CHECK-NEXT:    vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x59,0x1f,0x0f,0x02]
505; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
506; CHECK-NEXT:    retq ## encoding: [0xc3]
507  %mask1 = icmp sge <8 x i64> %x1, %y1
508  %yb = load i64, ptr %yb.ptr, align 4
509  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
510  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
511  %mask0 = icmp sle <8 x i64> %x, %y
512  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
513  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
514  ret <8 x i64> %max
515}
516
; test28: compares two <8 x i1> masks (each from a signed greater-than on
; <8 x i64>) for equality and sign-extends the result to <8 x i32>. Expected
; output combines the masks with a mask-register XNOR (kxnorw in the AVX512
; runs, kxnorb in the run with +avx512dq) and expands the mask to a vector
; with zero-masked vpternlogd or vpmovm2d respectively.
517define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
518; AVX512-LABEL: test28:
519; AVX512:       ## %bb.0:
520; AVX512-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
521; AVX512-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x48,0x37,0xcb]
522; AVX512-NEXT:    kxnorw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc9]
523; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
524; AVX512-NEXT:    ## zmm0 {%k1} {z} = -1
525; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
526; AVX512-NEXT:    retq ## encoding: [0xc3]
527;
528; SKX-LABEL: test28:
529; SKX:       ## %bb.0:
530; SKX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
531; SKX-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x48,0x37,0xcb]
532; SKX-NEXT:    kxnorb %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x46,0xc1]
533; SKX-NEXT:    vpmovm2d %k0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
534; SKX-NEXT:    retq ## encoding: [0xc3]
535  %x_gt_y = icmp sgt <8 x i64> %x, %y
536  %x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
537  %res = icmp eq <8 x i1>%x_gt_y, %x1_gt_y1
538  %resse = sext <8 x i1>%res to <8 x i32>
539  ret <8 x i32> %resse
540}
541
; test29: compares two <16 x i1> masks (each from a signed greater-than on
; <16 x i32>) for inequality and sign-extends the result to <16 x i8>.
; Expected output combines the masks with kxorw in every run. The AVX512F-only
; run then builds a dword vector with zero-masked vpternlogd and truncates it
; with vpmovdb; the runs with +avx512bw expand the mask to bytes with vpmovm2b
; (zmm destination without +avx512vl, xmm destination with it).
542define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
543; KNL-LABEL: test29:
544; KNL:       ## %bb.0:
545; KNL-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
546; KNL-NEXT:    vpcmpgtd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6d,0x48,0x66,0xcb]
547; KNL-NEXT:    kxorw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x47,0xc9]
548; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
549; KNL-NEXT:    ## zmm0 {%k1} {z} = -1
550; KNL-NEXT:    vpmovdb %zmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
551; KNL-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
552; KNL-NEXT:    retq ## encoding: [0xc3]
553;
554; AVX512BW-LABEL: test29:
555; AVX512BW:       ## %bb.0:
556; AVX512BW-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
557; AVX512BW-NEXT:    vpcmpgtd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6d,0x48,0x66,0xcb]
558; AVX512BW-NEXT:    kxorw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x47,0xc1]
559; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
560; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
561; AVX512BW-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
562; AVX512BW-NEXT:    retq ## encoding: [0xc3]
563;
564; SKX-LABEL: test29:
565; SKX:       ## %bb.0:
566; SKX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
567; SKX-NEXT:    vpcmpgtd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6d,0x48,0x66,0xcb]
568; SKX-NEXT:    kxorw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x47,0xc1]
569; SKX-NEXT:    vpmovm2b %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
570; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
571; SKX-NEXT:    retq ## encoding: [0xc3]
572  %x_gt_y = icmp sgt <16 x i32> %x, %y
573  %x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
574  %res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
575  %resse = sext <16 x i1>%res to <16 x i8>
576  ret <16 x i8> %resse
577}
578
; test30: ordered-equal compare (fcmp oeq) and select on a 256-bit
; <4 x double> vector. Expected output uses zmm registers for the compare and
; blend in the runs without +avx512vl and ymm registers in the +avx512vl run.
579define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
580; AVX512-LABEL: test30:
581; AVX512:       ## %bb.0:
582; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
583; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
584; AVX512-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x00]
585; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
586; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
587; AVX512-NEXT:    retq ## encoding: [0xc3]
588;
589; SKX-LABEL: test30:
590; SKX:       ## %bb.0:
591; SKX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc9,0x00]
592; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
593; SKX-NEXT:    retq ## encoding: [0xc3]
594
595  %mask = fcmp oeq <4 x double> %x, %y
596  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
597  ret <4 x double> %max
598}
599
; test31: fcmp olt of %x against a <2 x double> loaded from %yp (align 4),
; selecting %x or %x1. In the runs without +avx512vl the expected output loads
; the operand separately with vmovupd and compares on zmm registers; in the
; +avx512vl run the load is the memory operand of a 128-bit vcmpltpd.
600define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, ptr %yp) nounwind {
601; AVX512-LABEL: test31:
602; AVX512:       ## %bb.0:
603; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
604; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
605; AVX512-NEXT:    vmovupd (%rdi), %xmm2 ## encoding: [0xc5,0xf9,0x10,0x17]
606; AVX512-NEXT:    vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01]
607; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
608; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
609; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
610; AVX512-NEXT:    retq ## encoding: [0xc3]
611;
612; SKX-LABEL: test31:
613; SKX:       ## %bb.0:
614; SKX-NEXT:    vcmpltpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x01]
615; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
616; SKX-NEXT:    retq ## encoding: [0xc3]
617
618  %y = load <2 x double>, ptr %yp, align 4
619  %mask = fcmp olt <2 x double> %x, %y
620  %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
621  ret <2 x double> %max
622}
623
; test31_commute - like test31, but the loaded vector is the first fcmp olt
; operand. The SKX check lines still expect the load folded as the memory
; operand, with the compare emitted as vcmpgtpd instead of vcmpltpd.
624define <2 x double> @test31_commute(<2 x double> %x, <2 x double> %x1, ptr %yp) nounwind {
625; AVX512-LABEL: test31_commute:
626; AVX512:       ## %bb.0:
627; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
628; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
629; AVX512-NEXT:    vmovupd (%rdi), %xmm2 ## encoding: [0xc5,0xf9,0x10,0x17]
630; AVX512-NEXT:    vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01]
631; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
632; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
633; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
634; AVX512-NEXT:    retq ## encoding: [0xc3]
635;
636; SKX-LABEL: test31_commute:
637; SKX:       ## %bb.0:
638; SKX-NEXT:    vcmpgtpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x0e]
639; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
640; SKX-NEXT:    retq ## encoding: [0xc3]
641
642  %y = load <2 x double>, ptr %yp, align 4
643  %mask = fcmp olt <2 x double> %y, %x
644  %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
645  ret <2 x double> %max
646}
647
; test32 - fcmp ogt <4 x double> with the loaded vector (%yp, align 4) as the
; first operand, then a select between %x and %x1.
; The SKX check lines expect the load folded into a vcmpltpd memory operand;
; the AVX512-prefix lines expect a separate vmovupd and a zmm-wide compare.
648define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, ptr %yp) nounwind {
649; AVX512-LABEL: test32:
650; AVX512:       ## %bb.0:
651; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
652; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
653; AVX512-NEXT:    vmovupd (%rdi), %ymm2 ## encoding: [0xc5,0xfd,0x10,0x17]
654; AVX512-NEXT:    vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01]
655; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
656; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
657; AVX512-NEXT:    retq ## encoding: [0xc3]
658;
659; SKX-LABEL: test32:
660; SKX:       ## %bb.0:
661; SKX-NEXT:    vcmpltpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x01]
662; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
663; SKX-NEXT:    retq ## encoding: [0xc3]
664
665  %y = load <4 x double>, ptr %yp, align 4
666  %mask = fcmp ogt <4 x double> %y, %x
667  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
668  ret <4 x double> %max
669}
670
; test32_commute - like test32, but %x is the first fcmp ogt operand and the
; loaded vector the second. The SKX check lines expect the load folded into a
; vcmpgtpd memory operand.
671define <4 x double> @test32_commute(<4 x double> %x, <4 x double> %x1, ptr %yp) nounwind {
672; AVX512-LABEL: test32_commute:
673; AVX512:       ## %bb.0:
674; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
675; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
676; AVX512-NEXT:    vmovupd (%rdi), %ymm2 ## encoding: [0xc5,0xfd,0x10,0x17]
677; AVX512-NEXT:    vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01]
678; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
679; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
680; AVX512-NEXT:    retq ## encoding: [0xc3]
681;
682; SKX-LABEL: test32_commute:
683; SKX:       ## %bb.0:
684; SKX-NEXT:    vcmpgtpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x0e]
685; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
686; SKX-NEXT:    retq ## encoding: [0xc3]
687
688  %y = load <4 x double>, ptr %yp, align 4
689  %mask = fcmp ogt <4 x double> %x, %y
690  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
691  ret <4 x double> %max
692}
693
; test33 - 512-bit case. fcmp olt <8 x double> %x against a vector loaded from
; %yp (align 4), then a select between %x and %x1. All RUN configurations share
; the same check lines, which expect the load folded into vcmpltpd.
694define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, ptr %yp) nounwind {
695; CHECK-LABEL: test33:
696; CHECK:       ## %bb.0:
697; CHECK-NEXT:    vcmpltpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x01]
698; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
699; CHECK-NEXT:    retq ## encoding: [0xc3]
700  %y = load <8 x double>, ptr %yp, align 4
701  %mask = fcmp olt <8 x double> %x, %y
702  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
703  ret <8 x double> %max
704}
705
; test33_commute - like test33, but the loaded vector is the first fcmp olt
; operand. The check lines expect the load to stay folded as the memory
; operand, with the compare emitted as vcmpgtpd.
706define <8 x double> @test33_commute(<8 x double> %x, <8 x double> %x1, ptr %yp) nounwind {
707; CHECK-LABEL: test33_commute:
708; CHECK:       ## %bb.0:
709; CHECK-NEXT:    vcmpgtpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x0e]
710; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
711; CHECK-NEXT:    retq ## encoding: [0xc3]
712  %y = load <8 x double>, ptr %yp, align 4
713  %mask = fcmp olt <8 x double> %y, %x
714  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
715  ret <8 x double> %max
716}
717
; test34 - single-precision 128-bit case. fcmp olt <4 x float> %x against a
; vector loaded from %yp (align 4), then a select between %x and %x1.
; The SKX check lines expect the load folded into vcmpltps; the AVX512-prefix
; lines expect a separate vmovups and a zmm-wide compare.
718define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, ptr %yp) nounwind {
719; AVX512-LABEL: test34:
720; AVX512:       ## %bb.0:
721; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
722; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
723; AVX512-NEXT:    vmovups (%rdi), %xmm2 ## encoding: [0xc5,0xf8,0x10,0x17]
724; AVX512-NEXT:    vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01]
725; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
726; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
727; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
728; AVX512-NEXT:    retq ## encoding: [0xc3]
729;
730; SKX-LABEL: test34:
731; SKX:       ## %bb.0:
732; SKX-NEXT:    vcmpltps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x01]
733; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
734; SKX-NEXT:    retq ## encoding: [0xc3]
735  %y = load <4 x float>, ptr %yp, align 4
736  %mask = fcmp olt <4 x float> %x, %y
737  %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
738  ret <4 x float> %max
739}
740
; test34_commute - like test34, but the loaded vector is the first fcmp olt
; operand. The SKX check lines expect the load folded as the memory operand,
; with the compare emitted as vcmpgtps.
741define <4 x float> @test34_commute(<4 x float> %x, <4 x float> %x1, ptr %yp) nounwind {
742; AVX512-LABEL: test34_commute:
743; AVX512:       ## %bb.0:
744; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
745; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
746; AVX512-NEXT:    vmovups (%rdi), %xmm2 ## encoding: [0xc5,0xf8,0x10,0x17]
747; AVX512-NEXT:    vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01]
748; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
749; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
750; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
751; AVX512-NEXT:    retq ## encoding: [0xc3]
752;
753; SKX-LABEL: test34_commute:
754; SKX:       ## %bb.0:
755; SKX-NEXT:    vcmpgtps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x0e]
756; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
757; SKX-NEXT:    retq ## encoding: [0xc3]
758  %y = load <4 x float>, ptr %yp, align 4
759  %mask = fcmp olt <4 x float> %y, %x
760  %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
761  ret <4 x float> %max
762}
763
; test35 - single-precision 256-bit case. fcmp ogt <8 x float> with the loaded
; vector (%yp, align 4) as the first operand, then a select between %x and %x1.
; The SKX check lines expect the load folded into a vcmpltps memory operand;
; the AVX512-prefix lines expect a separate vmovups and a zmm-wide compare.
764define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, ptr %yp) nounwind {
765; AVX512-LABEL: test35:
766; AVX512:       ## %bb.0:
767; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
768; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
769; AVX512-NEXT:    vmovups (%rdi), %ymm2 ## encoding: [0xc5,0xfc,0x10,0x17]
770; AVX512-NEXT:    vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01]
771; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
772; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
773; AVX512-NEXT:    retq ## encoding: [0xc3]
774;
775; SKX-LABEL: test35:
776; SKX:       ## %bb.0:
777; SKX-NEXT:    vcmpltps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x01]
778; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
779; SKX-NEXT:    retq ## encoding: [0xc3]
780
781  %y = load <8 x float>, ptr %yp, align 4
782  %mask = fcmp ogt <8 x float> %y, %x
783  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
784  ret <8 x float> %max
785}
786
; test35_commute - like test35, but %x is the first fcmp ogt operand and the
; loaded vector the second. The SKX check lines expect the load folded into a
; vcmpgtps memory operand.
787define <8 x float> @test35_commute(<8 x float> %x, <8 x float> %x1, ptr %yp) nounwind {
788; AVX512-LABEL: test35_commute:
789; AVX512:       ## %bb.0:
790; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
791; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
792; AVX512-NEXT:    vmovups (%rdi), %ymm2 ## encoding: [0xc5,0xfc,0x10,0x17]
793; AVX512-NEXT:    vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01]
794; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
795; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
796; AVX512-NEXT:    retq ## encoding: [0xc3]
797;
798; SKX-LABEL: test35_commute:
799; SKX:       ## %bb.0:
800; SKX-NEXT:    vcmpgtps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x0e]
801; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
802; SKX-NEXT:    retq ## encoding: [0xc3]
803
804  %y = load <8 x float>, ptr %yp, align 4
805  %mask = fcmp ogt <8 x float> %x, %y
806  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
807  ret <8 x float> %max
808}
809
; test36 - single-precision 512-bit case. fcmp olt <16 x float> %x against a
; vector loaded from %yp (align 4), then a select between %x and %x1. All RUN
; configurations share check lines that expect the load folded into vcmpltps.
810define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, ptr %yp) nounwind {
811; CHECK-LABEL: test36:
812; CHECK:       ## %bb.0:
813; CHECK-NEXT:    vcmpltps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x01]
814; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
815; CHECK-NEXT:    retq ## encoding: [0xc3]
816  %y = load <16 x float>, ptr %yp, align 4
817  %mask = fcmp olt <16 x float> %x, %y
818  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
819  ret <16 x float> %max
820}
821
; test36_commute - like test36, but the loaded vector is the first fcmp olt
; operand. The check lines expect the load to stay folded as the memory
; operand, with the compare emitted as vcmpgtps.
822define <16 x float> @test36_commute(<16 x float> %x, <16 x float> %x1, ptr %yp) nounwind {
823; CHECK-LABEL: test36_commute:
824; CHECK:       ## %bb.0:
825; CHECK-NEXT:    vcmpgtps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x0e]
826; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
827; CHECK-NEXT:    retq ## encoding: [0xc3]
828  %y = load <16 x float>, ptr %yp, align 4
829  %mask = fcmp olt <16 x float> %y, %x
830  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
831  ret <16 x float> %max
832}
833
; test37 - compare against a broadcast scalar. A double loaded from %ptr is
; splatted (insertelement + zero-mask shufflevector) and used as the first
; fcmp ogt operand against %x. The check lines expect the splat folded into an
; embedded-broadcast {1to8} memory operand of vcmpltpd.
834define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, ptr %ptr) nounwind {
835; CHECK-LABEL: test37:
836; CHECK:       ## %bb.0:
837; CHECK-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
838; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
839; CHECK-NEXT:    retq ## encoding: [0xc3]
840
841  %a = load double, ptr %ptr
842  %v = insertelement <8 x double> undef, double %a, i32 0
843  %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
844
845  %mask = fcmp ogt <8 x double> %shuffle, %x
846  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
847  ret <8 x double> %max
848}
849
; test37_commute - like test37, but the splatted scalar is the second fcmp ogt
; operand. The check lines expect the same {1to8} broadcast memory operand,
; with the compare emitted as vcmpgtpd.
850define <8 x double> @test37_commute(<8 x double> %x, <8 x double> %x1, ptr %ptr) nounwind {
851; CHECK-LABEL: test37_commute:
852; CHECK:       ## %bb.0:
853; CHECK-NEXT:    vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
854; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
855; CHECK-NEXT:    retq ## encoding: [0xc3]
856
857  %a = load double, ptr %ptr
858  %v = insertelement <8 x double> undef, double %a, i32 0
859  %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
860
861  %mask = fcmp ogt <8 x double> %x, %shuffle
862  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
863  ret <8 x double> %max
864}
865
; test38 - 256-bit broadcast compare. A double loaded from %ptr is splatted to
; <4 x double> and used as the first fcmp ogt operand against %x.
; The SKX check lines expect a {1to4} broadcast operand on a ymm vcmpltpd; the
; AVX512-prefix lines expect the operation widened to zmm with {1to8}.
866define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, ptr %ptr) nounwind {
867; AVX512-LABEL: test38:
868; AVX512:       ## %bb.0:
869; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
870; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
871; AVX512-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
872; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
873; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
874; AVX512-NEXT:    retq ## encoding: [0xc3]
875;
876; SKX-LABEL: test38:
877; SKX:       ## %bb.0:
878; SKX-NEXT:    vcmpltpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x01]
879; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
880; SKX-NEXT:    retq ## encoding: [0xc3]
881
882  %a = load double, ptr %ptr
883  %v = insertelement <4 x double> undef, double %a, i32 0
884  %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
885
886  %mask = fcmp ogt <4 x double> %shuffle, %x
887  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
888  ret <4 x double> %max
889}
890
; test38_commute - like test38, but the splatted scalar is the second fcmp ogt
; operand. Both check-line variants keep the broadcast memory operand and
; expect the compare emitted as vcmpgtpd.
891define <4 x double> @test38_commute(<4 x double> %x, <4 x double> %x1, ptr %ptr) nounwind {
892; AVX512-LABEL: test38_commute:
893; AVX512:       ## %bb.0:
894; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
895; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
896; AVX512-NEXT:    vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
897; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
898; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
899; AVX512-NEXT:    retq ## encoding: [0xc3]
900;
901; SKX-LABEL: test38_commute:
902; SKX:       ## %bb.0:
903; SKX-NEXT:    vcmpgtpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x0e]
904; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
905; SKX-NEXT:    retq ## encoding: [0xc3]
906
907  %a = load double, ptr %ptr
908  %v = insertelement <4 x double> undef, double %a, i32 0
909  %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
910
911  %mask = fcmp ogt <4 x double> %x, %shuffle
912  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
913  ret <4 x double> %max
914}
915
; test39 - 128-bit broadcast compare. A double loaded from %ptr is splatted to
; <2 x double> and used as the first fcmp ogt operand against %x.
; The SKX check lines expect a {1to2} broadcast operand on an xmm vcmpltpd; the
; AVX512-prefix lines expect the operation widened to zmm with {1to8}.
916define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, ptr %ptr) nounwind {
917; AVX512-LABEL: test39:
918; AVX512:       ## %bb.0:
919; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
920; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
921; AVX512-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
922; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
923; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
924; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
925; AVX512-NEXT:    retq ## encoding: [0xc3]
926;
927; SKX-LABEL: test39:
928; SKX:       ## %bb.0:
929; SKX-NEXT:    vcmpltpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x01]
930; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
931; SKX-NEXT:    retq ## encoding: [0xc3]
932
933  %a = load double, ptr %ptr
934  %v = insertelement <2 x double> undef, double %a, i32 0
935  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
936
937  %mask = fcmp ogt <2 x double> %shuffle, %x
938  %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
939  ret <2 x double> %max
940}
941
; test39_commute - like test39, but the splatted scalar is the second fcmp ogt
; operand. Both check-line variants keep the broadcast memory operand and
; expect the compare emitted as vcmpgtpd.
942define <2 x double> @test39_commute(<2 x double> %x, <2 x double> %x1, ptr %ptr) nounwind {
943; AVX512-LABEL: test39_commute:
944; AVX512:       ## %bb.0:
945; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
946; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
947; AVX512-NEXT:    vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
948; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
949; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
950; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
951; AVX512-NEXT:    retq ## encoding: [0xc3]
952;
953; SKX-LABEL: test39_commute:
954; SKX:       ## %bb.0:
955; SKX-NEXT:    vcmpgtpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x0e]
956; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
957; SKX-NEXT:    retq ## encoding: [0xc3]
958
959  %a = load double, ptr %ptr
960  %v = insertelement <2 x double> undef, double %a, i32 0
961  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
962
963  %mask = fcmp ogt <2 x double> %x, %shuffle
964  %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
965  ret <2 x double> %max
966}
967
968
; test40 - 512-bit single-precision broadcast compare. A float loaded from %ptr
; is splatted to <16 x float> and used as the first fcmp ogt operand against
; %x. The check lines expect a {1to16} broadcast memory operand on vcmpltps.
969define <16  x float> @test40(<16  x float> %x, <16  x float> %x1, ptr %ptr) nounwind {
970; CHECK-LABEL: test40:
971; CHECK:       ## %bb.0:
972; CHECK-NEXT:    vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
973; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
974; CHECK-NEXT:    retq ## encoding: [0xc3]
975
976  %a = load float, ptr %ptr
977  %v = insertelement <16  x float> undef, float %a, i32 0
978  %shuffle = shufflevector <16  x float> %v, <16  x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
979
980  %mask = fcmp ogt <16  x float> %shuffle, %x
981  %max = select <16 x i1> %mask, <16  x float> %x, <16  x float> %x1
982  ret <16  x float> %max
983}
984
; test40_commute - like test40, but the splatted scalar is the second fcmp ogt
; operand. The check lines expect the same {1to16} broadcast memory operand,
; with the compare emitted as vcmpgtps.
985define <16  x float> @test40_commute(<16  x float> %x, <16  x float> %x1, ptr %ptr) nounwind {
986; CHECK-LABEL: test40_commute:
987; CHECK:       ## %bb.0:
988; CHECK-NEXT:    vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
989; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
990; CHECK-NEXT:    retq ## encoding: [0xc3]
991
992  %a = load float, ptr %ptr
993  %v = insertelement <16  x float> undef, float %a, i32 0
994  %shuffle = shufflevector <16  x float> %v, <16  x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
995
996  %mask = fcmp ogt <16  x float> %x, %shuffle
997  %max = select <16 x i1> %mask, <16  x float> %x, <16  x float> %x1
998  ret <16  x float> %max
999}
1000
; test41 - 256-bit single-precision broadcast compare. A float loaded from %ptr
; is splatted to <8 x float> and used as the first fcmp ogt operand against %x.
; The SKX check lines expect a {1to8} broadcast operand on a ymm vcmpltps; the
; AVX512-prefix lines expect the operation widened to zmm with {1to16}.
1001define <8  x float> @test41(<8  x float> %x, <8  x float> %x1, ptr %ptr) nounwind {
1002; AVX512-LABEL: test41:
1003; AVX512:       ## %bb.0:
1004; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
1005; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
1006; AVX512-NEXT:    vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
1007; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
1008; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
1009; AVX512-NEXT:    retq ## encoding: [0xc3]
1010;
1011; SKX-LABEL: test41:
1012; SKX:       ## %bb.0:
1013; SKX-NEXT:    vcmpltps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x01]
1014; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
1015; SKX-NEXT:    retq ## encoding: [0xc3]
1016
1017  %a = load float, ptr %ptr
1018  %v = insertelement <8  x float> undef, float %a, i32 0
1019  %shuffle = shufflevector <8  x float> %v, <8  x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1020
1021  %mask = fcmp ogt <8  x float> %shuffle, %x
1022  %max = select <8 x i1> %mask, <8  x float> %x, <8  x float> %x1
1023  ret <8  x float> %max
1024}
1025
; test41_commute - like test41, but the splatted scalar is the second fcmp ogt
; operand. Both check-line variants keep the broadcast memory operand and
; expect the compare emitted as vcmpgtps.
1026define <8  x float> @test41_commute(<8  x float> %x, <8  x float> %x1, ptr %ptr) nounwind {
1027; AVX512-LABEL: test41_commute:
1028; AVX512:       ## %bb.0:
1029; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
1030; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
1031; AVX512-NEXT:    vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
1032; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
1033; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
1034; AVX512-NEXT:    retq ## encoding: [0xc3]
1035;
1036; SKX-LABEL: test41_commute:
1037; SKX:       ## %bb.0:
1038; SKX-NEXT:    vcmpgtps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x0e]
1039; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
1040; SKX-NEXT:    retq ## encoding: [0xc3]
1041
1042  %a = load float, ptr %ptr
1043  %v = insertelement <8  x float> undef, float %a, i32 0
1044  %shuffle = shufflevector <8  x float> %v, <8  x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1045
1046  %mask = fcmp ogt <8  x float> %x, %shuffle
1047  %max = select <8 x i1> %mask, <8  x float> %x, <8  x float> %x1
1048  ret <8  x float> %max
1049}
1050
; test42 - 128-bit single-precision broadcast compare. A float loaded from %ptr
; is splatted to <4 x float> and used as the first fcmp ogt operand against %x.
; The SKX check lines expect a {1to4} broadcast operand on an xmm vcmpltps; the
; AVX512-prefix lines expect the operation widened to zmm with {1to16}.
1051define <4  x float> @test42(<4  x float> %x, <4  x float> %x1, ptr %ptr) nounwind {
1052; AVX512-LABEL: test42:
1053; AVX512:       ## %bb.0:
1054; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
1055; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
1056; AVX512-NEXT:    vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
1057; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
1058; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
1059; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1060; AVX512-NEXT:    retq ## encoding: [0xc3]
1061;
1062; SKX-LABEL: test42:
1063; SKX:       ## %bb.0:
1064; SKX-NEXT:    vcmpltps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x01]
1065; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
1066; SKX-NEXT:    retq ## encoding: [0xc3]
1067
1068  %a = load float, ptr %ptr
1069  %v = insertelement <4  x float> undef, float %a, i32 0
1070  %shuffle = shufflevector <4  x float> %v, <4  x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1071
1072  %mask = fcmp ogt <4  x float> %shuffle, %x
1073  %max = select <4 x i1> %mask, <4  x float> %x, <4  x float> %x1
1074  ret <4  x float> %max
1075}
1076
; test42_commute - like test42, but the splatted scalar is the second fcmp ogt
; operand. Both check-line variants keep the broadcast memory operand and
; expect the compare emitted as vcmpgtps.
1077define <4  x float> @test42_commute(<4  x float> %x, <4  x float> %x1, ptr %ptr) nounwind {
1078; AVX512-LABEL: test42_commute:
1079; AVX512:       ## %bb.0:
1080; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
1081; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
1082; AVX512-NEXT:    vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
1083; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
1084; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
1085; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1086; AVX512-NEXT:    retq ## encoding: [0xc3]
1087;
1088; SKX-LABEL: test42_commute:
1089; SKX:       ## %bb.0:
1090; SKX-NEXT:    vcmpgtps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x0e]
1091; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
1092; SKX-NEXT:    retq ## encoding: [0xc3]
1093
1094  %a = load float, ptr %ptr
1095  %v = insertelement <4  x float> undef, float %a, i32 0
1096  %shuffle = shufflevector <4  x float> %v, <4  x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1097
1098  %mask = fcmp ogt <4  x float> %x, %shuffle
1099  %max = select <4 x i1> %mask, <4  x float> %x, <4  x float> %x1
1100  ret <4  x float> %max
1101}
1102
; test43 - broadcast compare whose result is AND'ed with an incoming <8 x i1>
; mask before driving the select. The splatted scalar is the first fcmp ogt
; operand.
; The KNL check lines expect %mask_in to be extended and tested with vptestmq
; under the compare result as write mask. The AVX512BW and SKX lines expect
; %mask_in moved to %k1 with vpmovw2m and used as the write mask of the
; broadcast vcmpltpd itself.
1103define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, ptr %ptr,<8 x i1> %mask_in) nounwind {
1104; KNL-LABEL: test43:
1105; KNL:       ## %bb.0:
1106; KNL-NEXT:    vpmovsxwq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xd2]
1107; KNL-NEXT:    vpsllq $63, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf2,0x3f]
1108; KNL-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
1109; KNL-NEXT:    vptestmq %zmm2, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x27,0xca]
1110; KNL-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
1111; KNL-NEXT:    retq ## encoding: [0xc3]
1112;
1113; AVX512BW-LABEL: test43:
1114; AVX512BW:       ## %bb.0:
1115; AVX512BW-NEXT:    vpsllw $15, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x71,0xf2,0x0f]
1116; AVX512BW-NEXT:    vpmovw2m %zmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x29,0xca]
1117; AVX512BW-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x01]
1118; AVX512BW-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
1119; AVX512BW-NEXT:    retq ## encoding: [0xc3]
1120;
1121; SKX-LABEL: test43:
1122; SKX:       ## %bb.0:
1123; SKX-NEXT:    vpsllw $15, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf2,0x0f]
1124; SKX-NEXT:    vpmovw2m %xmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xca]
1125; SKX-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x01]
1126; SKX-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
1127; SKX-NEXT:    retq ## encoding: [0xc3]
1128
1129  %a = load double, ptr %ptr
1130  %v = insertelement <8 x double> undef, double %a, i32 0
1131  %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
1132
1133  %mask_cmp = fcmp ogt <8 x double> %shuffle, %x
1134  %mask = and <8 x i1> %mask_cmp, %mask_in
1135  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
1136  ret <8 x double> %max
1137}
1138
; test43_commute - like test43, but the splatted scalar is the second fcmp ogt
; operand. The check lines have the same shape as for test43 in each
; configuration, with the broadcast compare emitted as vcmpgtpd.
1139define <8 x double> @test43_commute(<8 x double> %x, <8 x double> %x1, ptr %ptr,<8 x i1> %mask_in) nounwind {
1140; KNL-LABEL: test43_commute:
1141; KNL:       ## %bb.0:
1142; KNL-NEXT:    vpmovsxwq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xd2]
1143; KNL-NEXT:    vpsllq $63, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf2,0x3f]
1144; KNL-NEXT:    vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
1145; KNL-NEXT:    vptestmq %zmm2, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x27,0xca]
1146; KNL-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
1147; KNL-NEXT:    retq ## encoding: [0xc3]
1148;
1149; AVX512BW-LABEL: test43_commute:
1150; AVX512BW:       ## %bb.0:
1151; AVX512BW-NEXT:    vpsllw $15, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x71,0xf2,0x0f]
1152; AVX512BW-NEXT:    vpmovw2m %zmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x29,0xca]
1153; AVX512BW-NEXT:    vcmpgtpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x0e]
1154; AVX512BW-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
1155; AVX512BW-NEXT:    retq ## encoding: [0xc3]
1156;
1157; SKX-LABEL: test43_commute:
1158; SKX:       ## %bb.0:
1159; SKX-NEXT:    vpsllw $15, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf2,0x0f]
1160; SKX-NEXT:    vpmovw2m %xmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xca]
1161; SKX-NEXT:    vcmpgtpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x0e]
1162; SKX-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
1163; SKX-NEXT:    retq ## encoding: [0xc3]
1164
1165  %a = load double, ptr %ptr
1166  %v = insertelement <8 x double> undef, double %a, i32 0
1167  %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
1168
1169  %mask_cmp = fcmp ogt <8 x double> %x, %shuffle
1170  %mask = and <8 x i1> %mask_cmp, %mask_in
1171  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
1172  ret <8 x double> %max
1173}
1174
; test44 - icmp eq on <4 x i16>, with the <4 x i1> result sign-extended to
; <4 x i32>. The AVX512-prefix check lines expect a vector-register vpcmpeqw
; followed by vpmovsxwd; the SKX lines expect a compare into %k0 followed by
; vpmovm2d.
1175define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) #0 {
1176; AVX512-LABEL: test44:
1177; AVX512:       ## %bb.0:
1178; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x75,0xc1]
1179; AVX512-NEXT:    vpmovsxwd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x23,0xc0]
1180; AVX512-NEXT:    retq ## encoding: [0xc3]
1181;
1182; SKX-LABEL: test44:
1183; SKX:       ## %bb.0:
1184; SKX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
1185; SKX-NEXT:    vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
1186; SKX-NEXT:    retq ## encoding: [0xc3]
1187  %mask = icmp eq <4 x i16> %x, %y
1188  %1 = sext <4 x i1> %mask to <4 x i32>
1189  ret <4 x i32> %1
1190}
1191
; test45 - icmp eq on <2 x i16>, with the <2 x i1> result zero-extended to
; <2 x i64>. The AVX512-prefix check lines expect vpcmpeqw, vpmovzxwq and a
; vpand with a constant-pool operand; the SKX lines expect a compare into %k0,
; vpmovm2q, and a logical right shift by 63.
1192define <2 x i64> @test45(<2 x i16> %x, <2 x i16> %y) #0 {
1193; AVX512-LABEL: test45:
1194; AVX512:       ## %bb.0:
1195; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x75,0xc1]
1196; AVX512-NEXT:    vpmovzxwq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x34,0xc0]
1197; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1198; AVX512-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
1199; AVX512-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1200; AVX512-NEXT:    retq ## encoding: [0xc3]
1201;
1202; SKX-LABEL: test45:
1203; SKX:       ## %bb.0:
1204; SKX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
1205; SKX-NEXT:    vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
1206; SKX-NEXT:    vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
1207; SKX-NEXT:    retq ## encoding: [0xc3]
1208  %mask = icmp eq <2 x i16> %x, %y
1209  %1 = zext <2 x i1> %mask to <2 x i64>
1210  ret <2 x i64> %1
1211}
1212
; test46 - fcmp oeq on <2 x float>, with the <2 x i1> result zero-extended to
; <2 x i64>. The AVX512-prefix check lines expect a vector-register vcmpeqps,
; a vshufps and a vandps with a constant-pool operand; the SKX lines expect a
; compare into %k0, vpmovm2q, and a logical right shift by 63.
1213define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 {
1214; AVX512-LABEL: test46:
1215; AVX512:       ## %bb.0:
1216; AVX512-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc2,0xc1,0x00]
1217; AVX512-NEXT:    vshufps $212, %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc6,0xc0,0xd4]
1218; AVX512-NEXT:    ## xmm0 = xmm0[0,1,1,3]
1219; AVX512-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x54,0x05,A,A,A,A]
1220; AVX512-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1221; AVX512-NEXT:    retq ## encoding: [0xc3]
1222;
1223; SKX-LABEL: test46:
1224; SKX:       ## %bb.0:
1225; SKX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x00]
1226; SKX-NEXT:    vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
1227; SKX-NEXT:    vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
1228; SKX-NEXT:    retq ## encoding: [0xc3]
1229  %mask = fcmp oeq <2 x float> %x, %y
1230  %1 = zext <2 x i1> %mask to <2 x i64>
1231  ret <2 x i64> %1
1232}
1233
; Selects between two <16 x i8> vectors using the <16 x i1> mask produced by
; comparing a <16 x i32> vector against zero. The run without AVX512BW is
; expected to turn the mask back into a byte vector (vpternlogd + vpmovdb) and
; use vpblendvb; the runs with AVX512BW are expected to use a masked vpblendmb.
1234define <16 x i8> @test47(<16 x i32> %a, <16 x i8> %b, <16 x i8> %c) {
1235; KNL-LABEL: test47:
1236; KNL:       ## %bb.0:
1237; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
1238; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
1239; KNL-NEXT:    ## zmm0 {%k1} {z} = -1
1240; KNL-NEXT:    vpmovdb %zmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
1241; KNL-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 ## encoding: [0xc4,0xe3,0x69,0x4c,0xc1,0x00]
1242; KNL-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1243; KNL-NEXT:    retq ## encoding: [0xc3]
1244;
1245; AVX512BW-LABEL: test47:
1246; AVX512BW:       ## %bb.0:
1247; AVX512BW-NEXT:    ## kill: def $xmm2 killed $xmm2 def $zmm2
1248; AVX512BW-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
1249; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
1250; AVX512BW-NEXT:    vpblendmb %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x66,0xc1]
1251; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
1252; AVX512BW-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1253; AVX512BW-NEXT:    retq ## encoding: [0xc3]
1254;
1255; SKX-LABEL: test47:
1256; SKX:       ## %bb.0:
1257; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
1258; SKX-NEXT:    vpblendmb %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x66,0xc1]
1259; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1260; SKX-NEXT:    retq ## encoding: [0xc3]
1261  %cmp = icmp eq <16 x i32> %a, zeroinitializer
1262  %res = select <16 x i1> %cmp, <16 x i8> %b, <16 x i8> %c
1263  ret <16 x i8> %res
1264}
1265
; Selects between two <16 x i16> vectors using the <16 x i1> mask produced by
; comparing a <16 x i32> vector against zero. The run without AVX512BW is
; expected to rebuild a word-sized vector mask (vpternlogd + vpmovdw) and use
; vpblendvb; the runs with AVX512BW are expected to use a masked vpblendmw.
1266define <16 x i16> @test48(<16 x i32> %a, <16 x i16> %b, <16 x i16> %c) {
1267; KNL-LABEL: test48:
1268; KNL:       ## %bb.0:
1269; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
1270; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
1271; KNL-NEXT:    ## zmm0 {%k1} {z} = -1
1272; KNL-NEXT:    vpmovdw %zmm0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x33,0xc0]
1273; KNL-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 ## encoding: [0xc4,0xe3,0x6d,0x4c,0xc1,0x00]
1274; KNL-NEXT:    retq ## encoding: [0xc3]
1275;
1276; AVX512BW-LABEL: test48:
1277; AVX512BW:       ## %bb.0:
1278; AVX512BW-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
1279; AVX512BW-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
1280; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
1281; AVX512BW-NEXT:    vpblendmw %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x66,0xc1]
1282; AVX512BW-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
1283; AVX512BW-NEXT:    retq ## encoding: [0xc3]
1284;
1285; SKX-LABEL: test48:
1286; SKX:       ## %bb.0:
1287; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
1288; SKX-NEXT:    vpblendmw %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x66,0xc1]
1289; SKX-NEXT:    retq ## encoding: [0xc3]
1290  %cmp = icmp eq <16 x i32> %a, zeroinitializer
1291  %res = select <16 x i1> %cmp, <16 x i16> %b, <16 x i16> %c
1292  ret <16 x i16> %res
1293}
1294
; Selects between two <8 x i16> vectors using the <8 x i1> mask produced by
; comparing an <8 x i64> vector against zero (vptestnmq in every run). The run
; without AVX512BW is expected to rebuild a vector mask and use vpblendvb; the
; runs with AVX512BW are expected to use a masked vpblendmw.
1295define <8 x i16> @test49(<8 x i64> %a, <8 x i16> %b, <8 x i16> %c) {
1296; KNL-LABEL: test49:
1297; KNL:       ## %bb.0:
1298; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc8]
1299; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
1300; KNL-NEXT:    ## zmm0 {%k1} {z} = -1
1301; KNL-NEXT:    vpmovdw %zmm0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x33,0xc0]
1302; KNL-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 ## encoding: [0xc4,0xe3,0x69,0x4c,0xc1,0x00]
1303; KNL-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1304; KNL-NEXT:    retq ## encoding: [0xc3]
1305;
1306; AVX512BW-LABEL: test49:
1307; AVX512BW:       ## %bb.0:
1308; AVX512BW-NEXT:    ## kill: def $xmm2 killed $xmm2 def $zmm2
1309; AVX512BW-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
1310; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc8]
1311; AVX512BW-NEXT:    vpblendmw %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x66,0xc1]
1312; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
1313; AVX512BW-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1314; AVX512BW-NEXT:    retq ## encoding: [0xc3]
1315;
1316; SKX-LABEL: test49:
1317; SKX:       ## %bb.0:
1318; SKX-NEXT:    vptestnmq %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc8]
1319; SKX-NEXT:    vpblendmw %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x66,0xc1]
1320; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1321; SKX-NEXT:    retq ## encoding: [0xc3]
1322  %cmp = icmp eq <8 x i64> %a, zeroinitializer
1323  %res = select <8 x i1> %cmp, <8 x i16> %b, <8 x i16> %c
1324  ret <8 x i16> %res
1325}
1326
; Compares %a for equality with a <16 x i32> vector loaded from %b (the load is
; the second icmp operand) and returns the <16 x i1> result bitcast to i16.
; Every run is expected to fold the load into vpcmpeqd as a memory operand.
1327define i16 @pcmpeq_mem_1(<16 x i32> %a, ptr %b) {
1328; KNL-LABEL: pcmpeq_mem_1:
1329; KNL:       ## %bb.0:
1330; KNL-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
1331; KNL-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
1332; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
1333; KNL-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1334; KNL-NEXT:    retq ## encoding: [0xc3]
1335;
1336; AVX512BW-LABEL: pcmpeq_mem_1:
1337; AVX512BW:       ## %bb.0:
1338; AVX512BW-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
1339; AVX512BW-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
1340; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
1341; AVX512BW-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1342; AVX512BW-NEXT:    retq ## encoding: [0xc3]
1343;
1344; SKX-LABEL: pcmpeq_mem_1:
1345; SKX:       ## %bb.0:
1346; SKX-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
1347; SKX-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
1348; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
1349; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1350; SKX-NEXT:    retq ## encoding: [0xc3]
1351  %load = load <16 x i32>, ptr %b
1352  %cmp = icmp eq <16 x i32> %a, %load
1353  %cast = bitcast <16 x i1> %cmp to i16
1354  ret i16 %cast
1355}
1356
1357; Make sure we use the short pcmpeq encoding like the test above when the memory
1358; operand is in the first argument instead of the second.
; IR under test: the <16 x i32> value loaded from %b is the first icmp eq
; operand and %a is the second; the <16 x i1> result is bitcast to i16. The
; expected instructions are the same as for pcmpeq_mem_1.
1359define i16 @pcmpeq_mem_2(<16 x i32> %a, ptr %b) {
1360; KNL-LABEL: pcmpeq_mem_2:
1361; KNL:       ## %bb.0:
1362; KNL-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
1363; KNL-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
1364; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
1365; KNL-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1366; KNL-NEXT:    retq ## encoding: [0xc3]
1367;
1368; AVX512BW-LABEL: pcmpeq_mem_2:
1369; AVX512BW:       ## %bb.0:
1370; AVX512BW-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
1371; AVX512BW-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
1372; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
1373; AVX512BW-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1374; AVX512BW-NEXT:    retq ## encoding: [0xc3]
1375;
1376; SKX-LABEL: pcmpeq_mem_2:
1377; SKX:       ## %bb.0:
1378; SKX-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
1379; SKX-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
1380; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
1381; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1382; SKX-NEXT:    retq ## encoding: [0xc3]
1383  %load = load <16 x i32>, ptr %b
1384  %cmp = icmp eq <16 x i32> %load, %a
1385  %cast = bitcast <16 x i1> %cmp to i16
1386  ret i16 %cast
1387}
1388
1389; Don't let a degenerate case trigger an infinite loop.
1390; This should get simplified before it even exists as a vselect node,
1391; but that does not happen as of this change.
1392
; IR under test: an fcmp ogt mask selects between <i64 undef, i64 0> and
; zeroinitializer. %t0 is unused. Every run is expected to emit only a vxorps
; that zeroes xmm0 before returning.
1393define <2 x i64> @PR41066(<2 x i64> %t0, <2 x double> %x, <2 x double> %y) {
1394; AVX512-LABEL: PR41066:
1395; AVX512:       ## %bb.0:
1396; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x57,0xc0]
1397; AVX512-NEXT:    retq ## encoding: [0xc3]
1398;
1399; SKX-LABEL: PR41066:
1400; SKX:       ## %bb.0:
1401; SKX-NEXT:    vxorps %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
1402; SKX-NEXT:    retq ## encoding: [0xc3]
1403  %t1 = fcmp ogt <2 x double> %x, %y
1404  %t2 = select <2 x i1> %t1, <2 x i64> <i64 undef, i64 0>, <2 x i64> zeroinitializer
1405  ret <2 x i64> %t2
1406}
1407
; Compares two <4 x i64> vectors against zero, ORs the two <4 x i1> results,
; zero-extends the combined mask to <4 x i32> and adds it to %x. In every run
; the expected code ORs the two mask registers with korw, materializes the
; mask as a vector (all-ones lanes via vpternlogd, or vpmovm2d) and combines it
; with the third argument's register using vpsubd.
1408define <4 x i32> @zext_bool_logic(<4 x i64> %cond1, <4 x i64> %cond2, <4 x i32> %x) {
1409; AVX512-LABEL: zext_bool_logic:
1410; AVX512:       ## %bb.0:
1411; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
1412; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
1413; AVX512-NEXT:    vptestnmq %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc0]
1414; AVX512-NEXT:    vptestnmq %zmm1, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf6,0x48,0x27,0xc9]
1415; AVX512-NEXT:    korw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x45,0xc9]
1416; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
1417; AVX512-NEXT:    ## zmm0 {%k1} {z} = -1
1418; AVX512-NEXT:    vpsubd %xmm0, %xmm2, %xmm0 ## encoding: [0xc5,0xe9,0xfa,0xc0]
1419; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1420; AVX512-NEXT:    retq ## encoding: [0xc3]
1421;
1422; SKX-LABEL: zext_bool_logic:
1423; SKX:       ## %bb.0:
1424; SKX-NEXT:    vptestnmq %ymm0, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc0]
1425; SKX-NEXT:    vptestnmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf6,0x28,0x27,0xc9]
1426; SKX-NEXT:    korw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x45,0xc1]
1427; SKX-NEXT:    vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
1428; SKX-NEXT:    vpsubd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfa,0xc0]
1429; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1430; SKX-NEXT:    retq ## encoding: [0xc3]
1431  %a = icmp eq <4 x i64> %cond1, zeroinitializer
1432  %b = icmp eq <4 x i64> %cond2, zeroinitializer
1433  %c = or <4 x i1> %a, %b
1434  %d = zext <4 x i1> %c to <4 x i32>
1435  %e = add <4 x i32> %d, %x
1436  ret <4 x i32> %e
1437}
1438
1439; This used to crash in WidenVecRes_SETCC due to generating the wrong
1440; result type.
; IR under test: loads a <2 x half> from %x, compares it unordered-or-not-equal
; (fcmp une) against zero, zero-extends the <2 x i1> result to <2 x i8> and
; stores it to %y. The runs without AVX512VL are expected to convert and
; compare each half element separately with scalar vucomiss/cmov sequences;
; the VL run is expected to use one vector vcvtph2ps and a masked compare.
1441define void @half_vec_compare(ptr %x, ptr %y) {
1442; KNL-LABEL: half_vec_compare:
1443; KNL:       ## %bb.0: ## %entry
1444; KNL-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1445; KNL-NEXT:    ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07]
1446; KNL-NEXT:    vpshuflw $85, %xmm0, %xmm1 ## encoding: [0xc5,0xfb,0x70,0xc8,0x55]
1447; KNL-NEXT:    ## xmm1 = xmm0[1,1,1,1,4,5,6,7]
1448; KNL-NEXT:    vcvtph2ps %xmm1, %xmm1 ## encoding: [0xc4,0xe2,0x79,0x13,0xc9]
1449; KNL-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
1450; KNL-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
1451; KNL-NEXT:    vucomiss %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xca]
1452; KNL-NEXT:    movl $65535, %ecx ## encoding: [0xb9,0xff,0xff,0x00,0x00]
1453; KNL-NEXT:    ## imm = 0xFFFF
1454; KNL-NEXT:    movl $0, %edx ## encoding: [0xba,0x00,0x00,0x00,0x00]
1455; KNL-NEXT:    cmovnel %ecx, %edx ## encoding: [0x0f,0x45,0xd1]
1456; KNL-NEXT:    cmovpl %ecx, %edx ## encoding: [0x0f,0x4a,0xd1]
1457; KNL-NEXT:    vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
1458; KNL-NEXT:    vucomiss %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc2]
1459; KNL-NEXT:    cmovnel %ecx, %eax ## encoding: [0x0f,0x45,0xc1]
1460; KNL-NEXT:    cmovpl %ecx, %eax ## encoding: [0x0f,0x4a,0xc1]
1461; KNL-NEXT:    vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
1462; KNL-NEXT:    vpinsrw $1, %edx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc2,0x01]
1463; KNL-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0]
1464; KNL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
1465; KNL-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1466; KNL-NEXT:    vpextrw $0, %xmm0, (%rsi) ## encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
1467; KNL-NEXT:    retq ## encoding: [0xc3]
1468;
1469; AVX512BW-LABEL: half_vec_compare:
1470; AVX512BW:       ## %bb.0: ## %entry
1471; AVX512BW-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1472; AVX512BW-NEXT:    ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07]
1473; AVX512BW-NEXT:    vpshuflw $85, %xmm0, %xmm1 ## encoding: [0xc5,0xfb,0x70,0xc8,0x55]
1474; AVX512BW-NEXT:    ## xmm1 = xmm0[1,1,1,1,4,5,6,7]
1475; AVX512BW-NEXT:    vcvtph2ps %xmm1, %xmm1 ## encoding: [0xc4,0xe2,0x79,0x13,0xc9]
1476; AVX512BW-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
1477; AVX512BW-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
1478; AVX512BW-NEXT:    vucomiss %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xca]
1479; AVX512BW-NEXT:    movl $65535, %ecx ## encoding: [0xb9,0xff,0xff,0x00,0x00]
1480; AVX512BW-NEXT:    ## imm = 0xFFFF
1481; AVX512BW-NEXT:    movl $0, %edx ## encoding: [0xba,0x00,0x00,0x00,0x00]
1482; AVX512BW-NEXT:    cmovnel %ecx, %edx ## encoding: [0x0f,0x45,0xd1]
1483; AVX512BW-NEXT:    cmovpl %ecx, %edx ## encoding: [0x0f,0x4a,0xd1]
1484; AVX512BW-NEXT:    vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
1485; AVX512BW-NEXT:    vucomiss %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc2]
1486; AVX512BW-NEXT:    cmovnel %ecx, %eax ## encoding: [0x0f,0x45,0xc1]
1487; AVX512BW-NEXT:    cmovpl %ecx, %eax ## encoding: [0x0f,0x4a,0xc1]
1488; AVX512BW-NEXT:    vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
1489; AVX512BW-NEXT:    vpinsrw $1, %edx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x01]
1490; AVX512BW-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0]
1491; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
1492; AVX512BW-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1493; AVX512BW-NEXT:    vpextrw $0, %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
1494; AVX512BW-NEXT:    retq ## encoding: [0xc3]
1495;
1496; SKX-LABEL: half_vec_compare:
1497; SKX:       ## %bb.0: ## %entry
1498; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1499; SKX-NEXT:    ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
1500; SKX-NEXT:    vcvtph2ps %xmm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
1501; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x57,0xc9]
1502; SKX-NEXT:    vcmpneqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x04]
1503; SKX-NEXT:    vmovdqu8 {{.*#+}} xmm0 {%k1} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1504; SKX-NEXT:    ## encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x05,A,A,A,A]
1505; SKX-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1506; SKX-NEXT:    vpextrw $0, %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
1507; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1508; SKX-NEXT:    retq ## encoding: [0xc3]
1509entry:
1510  %0 = load <2 x half>, ptr %x
1511  %1 = fcmp une <2 x half> %0, zeroinitializer
1512  %2 = zext <2 x i1> %1 to <2 x i8>
1513  store <2 x i8> %2, ptr %y
1514  ret void
1515}
1516
1517; This test used to end up with the vpcmpgtb on KNL having its operands in the wrong order.
; IR under test: loads a <16 x i8> from %x, keeps the even-indexed bytes as an
; <8 x i8> via shufflevector, tests each for being negative (icmp slt zero) and
; uses the result to select between the <8 x i64> vectors %y and %z. The run
; without AVX512BW is expected to compute the sign test as vpcmpgtb against a
; zeroed register; the runs with AVX512BW are expected to use vpmovb2m.
1518define <8 x i64> @cmp_swap_bug(ptr %x, <8 x i64> %y, <8 x i64> %z) {
1519; KNL-LABEL: cmp_swap_bug:
1520; KNL:       ## %bb.0: ## %entry
1521; KNL-NEXT:    vmovdqa (%rdi), %xmm2 ## encoding: [0xc5,0xf9,0x6f,0x17]
1522; KNL-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1523; KNL-NEXT:    ## encoding: [0xc4,0xe2,0x69,0x00,0x15,A,A,A,A]
1524; KNL-NEXT:    ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1525; KNL-NEXT:    vpxor %xmm3, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0xef,0xdb]
1526; KNL-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2 ## encoding: [0xc5,0xe1,0x64,0xd2]
1527; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x21,0xd2]
1528; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x48,0x27,0xca]
1529; KNL-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
1530; KNL-NEXT:    retq ## encoding: [0xc3]
1531;
1532; AVX512BW-LABEL: cmp_swap_bug:
1533; AVX512BW:       ## %bb.0: ## %entry
1534; AVX512BW-NEXT:    vmovdqa (%rdi), %xmm2 ## encoding: [0xc5,0xf9,0x6f,0x17]
1535; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1536; AVX512BW-NEXT:    ## encoding: [0xc4,0xe2,0x69,0x00,0x15,A,A,A,A]
1537; AVX512BW-NEXT:    ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1538; AVX512BW-NEXT:    vpmovb2m %zmm2, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x29,0xca]
1539; AVX512BW-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
1540; AVX512BW-NEXT:    retq ## encoding: [0xc3]
1541;
1542; SKX-LABEL: cmp_swap_bug:
1543; SKX:       ## %bb.0: ## %entry
1544; SKX-NEXT:    vmovdqa (%rdi), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x17]
1545; SKX-NEXT:    vpmovwb %xmm2, %xmm2 ## encoding: [0x62,0xf2,0x7e,0x08,0x30,0xd2]
1546; SKX-NEXT:    vpmovb2m %xmm2, %k1 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xca]
1547; SKX-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
1548; SKX-NEXT:    retq ## encoding: [0xc3]
1549entry:
1550  %0 = load <16 x i8>, ptr %x
1551  %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1552  %2 = icmp slt <8 x i8> %1, zeroinitializer
1553  %3 = select <8 x i1> %2, <8 x i64> %y, <8 x i64> %z
1554  ret <8 x i64> %3
1555}
1556
; Compares a <2 x i64> vector against zero and uses the <2 x i1> result to
; select zero where the compare is true and the narrower <2 x i32> %y
; elsewhere. The runs without AVX512VL are expected to compare in xmm
; registers, shuffle the result down and apply vpandn; the VL run is expected
; to use vptestmq with a zero-masked vmovdqa32.
1557define <2 x i32> @narrow_cmp_select_reverse(<2 x i64> %x, <2 x i32> %y) nounwind {
1558; AVX512-LABEL: narrow_cmp_select_reverse:
1559; AVX512:       ## %bb.0:
1560; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0xef,0xd2]
1561; AVX512-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x29,0xc2]
1562; AVX512-NEXT:    vpshufd $232, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x70,0xc0,0xe8]
1563; AVX512-NEXT:    ## xmm0 = xmm0[0,2,2,3]
1564; AVX512-NEXT:    vpandn %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdf,0xc1]
1565; AVX512-NEXT:    retq ## encoding: [0xc3]
1566;
1567; SKX-LABEL: narrow_cmp_select_reverse:
1568; SKX:       ## %bb.0:
1569; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
1570; SKX-NEXT:    vmovdqa32 %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xc1]
1571; SKX-NEXT:    retq ## encoding: [0xc3]
1572  %mask = icmp eq <2 x i64> %x, zeroinitializer
1573  %res = select <2 x i1> %mask, <2 x i32> zeroinitializer, <2 x i32> %y
1574  ret <2 x i32> %res
1575}
1576