xref: /llvm-project/llvm/test/CodeGen/X86/bitcast-setcc-512.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
7
; v32i16: signed greater-than compare of two <32 x i16> vectors whose
; <32 x i1> result is bitcast to an i32 bitmask and returned.
; The assertions below record the expected llc output per -mattr setting.
; With +avx512bw the expected code compares straight into mask register k0 and
; moves it out with kmovd; with only +avx512f the expectation is two 256-bit
; compares whose 16-bit masks are merged with shll $16 / orl.
8define i32 @v32i16(<32 x i16> %a, <32 x i16> %b) {
9; SSE-LABEL: v32i16:
10; SSE:       # %bb.0:
11; SSE-NEXT:    pcmpgtw %xmm5, %xmm1
12; SSE-NEXT:    pcmpgtw %xmm4, %xmm0
13; SSE-NEXT:    packsswb %xmm1, %xmm0
14; SSE-NEXT:    pmovmskb %xmm0, %ecx
15; SSE-NEXT:    pcmpgtw %xmm7, %xmm3
16; SSE-NEXT:    pcmpgtw %xmm6, %xmm2
17; SSE-NEXT:    packsswb %xmm3, %xmm2
18; SSE-NEXT:    pmovmskb %xmm2, %eax
19; SSE-NEXT:    shll $16, %eax
20; SSE-NEXT:    orl %ecx, %eax
21; SSE-NEXT:    retq
22;
23; AVX1-LABEL: v32i16:
24; AVX1:       # %bb.0:
25; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
26; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
27; AVX1-NEXT:    vpcmpgtw %xmm4, %xmm5, %xmm4
28; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm0, %xmm0
29; AVX1-NEXT:    vpacksswb %xmm4, %xmm0, %xmm0
30; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
31; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm0
32; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
33; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm2, %xmm0
34; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm1, %xmm1
35; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0
36; AVX1-NEXT:    vpmovmskb %xmm0, %eax
37; AVX1-NEXT:    shll $16, %eax
38; AVX1-NEXT:    orl %ecx, %eax
39; AVX1-NEXT:    vzeroupper
40; AVX1-NEXT:    retq
41;
42; AVX2-LABEL: v32i16:
43; AVX2:       # %bb.0:
44; AVX2-NEXT:    vpcmpgtw %ymm3, %ymm1, %ymm1
45; AVX2-NEXT:    vpcmpgtw %ymm2, %ymm0, %ymm0
46; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
47; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
48; AVX2-NEXT:    vpmovmskb %ymm0, %eax
49; AVX2-NEXT:    vzeroupper
50; AVX2-NEXT:    retq
51;
52; AVX512F-LABEL: v32i16:
53; AVX512F:       # %bb.0:
54; AVX512F-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm2
55; AVX512F-NEXT:    vpmovsxwd %ymm2, %zmm2
56; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k0
57; AVX512F-NEXT:    kmovw %k0, %ecx
58; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
59; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
60; AVX512F-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
61; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
62; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
63; AVX512F-NEXT:    kmovw %k0, %eax
64; AVX512F-NEXT:    shll $16, %eax
65; AVX512F-NEXT:    orl %ecx, %eax
66; AVX512F-NEXT:    vzeroupper
67; AVX512F-NEXT:    retq
68;
69; AVX512BW-LABEL: v32i16:
70; AVX512BW:       # %bb.0:
71; AVX512BW-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0
72; AVX512BW-NEXT:    kmovd %k0, %eax
73; AVX512BW-NEXT:    vzeroupper
74; AVX512BW-NEXT:    retq
; IR under test: per-lane %a > %b (signed), then reinterpret the 32 i1 lanes
; as a single i32.
75  %x = icmp sgt <32 x i16> %a, %b
76  %res = bitcast <32 x i1> %x to i32
77  ret i32 %res
78}
79
; v16i32: signed greater-than compare of two <16 x i32> vectors whose
; <16 x i1> result is bitcast to an i16 bitmask and returned.
; The assertions below record the expected llc output per -mattr setting.
; Without AVX-512 mask registers the expected code narrows the compare results
; with packssdw/packsswb and reads the mask with pmovmskb; both AVX-512
; configurations expect a single compare into k0.
80define i16 @v16i32(<16 x i32> %a, <16 x i32> %b) {
81; SSE-LABEL: v16i32:
82; SSE:       # %bb.0:
83; SSE-NEXT:    pcmpgtd %xmm7, %xmm3
84; SSE-NEXT:    pcmpgtd %xmm6, %xmm2
85; SSE-NEXT:    packssdw %xmm3, %xmm2
86; SSE-NEXT:    pcmpgtd %xmm5, %xmm1
87; SSE-NEXT:    pcmpgtd %xmm4, %xmm0
88; SSE-NEXT:    packssdw %xmm1, %xmm0
89; SSE-NEXT:    packsswb %xmm2, %xmm0
90; SSE-NEXT:    pmovmskb %xmm0, %eax
91; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
92; SSE-NEXT:    retq
93;
94; AVX1-LABEL: v16i32:
95; AVX1:       # %bb.0:
96; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
97; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
98; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm4
99; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm1, %xmm1
100; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
101; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
102; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
103; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm4, %xmm3
104; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm0, %xmm0
105; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
106; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
107; AVX1-NEXT:    vpmovmskb %xmm0, %eax
108; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
109; AVX1-NEXT:    vzeroupper
110; AVX1-NEXT:    retq
111;
112; AVX2-LABEL: v16i32:
113; AVX2:       # %bb.0:
114; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm1, %ymm1
115; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm0, %ymm0
116; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
117; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
118; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
119; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
120; AVX2-NEXT:    vpmovmskb %xmm0, %eax
121; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
122; AVX2-NEXT:    vzeroupper
123; AVX2-NEXT:    retq
124;
125; AVX512F-LABEL: v16i32:
126; AVX512F:       # %bb.0:
127; AVX512F-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
128; AVX512F-NEXT:    kmovw %k0, %eax
129; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
130; AVX512F-NEXT:    vzeroupper
131; AVX512F-NEXT:    retq
132;
133; AVX512BW-LABEL: v16i32:
134; AVX512BW:       # %bb.0:
135; AVX512BW-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
136; AVX512BW-NEXT:    kmovd %k0, %eax
137; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
138; AVX512BW-NEXT:    vzeroupper
139; AVX512BW-NEXT:    retq
; IR under test: per-lane %a > %b (signed), then reinterpret the 16 i1 lanes
; as a single i16.
140  %x = icmp sgt <16 x i32> %a, %b
141  %res = bitcast <16 x i1> %x to i16
142  ret i16 %res
143}
144
; v16f32: ordered greater-than compare of two <16 x float> vectors whose
; <16 x i1> result is bitcast to an i16 bitmask and returned.
; The assertions below record the expected llc output per -mattr setting.
; In every configuration the expected compare is a less-than form
; (cmpltps / vcmpltps) rather than a greater-than instruction.
145define i16 @v16f32(<16 x float> %a, <16 x float> %b) {
146; SSE-LABEL: v16f32:
147; SSE:       # %bb.0:
148; SSE-NEXT:    cmpltps %xmm3, %xmm7
149; SSE-NEXT:    cmpltps %xmm2, %xmm6
150; SSE-NEXT:    packssdw %xmm7, %xmm6
151; SSE-NEXT:    cmpltps %xmm1, %xmm5
152; SSE-NEXT:    cmpltps %xmm0, %xmm4
153; SSE-NEXT:    packssdw %xmm5, %xmm4
154; SSE-NEXT:    packsswb %xmm6, %xmm4
155; SSE-NEXT:    pmovmskb %xmm4, %eax
156; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
157; SSE-NEXT:    retq
158;
159; AVX1-LABEL: v16f32:
160; AVX1:       # %bb.0:
161; AVX1-NEXT:    vcmpltps %ymm1, %ymm3, %ymm1
162; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
163; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
164; AVX1-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
165; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
166; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
167; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
168; AVX1-NEXT:    vpmovmskb %xmm0, %eax
169; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
170; AVX1-NEXT:    vzeroupper
171; AVX1-NEXT:    retq
172;
173; AVX2-LABEL: v16f32:
174; AVX2:       # %bb.0:
175; AVX2-NEXT:    vcmpltps %ymm1, %ymm3, %ymm1
176; AVX2-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
177; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
178; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
179; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
180; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
181; AVX2-NEXT:    vpmovmskb %xmm0, %eax
182; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
183; AVX2-NEXT:    vzeroupper
184; AVX2-NEXT:    retq
185;
186; AVX512F-LABEL: v16f32:
187; AVX512F:       # %bb.0:
188; AVX512F-NEXT:    vcmpltps %zmm0, %zmm1, %k0
189; AVX512F-NEXT:    kmovw %k0, %eax
190; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
191; AVX512F-NEXT:    vzeroupper
192; AVX512F-NEXT:    retq
193;
194; AVX512BW-LABEL: v16f32:
195; AVX512BW:       # %bb.0:
196; AVX512BW-NEXT:    vcmpltps %zmm0, %zmm1, %k0
197; AVX512BW-NEXT:    kmovd %k0, %eax
198; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
199; AVX512BW-NEXT:    vzeroupper
200; AVX512BW-NEXT:    retq
; IR under test: per-lane ordered %a > %b, then reinterpret the 16 i1 lanes
; as a single i16.
201  %x = fcmp ogt <16 x float> %a, %b
202  %res = bitcast <16 x i1> %x to i16
203  ret i16 %res
204}
205
; v64i8: signed greater-than compare of two <64 x i8> vectors whose
; <64 x i1> result is bitcast to an i64 bitmask and returned.
; The assertions below record the expected llc output per -mattr setting.
; Only +avx512bw is expected to produce the 64-bit mask in one step
; (vpcmpgtb into k0, then kmovq); the other configurations assemble it from
; narrower pmovmskb results using shifts and ors.
206define i64 @v64i8(<64 x i8> %a, <64 x i8> %b) {
207; SSE-LABEL: v64i8:
208; SSE:       # %bb.0:
209; SSE-NEXT:    pcmpgtb %xmm4, %xmm0
210; SSE-NEXT:    pmovmskb %xmm0, %eax
211; SSE-NEXT:    pcmpgtb %xmm5, %xmm1
212; SSE-NEXT:    pmovmskb %xmm1, %ecx
213; SSE-NEXT:    shll $16, %ecx
214; SSE-NEXT:    orl %eax, %ecx
215; SSE-NEXT:    pcmpgtb %xmm6, %xmm2
216; SSE-NEXT:    pmovmskb %xmm2, %edx
217; SSE-NEXT:    pcmpgtb %xmm7, %xmm3
218; SSE-NEXT:    pmovmskb %xmm3, %eax
219; SSE-NEXT:    shll $16, %eax
220; SSE-NEXT:    orl %edx, %eax
221; SSE-NEXT:    shlq $32, %rax
222; SSE-NEXT:    orq %rcx, %rax
223; SSE-NEXT:    retq
224;
225; AVX1-LABEL: v64i8:
226; AVX1:       # %bb.0:
227; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm4
228; AVX1-NEXT:    vpmovmskb %xmm4, %eax
229; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
230; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
231; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
232; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
233; AVX1-NEXT:    shll $16, %ecx
234; AVX1-NEXT:    orl %eax, %ecx
235; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm1, %xmm0
236; AVX1-NEXT:    vpmovmskb %xmm0, %edx
237; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm0
238; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
239; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
240; AVX1-NEXT:    vpmovmskb %xmm0, %eax
241; AVX1-NEXT:    shll $16, %eax
242; AVX1-NEXT:    orl %edx, %eax
243; AVX1-NEXT:    shlq $32, %rax
244; AVX1-NEXT:    orq %rcx, %rax
245; AVX1-NEXT:    vzeroupper
246; AVX1-NEXT:    retq
247;
248; AVX2-LABEL: v64i8:
249; AVX2:       # %bb.0:
250; AVX2-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
251; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
252; AVX2-NEXT:    vpcmpgtb %ymm3, %ymm1, %ymm0
253; AVX2-NEXT:    vpmovmskb %ymm0, %eax
254; AVX2-NEXT:    shlq $32, %rax
255; AVX2-NEXT:    orq %rcx, %rax
256; AVX2-NEXT:    vzeroupper
257; AVX2-NEXT:    retq
258;
259; AVX512F-LABEL: v64i8:
260; AVX512F:       # %bb.0:
261; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
262; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
263; AVX512F-NEXT:    vpcmpgtb %ymm2, %ymm3, %ymm2
264; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
265; AVX512F-NEXT:    vpmovmskb %ymm0, %ecx
266; AVX512F-NEXT:    vpmovmskb %ymm2, %eax
267; AVX512F-NEXT:    shlq $32, %rax
268; AVX512F-NEXT:    orq %rcx, %rax
269; AVX512F-NEXT:    vzeroupper
270; AVX512F-NEXT:    retq
271;
272; AVX512BW-LABEL: v64i8:
273; AVX512BW:       # %bb.0:
274; AVX512BW-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0
275; AVX512BW-NEXT:    kmovq %k0, %rax
276; AVX512BW-NEXT:    vzeroupper
277; AVX512BW-NEXT:    retq
; IR under test: per-lane %a > %b (signed), then reinterpret the 64 i1 lanes
; as a single i64.
278  %x = icmp sgt <64 x i8> %a, %b
279  %res = bitcast <64 x i1> %x to i64
280  ret i64 %res
281}
282
; v8i64: signed greater-than compare of two <8 x i64> vectors whose
; <8 x i1> result is bitcast to an i8 bitmask and returned.
; The assertions below record the expected llc output per -mattr setting.
; The +avx and +avx2 expectations read the mask with vmovmskps on a ymm
; register after packing the 64-bit compare results down with vpackssdw.
283define i8 @v8i64(<8 x i64> %a, <8 x i64> %b) {
284; SSE-LABEL: v8i64:
285; SSE:       # %bb.0:
286; SSE-NEXT:    pcmpgtq %xmm7, %xmm3
287; SSE-NEXT:    pcmpgtq %xmm6, %xmm2
288; SSE-NEXT:    packssdw %xmm3, %xmm2
289; SSE-NEXT:    pcmpgtq %xmm5, %xmm1
290; SSE-NEXT:    pcmpgtq %xmm4, %xmm0
291; SSE-NEXT:    packssdw %xmm1, %xmm0
292; SSE-NEXT:    packssdw %xmm2, %xmm0
293; SSE-NEXT:    packsswb %xmm0, %xmm0
294; SSE-NEXT:    pmovmskb %xmm0, %eax
295; SSE-NEXT:    # kill: def $al killed $al killed $eax
296; SSE-NEXT:    retq
297;
298; AVX1-LABEL: v8i64:
299; AVX1:       # %bb.0:
300; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
301; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
302; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
303; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm1
304; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
305; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
306; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
307; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
308; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
309; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
310; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
311; AVX1-NEXT:    vmovmskps %ymm0, %eax
312; AVX1-NEXT:    # kill: def $al killed $al killed $eax
313; AVX1-NEXT:    vzeroupper
314; AVX1-NEXT:    retq
315;
316; AVX2-LABEL: v8i64:
317; AVX2:       # %bb.0:
318; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm1, %ymm1
319; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
320; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
321; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
322; AVX2-NEXT:    vmovmskps %ymm0, %eax
323; AVX2-NEXT:    # kill: def $al killed $al killed $eax
324; AVX2-NEXT:    vzeroupper
325; AVX2-NEXT:    retq
326;
327; AVX512F-LABEL: v8i64:
328; AVX512F:       # %bb.0:
329; AVX512F-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
330; AVX512F-NEXT:    kmovw %k0, %eax
331; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
332; AVX512F-NEXT:    vzeroupper
333; AVX512F-NEXT:    retq
334;
335; AVX512BW-LABEL: v8i64:
336; AVX512BW:       # %bb.0:
337; AVX512BW-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
338; AVX512BW-NEXT:    kmovd %k0, %eax
339; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
340; AVX512BW-NEXT:    vzeroupper
341; AVX512BW-NEXT:    retq
; IR under test: per-lane %a > %b (signed), then reinterpret the 8 i1 lanes
; as a single i8.
342  %x = icmp sgt <8 x i64> %a, %b
343  %res = bitcast <8 x i1> %x to i8
344  ret i8 %res
345}
346
; v8f64: ordered greater-than compare of two <8 x double> vectors whose
; <8 x i1> result is bitcast to an i8 bitmask and returned.
; The assertions below record the expected llc output per -mattr setting.
; In every configuration the expected compare is a less-than form
; (cmpltpd / vcmpltpd) rather than a greater-than instruction.
347define i8 @v8f64(<8 x double> %a, <8 x double> %b) {
348; SSE-LABEL: v8f64:
349; SSE:       # %bb.0:
350; SSE-NEXT:    cmpltpd %xmm3, %xmm7
351; SSE-NEXT:    cmpltpd %xmm2, %xmm6
352; SSE-NEXT:    packssdw %xmm7, %xmm6
353; SSE-NEXT:    cmpltpd %xmm1, %xmm5
354; SSE-NEXT:    cmpltpd %xmm0, %xmm4
355; SSE-NEXT:    packssdw %xmm5, %xmm4
356; SSE-NEXT:    packssdw %xmm6, %xmm4
357; SSE-NEXT:    packsswb %xmm4, %xmm4
358; SSE-NEXT:    pmovmskb %xmm4, %eax
359; SSE-NEXT:    # kill: def $al killed $al killed $eax
360; SSE-NEXT:    retq
361;
362; AVX1-LABEL: v8f64:
363; AVX1:       # %bb.0:
364; AVX1-NEXT:    vcmpltpd %ymm1, %ymm3, %ymm1
365; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
366; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
367; AVX1-NEXT:    vcmpltpd %ymm0, %ymm2, %ymm0
368; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
369; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
370; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
371; AVX1-NEXT:    vmovmskps %ymm0, %eax
372; AVX1-NEXT:    # kill: def $al killed $al killed $eax
373; AVX1-NEXT:    vzeroupper
374; AVX1-NEXT:    retq
375;
376; AVX2-LABEL: v8f64:
377; AVX2:       # %bb.0:
378; AVX2-NEXT:    vcmpltpd %ymm1, %ymm3, %ymm1
379; AVX2-NEXT:    vcmpltpd %ymm0, %ymm2, %ymm0
380; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
381; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
382; AVX2-NEXT:    vmovmskps %ymm0, %eax
383; AVX2-NEXT:    # kill: def $al killed $al killed $eax
384; AVX2-NEXT:    vzeroupper
385; AVX2-NEXT:    retq
386;
387; AVX512F-LABEL: v8f64:
388; AVX512F:       # %bb.0:
389; AVX512F-NEXT:    vcmpltpd %zmm0, %zmm1, %k0
390; AVX512F-NEXT:    kmovw %k0, %eax
391; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
392; AVX512F-NEXT:    vzeroupper
393; AVX512F-NEXT:    retq
394;
395; AVX512BW-LABEL: v8f64:
396; AVX512BW:       # %bb.0:
397; AVX512BW-NEXT:    vcmpltpd %zmm0, %zmm1, %k0
398; AVX512BW-NEXT:    kmovd %k0, %eax
399; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
400; AVX512BW-NEXT:    vzeroupper
401; AVX512BW-NEXT:    retq
; IR under test: per-lane ordered %a > %b, then reinterpret the 8 i1 lanes
; as a single i8.
402  %x = fcmp ogt <8 x double> %a, %b
403  %res = bitcast <8 x i1> %x to i8
404  ret i8 %res
405}
406
; bitcast_64i8_store: tests each i8 lane of %a0 for "less than zero", bitcasts
; the <64 x i1> result to an i64 and stores it through %p.
; The assertions below record the expected llc output per -mattr setting.
; For +sse4.2, +avx and +avx2 the expected code applies pmovmskb/vpmovmskb
; directly to the input registers with no separate compare; +avx512bw expects
; vpmovb2m followed by a kmovq store, while +avx512f alone expects four 16-bit
; mask stores at offsets 0, 2, 4 and 6.
407define void @bitcast_64i8_store(ptr %p, <64 x i8> %a0) {
408; SSE-LABEL: bitcast_64i8_store:
409; SSE:       # %bb.0:
410; SSE-NEXT:    pmovmskb %xmm0, %eax
411; SSE-NEXT:    pmovmskb %xmm1, %ecx
412; SSE-NEXT:    shll $16, %ecx
413; SSE-NEXT:    orl %eax, %ecx
414; SSE-NEXT:    pmovmskb %xmm2, %eax
415; SSE-NEXT:    pmovmskb %xmm3, %edx
416; SSE-NEXT:    shll $16, %edx
417; SSE-NEXT:    orl %eax, %edx
418; SSE-NEXT:    shlq $32, %rdx
419; SSE-NEXT:    orq %rcx, %rdx
420; SSE-NEXT:    movq %rdx, (%rdi)
421; SSE-NEXT:    retq
422;
423; AVX1-LABEL: bitcast_64i8_store:
424; AVX1:       # %bb.0:
425; AVX1-NEXT:    vpmovmskb %xmm0, %eax
426; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
427; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
428; AVX1-NEXT:    shll $16, %ecx
429; AVX1-NEXT:    orl %eax, %ecx
430; AVX1-NEXT:    vpmovmskb %xmm1, %eax
431; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
432; AVX1-NEXT:    vpmovmskb %xmm0, %edx
433; AVX1-NEXT:    shll $16, %edx
434; AVX1-NEXT:    orl %eax, %edx
435; AVX1-NEXT:    shlq $32, %rdx
436; AVX1-NEXT:    orq %rcx, %rdx
437; AVX1-NEXT:    movq %rdx, (%rdi)
438; AVX1-NEXT:    vzeroupper
439; AVX1-NEXT:    retq
440;
441; AVX2-LABEL: bitcast_64i8_store:
442; AVX2:       # %bb.0:
443; AVX2-NEXT:    vpmovmskb %ymm1, %eax
444; AVX2-NEXT:    shlq $32, %rax
445; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
446; AVX2-NEXT:    orq %rax, %rcx
447; AVX2-NEXT:    movq %rcx, (%rdi)
448; AVX2-NEXT:    vzeroupper
449; AVX2-NEXT:    retq
450;
451; AVX512F-LABEL: bitcast_64i8_store:
452; AVX512F:       # %bb.0:
453; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
454; AVX512F-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm2
455; AVX512F-NEXT:    vpmovsxbd %xmm2, %zmm3
456; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k0
457; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm2
458; AVX512F-NEXT:    vpmovsxbd %xmm2, %zmm2
459; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k1
460; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
461; AVX512F-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
462; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm1
463; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k2
464; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
465; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
466; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k3
467; AVX512F-NEXT:    kmovw %k3, 6(%rdi)
468; AVX512F-NEXT:    kmovw %k2, 4(%rdi)
469; AVX512F-NEXT:    kmovw %k1, 2(%rdi)
470; AVX512F-NEXT:    kmovw %k0, (%rdi)
471; AVX512F-NEXT:    vzeroupper
472; AVX512F-NEXT:    retq
473;
474; AVX512BW-LABEL: bitcast_64i8_store:
475; AVX512BW:       # %bb.0:
476; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
477; AVX512BW-NEXT:    kmovq %k0, (%rdi)
478; AVX512BW-NEXT:    vzeroupper
479; AVX512BW-NEXT:    retq
; IR under test: per-lane %a0 < 0 (signed), reinterpret the 64 i1 lanes as an
; i64, and store that value to %p.
480  %a1 = icmp slt <64 x i8> %a0, zeroinitializer
481  %a2 = bitcast <64 x i1> %a1 to i64
482  store i64 %a2, ptr %p
483  ret void
484}
485
; bitcast_32i16_store: tests each i16 lane of %a0 for "less than zero",
; bitcasts the <32 x i1> result to an i32 and stores it through %p.
; The assertions below record the expected llc output per -mattr setting.
; For +sse4.2, +avx and +avx2 the expected code narrows the inputs with
; packsswb/vpacksswb and reads the mask with pmovmskb, with no separate
; compare; +avx512bw expects vpmovw2m followed by a kmovd store.
486define void @bitcast_32i16_store(ptr %p, <32 x i16> %a0) {
487; SSE-LABEL: bitcast_32i16_store:
488; SSE:       # %bb.0:
489; SSE-NEXT:    packsswb %xmm1, %xmm0
490; SSE-NEXT:    pmovmskb %xmm0, %eax
491; SSE-NEXT:    packsswb %xmm3, %xmm2
492; SSE-NEXT:    pmovmskb %xmm2, %ecx
493; SSE-NEXT:    shll $16, %ecx
494; SSE-NEXT:    orl %eax, %ecx
495; SSE-NEXT:    movl %ecx, (%rdi)
496; SSE-NEXT:    retq
497;
498; AVX1-LABEL: bitcast_32i16_store:
499; AVX1:       # %bb.0:
500; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
501; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
502; AVX1-NEXT:    vpmovmskb %xmm0, %eax
503; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
504; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0
505; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
506; AVX1-NEXT:    shll $16, %ecx
507; AVX1-NEXT:    orl %eax, %ecx
508; AVX1-NEXT:    movl %ecx, (%rdi)
509; AVX1-NEXT:    vzeroupper
510; AVX1-NEXT:    retq
511;
512; AVX2-LABEL: bitcast_32i16_store:
513; AVX2:       # %bb.0:
514; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
515; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
516; AVX2-NEXT:    vpmovmskb %ymm0, %eax
517; AVX2-NEXT:    movl %eax, (%rdi)
518; AVX2-NEXT:    vzeroupper
519; AVX2-NEXT:    retq
520;
521; AVX512F-LABEL: bitcast_32i16_store:
522; AVX512F:       # %bb.0:
523; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
524; AVX512F-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm2
525; AVX512F-NEXT:    vpmovsxwd %ymm2, %zmm2
526; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k0
527; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
528; AVX512F-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
529; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
530; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
531; AVX512F-NEXT:    kmovw %k1, 2(%rdi)
532; AVX512F-NEXT:    kmovw %k0, (%rdi)
533; AVX512F-NEXT:    vzeroupper
534; AVX512F-NEXT:    retq
535;
536; AVX512BW-LABEL: bitcast_32i16_store:
537; AVX512BW:       # %bb.0:
538; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
539; AVX512BW-NEXT:    kmovd %k0, (%rdi)
540; AVX512BW-NEXT:    vzeroupper
541; AVX512BW-NEXT:    retq
; IR under test: per-lane %a0 < 0 (signed), reinterpret the 32 i1 lanes as an
; i32, and store that value to %p.
542  %a1 = icmp slt <32 x i16> %a0, zeroinitializer
543  %a2 = bitcast <32 x i1> %a1 to i32
544  store i32 %a2, ptr %p
545  ret void
546}
547
; bitcast_16i32_store: tests each i32 lane of %a0 for "less than zero",
; bitcasts the <16 x i1> result to an i16 and stores it through %p.
; The assertions below record the expected llc output per -mattr setting.
; Both AVX-512 configurations are covered by one shared check prefix here,
; i.e. +avx512f and +avx512bw are expected to emit the same code. The +avx2
; expectation compares against a zeroed register before packing, whereas the
; +sse4.2 and +avx expectations pack the inputs directly.
548define void @bitcast_16i32_store(ptr %p, <16 x i32> %a0) {
549; SSE-LABEL: bitcast_16i32_store:
550; SSE:       # %bb.0:
551; SSE-NEXT:    packssdw %xmm3, %xmm2
552; SSE-NEXT:    packssdw %xmm1, %xmm0
553; SSE-NEXT:    packsswb %xmm2, %xmm0
554; SSE-NEXT:    pmovmskb %xmm0, %eax
555; SSE-NEXT:    movw %ax, (%rdi)
556; SSE-NEXT:    retq
557;
558; AVX1-LABEL: bitcast_16i32_store:
559; AVX1:       # %bb.0:
560; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
561; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
562; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
563; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
564; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
565; AVX1-NEXT:    vpmovmskb %xmm0, %eax
566; AVX1-NEXT:    movw %ax, (%rdi)
567; AVX1-NEXT:    vzeroupper
568; AVX1-NEXT:    retq
569;
570; AVX2-LABEL: bitcast_16i32_store:
571; AVX2:       # %bb.0:
572; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
573; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm2, %ymm1
574; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm2, %ymm0
575; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
576; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
577; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
578; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
579; AVX2-NEXT:    vpmovmskb %xmm0, %eax
580; AVX2-NEXT:    movw %ax, (%rdi)
581; AVX2-NEXT:    vzeroupper
582; AVX2-NEXT:    retq
583;
584; AVX512-LABEL: bitcast_16i32_store:
585; AVX512:       # %bb.0:
586; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
587; AVX512-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
588; AVX512-NEXT:    kmovw %k0, (%rdi)
589; AVX512-NEXT:    vzeroupper
590; AVX512-NEXT:    retq
; IR under test: per-lane %a0 < 0 (signed), reinterpret the 16 i1 lanes as an
; i16, and store that value to %p.
591  %a1 = icmp slt <16 x i32> %a0, zeroinitializer
592  %a2 = bitcast <16 x i1> %a1 to i16
593  store i16 %a2, ptr %p
594  ret void
595}
596
; bitcast_8i64_store: tests each i64 lane of %a0 for "less than zero",
; bitcasts the <8 x i1> result to an i8 and stores it through %p.
; The assertions below record the expected llc output per -mattr setting.
; Both AVX-512 expectations move the mask to a general-purpose register
; (kmovw or kmovd) and store the low byte with movb, rather than storing
; directly from the mask register.
597define void @bitcast_8i64_store(ptr %p, <8 x i64> %a0) {
598; SSE-LABEL: bitcast_8i64_store:
599; SSE:       # %bb.0:
600; SSE-NEXT:    pxor %xmm4, %xmm4
601; SSE-NEXT:    pxor %xmm5, %xmm5
602; SSE-NEXT:    pcmpgtq %xmm3, %xmm5
603; SSE-NEXT:    pxor %xmm3, %xmm3
604; SSE-NEXT:    pcmpgtq %xmm2, %xmm3
605; SSE-NEXT:    packssdw %xmm5, %xmm3
606; SSE-NEXT:    pxor %xmm2, %xmm2
607; SSE-NEXT:    pcmpgtq %xmm1, %xmm2
608; SSE-NEXT:    pcmpgtq %xmm0, %xmm4
609; SSE-NEXT:    packssdw %xmm2, %xmm4
610; SSE-NEXT:    packssdw %xmm3, %xmm4
611; SSE-NEXT:    packsswb %xmm4, %xmm4
612; SSE-NEXT:    pmovmskb %xmm4, %eax
613; SSE-NEXT:    movb %al, (%rdi)
614; SSE-NEXT:    retq
615;
616; AVX1-LABEL: bitcast_8i64_store:
617; AVX1:       # %bb.0:
618; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
619; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
620; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
621; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
622; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
623; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
624; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
625; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
626; AVX1-NEXT:    vmovmskps %ymm0, %eax
627; AVX1-NEXT:    movb %al, (%rdi)
628; AVX1-NEXT:    vzeroupper
629; AVX1-NEXT:    retq
630;
631; AVX2-LABEL: bitcast_8i64_store:
632; AVX2:       # %bb.0:
633; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
634; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
635; AVX2-NEXT:    vmovmskps %ymm0, %eax
636; AVX2-NEXT:    movb %al, (%rdi)
637; AVX2-NEXT:    vzeroupper
638; AVX2-NEXT:    retq
639;
640; AVX512F-LABEL: bitcast_8i64_store:
641; AVX512F:       # %bb.0:
642; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
643; AVX512F-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
644; AVX512F-NEXT:    kmovw %k0, %eax
645; AVX512F-NEXT:    movb %al, (%rdi)
646; AVX512F-NEXT:    vzeroupper
647; AVX512F-NEXT:    retq
648;
649; AVX512BW-LABEL: bitcast_8i64_store:
650; AVX512BW:       # %bb.0:
651; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
652; AVX512BW-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
653; AVX512BW-NEXT:    kmovd %k0, %eax
654; AVX512BW-NEXT:    movb %al, (%rdi)
655; AVX512BW-NEXT:    vzeroupper
656; AVX512BW-NEXT:    retq
; IR under test: per-lane %a0 < 0 (signed), reinterpret the 8 i1 lanes as an
; i8, and store that value to %p.
657  %a1 = icmp slt <8 x i64> %a0, zeroinitializer
658  %a2 = bitcast <8 x i1> %a1 to i8
659  store i8 %a2, ptr %p
660  ret void
661}
662