xref: /llvm-project/llvm/test/CodeGen/X86/bitcast-setcc-256.ll (revision 943fda567acb87bea9918b77de3d06f3901b7de2)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSE2 | FileCheck %s --check-prefix=SSE2-SSSE3
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSSE3 | FileCheck %s --check-prefix=SSE2-SSSE3
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512F
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
8
; v16i16: signed greater-than compare of two <16 x i16> vectors; the
; <16 x i1> result is bitcast to an i16 bitmask and returned.
; The per-prefix assertions below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them instead of hand-editing.
9define i16 @v16i16(<16 x i16> %a, <16 x i16> %b) {
10; SSE2-SSSE3-LABEL: v16i16:
11; SSE2-SSSE3:       # %bb.0:
12; SSE2-SSSE3-NEXT:    pcmpgtw %xmm3, %xmm1
13; SSE2-SSSE3-NEXT:    pcmpgtw %xmm2, %xmm0
14; SSE2-SSSE3-NEXT:    packsswb %xmm1, %xmm0
15; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
16; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
17; SSE2-SSSE3-NEXT:    retq
18;
19; AVX1-LABEL: v16i16:
20; AVX1:       # %bb.0:
21; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
22; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
23; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
24; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
25; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
26; AVX1-NEXT:    vpmovmskb %xmm0, %eax
27; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
28; AVX1-NEXT:    vzeroupper
29; AVX1-NEXT:    retq
30;
31; AVX2-LABEL: v16i16:
32; AVX2:       # %bb.0:
33; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
34; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
35; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
36; AVX2-NEXT:    vpmovmskb %xmm0, %eax
37; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
38; AVX2-NEXT:    vzeroupper
39; AVX2-NEXT:    retq
40;
41; AVX512F-LABEL: v16i16:
42; AVX512F:       # %bb.0:
43; AVX512F-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
44; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
45; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
46; AVX512F-NEXT:    kmovw %k0, %eax
47; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
48; AVX512F-NEXT:    vzeroupper
49; AVX512F-NEXT:    retq
50;
51; AVX512BW-LABEL: v16i16:
52; AVX512BW:       # %bb.0:
53; AVX512BW-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0
54; AVX512BW-NEXT:    kmovd %k0, %eax
55; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
56; AVX512BW-NEXT:    vzeroupper
57; AVX512BW-NEXT:    retq
; IR under test: compare, reinterpret the 16 i1 lanes as one i16, return it.
58  %x = icmp sgt <16 x i16> %a, %b
59  %res = bitcast <16 x i1> %x to i16
60  ret i16 %res
61}
62
; v8i32: signed greater-than compare of two <8 x i32> vectors; the
; <8 x i1> result is bitcast to an i8 bitmask and returned.
; The per-prefix assertions below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them instead of hand-editing.
63define i8 @v8i32(<8 x i32> %a, <8 x i32> %b) {
64; SSE2-SSSE3-LABEL: v8i32:
65; SSE2-SSSE3:       # %bb.0:
66; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm1
67; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm0
68; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
69; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
70; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
71; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
72; SSE2-SSSE3-NEXT:    retq
73;
74; AVX1-LABEL: v8i32:
75; AVX1:       # %bb.0:
76; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
77; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
78; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
79; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
80; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
81; AVX1-NEXT:    vmovmskps %ymm0, %eax
82; AVX1-NEXT:    # kill: def $al killed $al killed $eax
83; AVX1-NEXT:    vzeroupper
84; AVX1-NEXT:    retq
85;
86; AVX2-LABEL: v8i32:
87; AVX2:       # %bb.0:
88; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
89; AVX2-NEXT:    vmovmskps %ymm0, %eax
90; AVX2-NEXT:    # kill: def $al killed $al killed $eax
91; AVX2-NEXT:    vzeroupper
92; AVX2-NEXT:    retq
93;
94; AVX512F-LABEL: v8i32:
95; AVX512F:       # %bb.0:
96; AVX512F-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
97; AVX512F-NEXT:    kmovw %k0, %eax
98; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
99; AVX512F-NEXT:    vzeroupper
100; AVX512F-NEXT:    retq
101;
102; AVX512BW-LABEL: v8i32:
103; AVX512BW:       # %bb.0:
104; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
105; AVX512BW-NEXT:    kmovd %k0, %eax
106; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
107; AVX512BW-NEXT:    vzeroupper
108; AVX512BW-NEXT:    retq
; IR under test: compare, reinterpret the 8 i1 lanes as one i8, return it.
109  %x = icmp sgt <8 x i32> %a, %b
110  %res = bitcast <8 x i1> %x to i8
111  ret i8 %res
112}
113
; v8f32: ordered greater-than (fcmp ogt) compare of two <8 x float> vectors;
; the <8 x i1> result is bitcast to an i8 bitmask and returned.
; The expected output uses the less-than compare forms (cmpltps / vcmpltps)
; for this predicate. The AVX1 and AVX2 runs are both checked through the
; shared AVX12 prefix here.
; The per-prefix assertions below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them instead of hand-editing.
114define i8 @v8f32(<8 x float> %a, <8 x float> %b) {
115; SSE2-SSSE3-LABEL: v8f32:
116; SSE2-SSSE3:       # %bb.0:
117; SSE2-SSSE3-NEXT:    cmpltps %xmm1, %xmm3
118; SSE2-SSSE3-NEXT:    cmpltps %xmm0, %xmm2
119; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm2
120; SSE2-SSSE3-NEXT:    packsswb %xmm2, %xmm2
121; SSE2-SSSE3-NEXT:    pmovmskb %xmm2, %eax
122; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
123; SSE2-SSSE3-NEXT:    retq
124;
125; AVX12-LABEL: v8f32:
126; AVX12:       # %bb.0:
127; AVX12-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
128; AVX12-NEXT:    vmovmskps %ymm0, %eax
129; AVX12-NEXT:    # kill: def $al killed $al killed $eax
130; AVX12-NEXT:    vzeroupper
131; AVX12-NEXT:    retq
132;
133; AVX512F-LABEL: v8f32:
134; AVX512F:       # %bb.0:
135; AVX512F-NEXT:    vcmpltps %ymm0, %ymm1, %k0
136; AVX512F-NEXT:    kmovw %k0, %eax
137; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
138; AVX512F-NEXT:    vzeroupper
139; AVX512F-NEXT:    retq
140;
141; AVX512BW-LABEL: v8f32:
142; AVX512BW:       # %bb.0:
143; AVX512BW-NEXT:    vcmpltps %ymm0, %ymm1, %k0
144; AVX512BW-NEXT:    kmovd %k0, %eax
145; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
146; AVX512BW-NEXT:    vzeroupper
147; AVX512BW-NEXT:    retq
; IR under test: compare, reinterpret the 8 i1 lanes as one i8, return it.
148  %x = fcmp ogt <8 x float> %a, %b
149  %res = bitcast <8 x i1> %x to i8
150  ret i8 %res
151}
152
; v32i8: signed greater-than compare of two <32 x i8> vectors; the
; <32 x i1> result is bitcast to an i32 bitmask and returned.
; In the SSE and AVX1 expectations the mask is assembled from two 16-bit
; pmovmskb results joined with a shift by 16 and an OR.
; The per-prefix assertions below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them instead of hand-editing.
153define i32 @v32i8(<32 x i8> %a, <32 x i8> %b) {
154; SSE2-SSSE3-LABEL: v32i8:
155; SSE2-SSSE3:       # %bb.0:
156; SSE2-SSSE3-NEXT:    pcmpgtb %xmm2, %xmm0
157; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %ecx
158; SSE2-SSSE3-NEXT:    pcmpgtb %xmm3, %xmm1
159; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %eax
160; SSE2-SSSE3-NEXT:    shll $16, %eax
161; SSE2-SSSE3-NEXT:    orl %ecx, %eax
162; SSE2-SSSE3-NEXT:    retq
163;
164; AVX1-LABEL: v32i8:
165; AVX1:       # %bb.0:
166; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm2
167; AVX1-NEXT:    vpmovmskb %xmm2, %ecx
168; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
169; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
170; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
171; AVX1-NEXT:    vpmovmskb %xmm0, %eax
172; AVX1-NEXT:    shll $16, %eax
173; AVX1-NEXT:    orl %ecx, %eax
174; AVX1-NEXT:    vzeroupper
175; AVX1-NEXT:    retq
176;
177; AVX2-LABEL: v32i8:
178; AVX2:       # %bb.0:
179; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
180; AVX2-NEXT:    vpmovmskb %ymm0, %eax
181; AVX2-NEXT:    vzeroupper
182; AVX2-NEXT:    retq
183;
184; AVX512F-LABEL: v32i8:
185; AVX512F:       # %bb.0:
186; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
187; AVX512F-NEXT:    vpmovmskb %ymm0, %eax
188; AVX512F-NEXT:    vzeroupper
189; AVX512F-NEXT:    retq
190;
191; AVX512BW-LABEL: v32i8:
192; AVX512BW:       # %bb.0:
193; AVX512BW-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0
194; AVX512BW-NEXT:    kmovd %k0, %eax
195; AVX512BW-NEXT:    vzeroupper
196; AVX512BW-NEXT:    retq
; IR under test: compare, reinterpret the 32 i1 lanes as one i32, return it.
197  %x = icmp sgt <32 x i8> %a, %b
198  %res = bitcast <32 x i1> %x to i32
199  ret i32 %res
200}
201
; v4i64: signed greater-than compare of two <4 x i64> vectors; the
; <4 x i1> result is bitcast to an i4 bitmask and returned.
; The SSE2/SSSE3 expectation has no 64-bit compare instruction in it: it is
; built from 32-bit pcmpgtd/pcmpeqd results (after XOR-ing the inputs with a
; constant) that are shuffled and combined with AND/OR before movmskps.
; The per-prefix assertions below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them instead of hand-editing.
202define i4 @v4i64(<4 x i64> %a, <4 x i64> %b) {
203; SSE2-SSSE3-LABEL: v4i64:
204; SSE2-SSSE3:       # %bb.0:
205; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
206; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm3
207; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm1
208; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm5
209; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm5
210; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm2
211; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm0
212; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm4
213; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm4
214; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm6
215; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm6 = xmm6[0,2],xmm5[0,2]
216; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm1
217; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
218; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
219; SSE2-SSSE3-NEXT:    andps %xmm6, %xmm0
220; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3]
221; SSE2-SSSE3-NEXT:    orps %xmm0, %xmm4
222; SSE2-SSSE3-NEXT:    movmskps %xmm4, %eax
223; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
224; SSE2-SSSE3-NEXT:    retq
225;
226; AVX1-LABEL: v4i64:
227; AVX1:       # %bb.0:
228; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
229; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
230; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
231; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
232; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
233; AVX1-NEXT:    vmovmskpd %ymm0, %eax
234; AVX1-NEXT:    # kill: def $al killed $al killed $eax
235; AVX1-NEXT:    vzeroupper
236; AVX1-NEXT:    retq
237;
238; AVX2-LABEL: v4i64:
239; AVX2:       # %bb.0:
240; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
241; AVX2-NEXT:    vmovmskpd %ymm0, %eax
242; AVX2-NEXT:    # kill: def $al killed $al killed $eax
243; AVX2-NEXT:    vzeroupper
244; AVX2-NEXT:    retq
245;
246; AVX512F-LABEL: v4i64:
247; AVX512F:       # %bb.0:
248; AVX512F-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0
249; AVX512F-NEXT:    kmovw %k0, %eax
250; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
251; AVX512F-NEXT:    vzeroupper
252; AVX512F-NEXT:    retq
253;
254; AVX512BW-LABEL: v4i64:
255; AVX512BW:       # %bb.0:
256; AVX512BW-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0
257; AVX512BW-NEXT:    kmovd %k0, %eax
258; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
259; AVX512BW-NEXT:    vzeroupper
260; AVX512BW-NEXT:    retq
; IR under test: compare, reinterpret the 4 i1 lanes as one i4, return it.
261  %x = icmp sgt <4 x i64> %a, %b
262  %res = bitcast <4 x i1> %x to i4
263  ret i4 %res
264}
265
; v4f64: ordered greater-than (fcmp ogt) compare of two <4 x double> vectors;
; the <4 x i1> result is bitcast to an i4 bitmask and returned.
; The expected output uses the less-than compare forms (cmpltpd / vcmpltpd)
; for this predicate. The AVX1 and AVX2 runs are both checked through the
; shared AVX12 prefix here.
; The per-prefix assertions below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them instead of hand-editing.
266define i4 @v4f64(<4 x double> %a, <4 x double> %b) {
267; SSE2-SSSE3-LABEL: v4f64:
268; SSE2-SSSE3:       # %bb.0:
269; SSE2-SSSE3-NEXT:    cmpltpd %xmm1, %xmm3
270; SSE2-SSSE3-NEXT:    cmpltpd %xmm0, %xmm2
271; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
272; SSE2-SSSE3-NEXT:    movmskps %xmm2, %eax
273; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
274; SSE2-SSSE3-NEXT:    retq
275;
276; AVX12-LABEL: v4f64:
277; AVX12:       # %bb.0:
278; AVX12-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
279; AVX12-NEXT:    vmovmskpd %ymm0, %eax
280; AVX12-NEXT:    # kill: def $al killed $al killed $eax
281; AVX12-NEXT:    vzeroupper
282; AVX12-NEXT:    retq
283;
284; AVX512F-LABEL: v4f64:
285; AVX512F:       # %bb.0:
286; AVX512F-NEXT:    vcmpltpd %ymm0, %ymm1, %k0
287; AVX512F-NEXT:    kmovw %k0, %eax
288; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
289; AVX512F-NEXT:    vzeroupper
290; AVX512F-NEXT:    retq
291;
292; AVX512BW-LABEL: v4f64:
293; AVX512BW:       # %bb.0:
294; AVX512BW-NEXT:    vcmpltpd %ymm0, %ymm1, %k0
295; AVX512BW-NEXT:    kmovd %k0, %eax
296; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
297; AVX512BW-NEXT:    vzeroupper
298; AVX512BW-NEXT:    retq
; IR under test: compare, reinterpret the 4 i1 lanes as one i4, return it.
299  %x = fcmp ogt <4 x double> %a, %b
300  %res = bitcast <4 x i1> %x to i4
301  ret i4 %res
302}
303
; bitcast_32i8_store: tests each i8 lane of %a0 for "signed less than zero"
; (icmp slt against zeroinitializer), bitcasts the <32 x i1> result to i32
; and stores it through %p.
; In the SSE, AVX1 and AVX2 expectations no explicit compare appears; the
; mask is taken directly with (v)pmovmskb. The AVX512F expectation writes
; the result as two 16-bit kmovw stores at offsets 2 and 0.
; The per-prefix assertions below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them instead of hand-editing.
304define void @bitcast_32i8_store(ptr %p, <32 x i8> %a0) {
305; SSE2-SSSE3-LABEL: bitcast_32i8_store:
306; SSE2-SSSE3:       # %bb.0:
307; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
308; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %ecx
309; SSE2-SSSE3-NEXT:    shll $16, %ecx
310; SSE2-SSSE3-NEXT:    orl %eax, %ecx
311; SSE2-SSSE3-NEXT:    movl %ecx, (%rdi)
312; SSE2-SSSE3-NEXT:    retq
313;
314; AVX1-LABEL: bitcast_32i8_store:
315; AVX1:       # %bb.0:
316; AVX1-NEXT:    vpmovmskb %xmm0, %eax
317; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
318; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
319; AVX1-NEXT:    shll $16, %ecx
320; AVX1-NEXT:    orl %eax, %ecx
321; AVX1-NEXT:    movl %ecx, (%rdi)
322; AVX1-NEXT:    vzeroupper
323; AVX1-NEXT:    retq
324;
325; AVX2-LABEL: bitcast_32i8_store:
326; AVX2:       # %bb.0:
327; AVX2-NEXT:    vpmovmskb %ymm0, %eax
328; AVX2-NEXT:    movl %eax, (%rdi)
329; AVX2-NEXT:    vzeroupper
330; AVX2-NEXT:    retq
331;
332; AVX512F-LABEL: bitcast_32i8_store:
333; AVX512F:       # %bb.0:
334; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
335; AVX512F-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
336; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm1
337; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0
338; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
339; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
340; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
341; AVX512F-NEXT:    kmovw %k1, 2(%rdi)
342; AVX512F-NEXT:    kmovw %k0, (%rdi)
343; AVX512F-NEXT:    vzeroupper
344; AVX512F-NEXT:    retq
345;
346; AVX512BW-LABEL: bitcast_32i8_store:
347; AVX512BW:       # %bb.0:
348; AVX512BW-NEXT:    vpmovb2m %ymm0, %k0
349; AVX512BW-NEXT:    kmovd %k0, (%rdi)
350; AVX512BW-NEXT:    vzeroupper
351; AVX512BW-NEXT:    retq
; IR under test: negative-lane test, reinterpret as i32, store to %p.
352  %a1 = icmp slt <32 x i8> %a0, zeroinitializer
353  %a2 = bitcast <32 x i1> %a1 to i32
354  store i32 %a2, ptr %p
355  ret void
356}
357
; bitcast_16i16_store: tests each i16 lane of %a0 for "signed less than
; zero" (icmp slt against zeroinitializer), bitcasts the <16 x i1> result to
; i16 and stores it through %p.
; In the SSE, AVX1 and AVX2 expectations no explicit compare appears; the
; lanes are packed to bytes and the mask is taken with (v)pmovmskb.
; The per-prefix assertions below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them instead of hand-editing.
358define void @bitcast_16i16_store(ptr %p, <16 x i16> %a0) {
359; SSE2-SSSE3-LABEL: bitcast_16i16_store:
360; SSE2-SSSE3:       # %bb.0:
361; SSE2-SSSE3-NEXT:    packsswb %xmm1, %xmm0
362; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
363; SSE2-SSSE3-NEXT:    movw %ax, (%rdi)
364; SSE2-SSSE3-NEXT:    retq
365;
366; AVX1-LABEL: bitcast_16i16_store:
367; AVX1:       # %bb.0:
368; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
369; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
370; AVX1-NEXT:    vpmovmskb %xmm0, %eax
371; AVX1-NEXT:    movw %ax, (%rdi)
372; AVX1-NEXT:    vzeroupper
373; AVX1-NEXT:    retq
374;
375; AVX2-LABEL: bitcast_16i16_store:
376; AVX2:       # %bb.0:
377; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
378; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
379; AVX2-NEXT:    vpmovmskb %xmm0, %eax
380; AVX2-NEXT:    movw %ax, (%rdi)
381; AVX2-NEXT:    vzeroupper
382; AVX2-NEXT:    retq
383;
384; AVX512F-LABEL: bitcast_16i16_store:
385; AVX512F:       # %bb.0:
386; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
387; AVX512F-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
388; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
389; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
390; AVX512F-NEXT:    kmovw %k0, (%rdi)
391; AVX512F-NEXT:    vzeroupper
392; AVX512F-NEXT:    retq
393;
394; AVX512BW-LABEL: bitcast_16i16_store:
395; AVX512BW:       # %bb.0:
396; AVX512BW-NEXT:    vpmovw2m %ymm0, %k0
397; AVX512BW-NEXT:    kmovw %k0, (%rdi)
398; AVX512BW-NEXT:    vzeroupper
399; AVX512BW-NEXT:    retq
; IR under test: negative-lane test, reinterpret as i16, store to %p.
400  %a1 = icmp slt <16 x i16> %a0, zeroinitializer
401  %a2 = bitcast <16 x i1> %a1 to i16
402  store i16 %a2, ptr %p
403  ret void
404}
405
; bitcast_8i32_store: tests each i32 lane of %a0 for "signed less than zero"
; (icmp slt against zeroinitializer), bitcasts the <8 x i1> result to i8 and
; stores it through %p.
; The AVX1/AVX2 runs share the AVX12 prefix and both AVX512 runs share the
; AVX512 prefix here; all four expect vmovmskps followed by a byte store.
; The per-prefix assertions below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them instead of hand-editing.
406define void @bitcast_8i32_store(ptr %p, <8 x i32> %a0) {
407; SSE2-SSSE3-LABEL: bitcast_8i32_store:
408; SSE2-SSSE3:       # %bb.0:
409; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
410; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
411; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
412; SSE2-SSSE3-NEXT:    movb %al, (%rdi)
413; SSE2-SSSE3-NEXT:    retq
414;
415; AVX12-LABEL: bitcast_8i32_store:
416; AVX12:       # %bb.0:
417; AVX12-NEXT:    vmovmskps %ymm0, %eax
418; AVX12-NEXT:    movb %al, (%rdi)
419; AVX12-NEXT:    vzeroupper
420; AVX12-NEXT:    retq
421;
422; AVX512-LABEL: bitcast_8i32_store:
423; AVX512:       # %bb.0:
424; AVX512-NEXT:    vmovmskps %ymm0, %eax
425; AVX512-NEXT:    movb %al, (%rdi)
426; AVX512-NEXT:    vzeroupper
427; AVX512-NEXT:    retq
; IR under test: negative-lane test, reinterpret as i8, store to %p.
428  %a1 = icmp slt <8 x i32> %a0, zeroinitializer
429  %a2 = bitcast <8 x i1> %a1 to i8
430  store i8 %a2, ptr %p
431  ret void
432}
433
; bitcast_4i64_store: tests each i64 lane of %a0 for "signed less than zero"
; (icmp slt against zeroinitializer), bitcasts the <4 x i1> result to i4 and
; stores it through %p.
; Every expectation writes the i4 with a byte-sized store (movb). The
; AVX1/AVX2 runs share the AVX12 prefix here; the two AVX512 runs are checked
; separately (kmovw vs kmovd).
; The per-prefix assertions below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them instead of hand-editing.
434define void @bitcast_4i64_store(ptr %p, <4 x i64> %a0) {
435; SSE2-SSSE3-LABEL: bitcast_4i64_store:
436; SSE2-SSSE3:       # %bb.0:
437; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
438; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
439; SSE2-SSSE3-NEXT:    movb %al, (%rdi)
440; SSE2-SSSE3-NEXT:    retq
441;
442; AVX12-LABEL: bitcast_4i64_store:
443; AVX12:       # %bb.0:
444; AVX12-NEXT:    vmovmskpd %ymm0, %eax
445; AVX12-NEXT:    movb %al, (%rdi)
446; AVX12-NEXT:    vzeroupper
447; AVX12-NEXT:    retq
448;
449; AVX512F-LABEL: bitcast_4i64_store:
450; AVX512F:       # %bb.0:
451; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
452; AVX512F-NEXT:    vpcmpgtq %ymm0, %ymm1, %k0
453; AVX512F-NEXT:    kmovw %k0, %eax
454; AVX512F-NEXT:    movb %al, (%rdi)
455; AVX512F-NEXT:    vzeroupper
456; AVX512F-NEXT:    retq
457;
458; AVX512BW-LABEL: bitcast_4i64_store:
459; AVX512BW:       # %bb.0:
460; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
461; AVX512BW-NEXT:    vpcmpgtq %ymm0, %ymm1, %k0
462; AVX512BW-NEXT:    kmovd %k0, %eax
463; AVX512BW-NEXT:    movb %al, (%rdi)
464; AVX512BW-NEXT:    vzeroupper
465; AVX512BW-NEXT:    retq
; IR under test: negative-lane test, reinterpret as i4, store to %p.
466  %a1 = icmp slt <4 x i64> %a0, zeroinitializer
467  %a2 = bitcast <4 x i1> %a1 to i4
468  store i4 %a2, ptr %p
469  ret void
470}
471