; xref: /llvm-project/llvm/test/CodeGen/X86/bitcast-vector-bool.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512

;
; 128-bit vectors
;

; Sign-compare <2 x i64> with zero, bitcast the <2 x i1> mask (no-op cast) and
; add its two lanes.
define i1 @bitcast_v2i64_to_v2i1(<2 x i64> %a0) nounwind {
; SSE-LABEL: bitcast_v2i64_to_v2i1:
; SSE:       # %bb.0:
; SSE-NEXT:    movmskpd %xmm0, %ecx
; SSE-NEXT:    movl %ecx, %eax
; SSE-NEXT:    shrb %al
; SSE-NEXT:    addb %cl, %al
; SSE-NEXT:    retq
;
; AVX12-LABEL: bitcast_v2i64_to_v2i1:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskpd %xmm0, %ecx
; AVX12-NEXT:    movl %ecx, %eax
; AVX12-NEXT:    shrb %al
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v2i64_to_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0
; AVX512-NEXT:    kshiftrw $1, %k0, %k1
; AVX512-NEXT:    kmovd %k1, %ecx
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <2 x i64> %a0, zeroinitializer
  %2 = bitcast <2 x i1> %1 to <2 x i1>
  %3 = extractelement <2 x i1> %2, i32 0
  %4 = extractelement <2 x i1> %2, i32 1
  %5 = add i1 %3, %4
  ret i1 %5
}
; Truncate <2 x i64> to <2 x i1>, bitcast the mask to i2 and compare it to 0.
define i1 @trunc_v2i64_cmp(<2 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v2i64_cmp:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    psllq $63, %xmm0
; SSE2-SSSE3-NEXT:    movmskpd %xmm0, %eax
; SSE2-SSSE3-NEXT:    testl %eax, %eax
; SSE2-SSSE3-NEXT:    sete %al
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc_v2i64_cmp:
; SSE41:       # %bb.0:
; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    sete %al
; SSE41-NEXT:    retq
;
; AVX12-LABEL: trunc_v2i64_cmp:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX12-NEXT:    sete %al
; AVX12-NEXT:    retq
;
; AVX512-LABEL: trunc_v2i64_cmp:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [1,1]
; AVX512-NEXT:    vptest %xmm1, %xmm0
; AVX512-NEXT:    sete %al
; AVX512-NEXT:    retq
  %1 = trunc <2 x i64> %a0 to <2 x i1>
  %2 = bitcast <2 x i1> %1 to i2
  %3 = icmp eq i2 %2, 0
  ret i1 %3
}
; Bitcast the <4 x i1> sign-mask of <4 x i32> to <2 x i2> and add both lanes.
define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
; SSE-LABEL: bitcast_v4i32_to_v2i2:
; SSE:       # %bb.0:
; SSE-NEXT:    movmskps %xmm0, %eax
; SSE-NEXT:    movl %eax, %ecx
; SSE-NEXT:    shrb $2, %cl
; SSE-NEXT:    andb $3, %al
; SSE-NEXT:    addb %cl, %al
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX-LABEL: bitcast_v4i32_to_v2i2:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    movl %eax, %ecx
; AVX-NEXT:    shrb $2, %cl
; AVX-NEXT:    andb $3, %al
; AVX-NEXT:    addb %cl, %al
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
  %1 = icmp slt <4 x i32> %a0, zeroinitializer
  %2 = bitcast <4 x i1> %1 to <2 x i2>
  %3 = extractelement <2 x i2> %2, i32 0
  %4 = extractelement <2 x i2> %2, i32 1
  %5 = add i2 %3, %4
  ret i2 %5
}
; Truncate <4 x i32> to <4 x i1>, bitcast to i4 and test for all-ones.
define i1 @trunc_v4i32_cmp(<4 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v4i32_cmp:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pslld $31, %xmm0
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    xorl $15, %eax
; SSE2-SSSE3-NEXT:    sete %al
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc_v4i32_cmp:
; SSE41:       # %bb.0:
; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    setb %al
; SSE41-NEXT:    retq
;
; AVX12-LABEL: trunc_v4i32_cmp:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX12-NEXT:    setb %al
; AVX12-NEXT:    retq
;
; AVX512-LABEL: trunc_v4i32_cmp:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4294967297,4294967297]
; AVX512-NEXT:    vptest %xmm1, %xmm0
; AVX512-NEXT:    setb %al
; AVX512-NEXT:    retq
  %1 = trunc <4 x i32> %a0 to <4 x i1>
  %2 = bitcast <4 x i1> %1 to i4
  %3 = icmp eq i4 %2, -1
  ret i1 %3
}
; Bitcast the <8 x i1> sign-mask of <8 x i16> to <2 x i4> and add both lanes.
define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
; SSE-LABEL: bitcast_v8i16_to_v2i4:
; SSE:       # %bb.0:
; SSE-NEXT:    packsswb %xmm0, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    movl %eax, %ecx
; SSE-NEXT:    shrb $4, %cl
; SSE-NEXT:    andb $15, %al
; SSE-NEXT:    addb %cl, %al
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX12-LABEL: bitcast_v8i16_to_v2i4:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    movl %eax, %ecx
; AVX12-NEXT:    shrb $4, %cl
; AVX12-NEXT:    andb $15, %al
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v8i16_to_v2i4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovw2m %xmm0, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movl %eax, %ecx
; AVX512-NEXT:    shrb $4, %cl
; AVX512-NEXT:    andb $15, %al
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <8 x i16> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}
; Truncate <8 x i16> to <8 x i1>, bitcast to i8 and test for non-zero.
define i1 @trunc_v8i16_cmp(<8 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v8i16_cmp:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    psllw $7, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    testl $21845, %eax # imm = 0x5555
; SSE2-SSSE3-NEXT:    setne %al
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc_v8i16_cmp:
; SSE41:       # %bb.0:
; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    setne %al
; SSE41-NEXT:    retq
;
; AVX12-LABEL: trunc_v8i16_cmp:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX12-NEXT:    setne %al
; AVX12-NEXT:    retq
;
; AVX512-LABEL: trunc_v8i16_cmp:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [281479271743489,281479271743489]
; AVX512-NEXT:    vptest %xmm1, %xmm0
; AVX512-NEXT:    setne %al
; AVX512-NEXT:    retq
  %1 = trunc <8 x i16> %a0 to <8 x i1>
  %2 = bitcast <8 x i1> %1 to i8
  %3 = icmp ne i8 %2, 0
  ret i1 %3
}
; Bitcast the <16 x i1> sign-mask of <16 x i8> to <2 x i8> and add both lanes.
define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
; SSE-LABEL: bitcast_v16i8_to_v2i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovmskb %xmm0, %ecx
; SSE-NEXT:    movl %ecx, %eax
; SSE-NEXT:    shrl $8, %eax
; SSE-NEXT:    addb %cl, %al
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX12-LABEL: bitcast_v16i8_to_v2i8:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpmovmskb %xmm0, %ecx
; AVX12-NEXT:    movl %ecx, %eax
; AVX12-NEXT:    shrl $8, %eax
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v16i8_to_v2i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovb2m %xmm0, %k0
; AVX512-NEXT:    kshiftrw $8, %k0, %k1
; AVX512-NEXT:    kmovd %k0, %ecx
; AVX512-NEXT:    kmovd %k1, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <16 x i8> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}
; Truncate <16 x i8> to <16 x i1>, bitcast to i16 and test for not-all-ones.
define i1 @trunc_v16i8_cmp(<16 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v16i8_cmp:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    psllw $7, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; SSE2-SSSE3-NEXT:    setne %al
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc_v16i8_cmp:
; SSE41:       # %bb.0:
; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    setae %al
; SSE41-NEXT:    retq
;
; AVX12-LABEL: trunc_v16i8_cmp:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX12-NEXT:    setae %al
; AVX12-NEXT:    retq
;
; AVX512-LABEL: trunc_v16i8_cmp:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [72340172838076673,72340172838076673]
; AVX512-NEXT:    vptest %xmm1, %xmm0
; AVX512-NEXT:    setae %al
; AVX512-NEXT:    retq
  %1 = trunc <16 x i8> %a0 to <16 x i1>
  %2 = bitcast <16 x i1> %1 to i16
  %3 = icmp ne i16 %2, -1
  ret i1 %3
}

;
; 256-bit vectors
;

; Bitcast the <4 x i1> sign-mask of <4 x i64> to <2 x i2> and add both lanes.
define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
; SSE-LABEL: bitcast_v4i64_to_v2i2:
; SSE:       # %bb.0:
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
; SSE-NEXT:    movmskps %xmm0, %eax
; SSE-NEXT:    movl %eax, %ecx
; SSE-NEXT:    shrb $2, %cl
; SSE-NEXT:    andb $3, %al
; SSE-NEXT:    addb %cl, %al
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX-LABEL: bitcast_v4i64_to_v2i2:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskpd %ymm0, %eax
; AVX-NEXT:    movl %eax, %ecx
; AVX-NEXT:    shrb $2, %cl
; AVX-NEXT:    andb $3, %al
; AVX-NEXT:    addb %cl, %al
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %1 = icmp slt <4 x i64> %a0, zeroinitializer
  %2 = bitcast <4 x i1> %1 to <2 x i2>
  %3 = extractelement <2 x i2> %2, i32 0
  %4 = extractelement <2 x i2> %2, i32 1
  %5 = add i2 %3, %4
  ret i2 %5
}
; Truncate <4 x i64> to <4 x i1>, bitcast to i4 and test for non-zero.
define i1 @trunc_v4i64_cmp(<4 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v4i64_cmp:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-SSSE3-NEXT:    pslld $31, %xmm0
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    testl %eax, %eax
; SSE2-SSSE3-NEXT:    setne %al
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc_v4i64_cmp:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    setne %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_v4i64_cmp:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
; AVX1-NEXT:    setne %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_v4i64_cmp:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
; AVX2-NEXT:    vptest %ymm1, %ymm0
; AVX2-NEXT:    setne %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_v4i64_cmp:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
; AVX512-NEXT:    vptest %ymm1, %ymm0
; AVX512-NEXT:    setne %al
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = trunc <4 x i64> %a0 to <4 x i1>
  %2 = bitcast <4 x i1> %1 to i4
  %3 = icmp ne i4 %2, 0
  ret i1 %3
}
; Bitcast the <8 x i1> sign-mask of <8 x i32> to <2 x i4> and add both lanes.
define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
; SSE-LABEL: bitcast_v8i32_to_v2i4:
; SSE:       # %bb.0:
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packsswb %xmm0, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    movl %eax, %ecx
; SSE-NEXT:    shrb $4, %cl
; SSE-NEXT:    andb $15, %al
; SSE-NEXT:    addb %cl, %al
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX-LABEL: bitcast_v8i32_to_v2i4:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskps %ymm0, %eax
; AVX-NEXT:    movl %eax, %ecx
; AVX-NEXT:    shrb $4, %cl
; AVX-NEXT:    andb $15, %al
; AVX-NEXT:    addb %cl, %al
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %1 = icmp slt <8 x i32> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}
; Truncate <8 x i32> to <8 x i1>, bitcast to i8 and test for not-all-ones.
; NOTE(review): renamed from the typo "trunc_v8i132_cmp" (the operand is
; <8 x i32>); all CHECK-LABEL lines updated to match the test-local symbol.
define i1 @trunc_v8i32_cmp(<8 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v8i32_cmp:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pslld $31, %xmm0
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    xorl $15, %eax
; SSE2-SSSE3-NEXT:    setne %al
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc_v8i32_cmp:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    setae %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_v8i32_cmp:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
; AVX1-NEXT:    setae %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_v8i32_cmp:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
; AVX2-NEXT:    vptest %ymm1, %ymm0
; AVX2-NEXT:    setae %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_v8i32_cmp:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
; AVX512-NEXT:    vptest %ymm1, %ymm0
; AVX512-NEXT:    setae %al
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = trunc <8 x i32> %a0 to <8 x i1>
  %2 = bitcast <8 x i1> %1 to i8
  %3 = icmp ne i8 %2, -1
  ret i1 %3
}
; Bitcast the <16 x i1> sign-mask of <16 x i16> to <2 x i8> and add both lanes.
define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
; SSE-LABEL: bitcast_v16i16_to_v2i8:
; SSE:       # %bb.0:
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %ecx
; SSE-NEXT:    movl %ecx, %eax
; SSE-NEXT:    shrl $8, %eax
; SSE-NEXT:    addb %cl, %al
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: bitcast_v16i16_to_v2i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    movl %ecx, %eax
; AVX1-NEXT:    shrl $8, %eax
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v16i16_to_v2i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %ecx
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrl $8, %eax
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v16i16_to_v2i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovw2m %ymm0, %k0
; AVX512-NEXT:    kshiftrw $8, %k0, %k1
; AVX512-NEXT:    kmovd %k0, %ecx
; AVX512-NEXT:    kmovd %k1, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <16 x i16> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}
; Truncate <16 x i16> to <16 x i1>, bitcast to i16 and compare it to 0.
define i1 @trunc_v16i16_cmp(<16 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v16i16_cmp:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    por %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psllw $7, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    testl $21845, %eax # imm = 0x5555
; SSE2-SSSE3-NEXT:    sete %al
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc_v16i16_cmp:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    sete %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_v16i16_cmp:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_v16i16_cmp:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
; AVX2-NEXT:    vptest %ymm1, %ymm0
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_v16i16_cmp:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
; AVX512-NEXT:    vptest %ymm1, %ymm0
; AVX512-NEXT:    sete %al
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = trunc <16 x i16> %a0 to <16 x i1>
  %2 = bitcast <16 x i1> %1 to i16
  %3 = icmp eq i16 %2, 0
  ret i1 %3
}
; Bitcast the <32 x i1> sign-mask of <32 x i8> to <2 x i16> and add both lanes.
define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
; SSE-LABEL: bitcast_v32i8_to_v2i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovmskb %xmm1, %ecx
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    addl %ecx, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: bitcast_v32i8_to_v2i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpmovmskb %xmm1, %ecx
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    addl %ecx, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v32i8_to_v2i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrl $16, %eax
; AVX2-NEXT:    addl %ecx, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v32i8_to_v2i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovb2m %ymm0, %k0
; AVX512-NEXT:    kshiftrd $16, %k0, %k1
; AVX512-NEXT:    kmovd %k0, %ecx
; AVX512-NEXT:    kmovd %k1, %eax
; AVX512-NEXT:    addl %ecx, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <32 x i8> %a0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to <2 x i16>
  %3 = extractelement <2 x i16> %2, i32 0
  %4 = extractelement <2 x i16> %2, i32 1
  %5 = add i16 %3, %4
  ret i16 %5
}
; Truncate <32 x i8> to <32 x i1>, bitcast to i32 and test for all-ones.
define i1 @trunc_v32i8_cmp(<32 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v32i8_cmp:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psllw $7, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; SSE2-SSSE3-NEXT:    sete %al
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc_v32i8_cmp:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    setb %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_v32i8_cmp:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
; AVX1-NEXT:    setb %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_v32i8_cmp:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
; AVX2-NEXT:    vptest %ymm1, %ymm0
; AVX2-NEXT:    setb %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_v32i8_cmp:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
; AVX512-NEXT:    vptest %ymm1, %ymm0
; AVX512-NEXT:    setb %al
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = trunc <32 x i8> %a0 to <32 x i1>
  %2 = bitcast <32 x i1> %1 to i32
  %3 = icmp eq i32 %2, -1
  ret i1 %3
}

;
; 512-bit vectors
;

; Bitcast the <8 x i1> sign-mask of <8 x i64> to <2 x i4> and add both lanes.
define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
; SSE-LABEL: bitcast_v8i64_to_v2i4:
; SSE:       # %bb.0:
; SSE-NEXT:    packssdw %xmm3, %xmm2
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packssdw %xmm2, %xmm0
; SSE-NEXT:    packsswb %xmm0, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    movl %eax, %ecx
; SSE-NEXT:    shrb $4, %cl
; SSE-NEXT:    andb $15, %al
; SSE-NEXT:    addb %cl, %al
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: bitcast_v8i64_to_v2i4:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrb $4, %cl
; AVX1-NEXT:    andb $15, %al
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v8i64_to_v2i4:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrb $4, %cl
; AVX2-NEXT:    andb $15, %al
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v8i64_to_v2i4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movl %eax, %ecx
; AVX512-NEXT:    shrb $4, %cl
; AVX512-NEXT:    andb $15, %al
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <8 x i64> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}
; Truncate <8 x i64> to <8 x i1>, bitcast to i8 and test for all-ones.
define i1 @trunc_v8i64_cmp(<8 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v8i64_cmp:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT:    pslld $16, %xmm2
; SSE2-SSSE3-NEXT:    psrad $16, %xmm2
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-SSSE3-NEXT:    pslld $16, %xmm0
; SSE2-SSSE3-NEXT:    psrad $16, %xmm0
; SSE2-SSSE3-NEXT:    packssdw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psllw $15, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    cmpb $-1, %al
; SSE2-SSSE3-NEXT:    sete %al
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc_v8i64_cmp:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pand %xmm2, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    setb %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_v8i64_cmp:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
; AVX1-NEXT:    setb %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_v8i64_cmp:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
; AVX2-NEXT:    vptest %ymm1, %ymm0
; AVX2-NEXT:    setb %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_v8i64_cmp:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
; AVX512-NEXT:    kortestw %k0, %k0
; AVX512-NEXT:    sete %al
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = trunc <8 x i64> %a0 to <8 x i1>
  %2 = bitcast <8 x i1> %1 to i8
  %3 = icmp eq i8 %2, -1
  ret i1 %3
}
; Bitcast the <16 x i1> sign-mask of <16 x i32> to <2 x i8> and add both lanes.
define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
; SSE-LABEL: bitcast_v16i32_to_v2i8:
; SSE:       # %bb.0:
; SSE-NEXT:    packssdw %xmm3, %xmm2
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packsswb %xmm2, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %ecx
; SSE-NEXT:    movl %ecx, %eax
; SSE-NEXT:    shrl $8, %eax
; SSE-NEXT:    addb %cl, %al
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: bitcast_v16i32_to_v2i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    movl %ecx, %eax
; AVX1-NEXT:    shrl $8, %eax
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v16i32_to_v2i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vpmovmskb %xmm0, %ecx
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrl $8, %eax
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v16i32_to_v2i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
; AVX512-NEXT:    kshiftrw $8, %k0, %k1
; AVX512-NEXT:    kmovd %k0, %ecx
; AVX512-NEXT:    kmovd %k1, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <16 x i32> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}
; Truncate <16 x i32> to <16 x i1>, bitcast to i16 and compare it to 0.
define i1 @trunc_v16i32_cmp(<16 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v16i32_cmp:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    por %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    por %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    por %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pslld $31, %xmm0
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    testl %eax, %eax
; SSE2-SSSE3-NEXT:    sete %al
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc_v16i32_cmp:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm3, %xmm1
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    sete %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_v16i32_cmp:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_v16i32_cmp:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
; AVX2-NEXT:    vptest %ymm1, %ymm0
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_v16i32_cmp:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
; AVX512-NEXT:    kortestw %k0, %k0
; AVX512-NEXT:    sete %al
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = trunc <16 x i32> %a0 to <16 x i1>
  %2 = bitcast <16 x i1> %1 to i16
  %3 = icmp eq i16 %2, 0
  ret i1 %3
}
; Bitcast the <32 x i1> sign-mask of <32 x i16> to <2 x i16> and add both lanes.
define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
; SSE-LABEL: bitcast_v32i16_to_v2i16:
; SSE:       # %bb.0:
; SSE-NEXT:    packsswb %xmm3, %xmm2
; SSE-NEXT:    pmovmskb %xmm2, %ecx
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    addl %ecx, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: bitcast_v32i16_to_v2i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpmovmskb %xmm1, %ecx
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    addl %ecx, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v32i16_to_v2i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrl $16, %eax
; AVX2-NEXT:    addl %ecx, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v32i16_to_v2i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovw2m %zmm0, %k0
; AVX512-NEXT:    kshiftrd $16, %k0, %k1
; AVX512-NEXT:    kmovd %k0, %ecx
; AVX512-NEXT:    kmovd %k1, %eax
; AVX512-NEXT:    addl %ecx, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <32 x i16> %a0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to <2 x i16>
  %3 = extractelement <2 x i16> %2, i32 0
  %4 = extractelement <2 x i16> %2, i32 1
  %5 = add i16 %3, %4
  ret i16 %5
}
927
; trunc <32 x i16> to <32 x i1> keeps only bit 0 of each lane; the mask is
; bitcast to i32 and compared != -1, i.e. "is any lane's low bit clear?"
; Targets fold this into an AND-with-ones plus ptest/kortest sequence.
928define i1 @trunc_v32i16_cmp(<32 x i16> %a0) nounwind {
929; SSE2-SSSE3-LABEL: trunc_v32i16_cmp:
930; SSE2-SSSE3:       # %bb.0:
931; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm1
932; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
933; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
934; SSE2-SSSE3-NEXT:    psllw $7, %xmm0
935; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
936; SSE2-SSSE3-NEXT:    notl %eax
937; SSE2-SSSE3-NEXT:    testl $21845, %eax # imm = 0x5555
938; SSE2-SSSE3-NEXT:    setne %al
939; SSE2-SSSE3-NEXT:    retq
940;
941; SSE41-LABEL: trunc_v32i16_cmp:
942; SSE41:       # %bb.0:
943; SSE41-NEXT:    pand %xmm3, %xmm1
944; SSE41-NEXT:    pand %xmm2, %xmm0
945; SSE41-NEXT:    pand %xmm1, %xmm0
946; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
947; SSE41-NEXT:    setae %al
948; SSE41-NEXT:    retq
949;
950; AVX1-LABEL: trunc_v32i16_cmp:
951; AVX1:       # %bb.0:
952; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
953; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
954; AVX1-NEXT:    setae %al
955; AVX1-NEXT:    vzeroupper
956; AVX1-NEXT:    retq
957;
958; AVX2-LABEL: trunc_v32i16_cmp:
959; AVX2:       # %bb.0:
960; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
961; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
962; AVX2-NEXT:    vptest %ymm1, %ymm0
963; AVX2-NEXT:    setae %al
964; AVX2-NEXT:    vzeroupper
965; AVX2-NEXT:    retq
966;
967; AVX512-LABEL: trunc_v32i16_cmp:
968; AVX512:       # %bb.0:
969; AVX512-NEXT:    vpbroadcastw {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
970; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
971; AVX512-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
972; AVX512-NEXT:    kortestw %k0, %k0
973; AVX512-NEXT:    setne %al
974; AVX512-NEXT:    vzeroupper
975; AVX512-NEXT:    retq
  ; icmp ne -1: true unless every truncated lane bit is 1
976  %1 = trunc <32 x i16> %a0 to <32 x i1>
977  %2 = bitcast <32 x i1> %1 to i32
978  %3 = icmp ne i32 %2, -1
979  ret i1 %3
980}
981
; Sign-bit mask of 64 x i8 bitcast to <2 x i32>: the low and high 32-bit
; halves of the 64-bit mask are extracted and summed. Pre-AVX512 targets
; assemble the mask from per-128/256-bit pmovmskb results.
982define i32 @bitcast_v64i8_to_v2i32(<64 x i8> %a0) nounwind {
983; SSE2-SSSE3-LABEL: bitcast_v64i8_to_v2i32:
984; SSE2-SSSE3:       # %bb.0:
985; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
986; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %ecx
987; SSE2-SSSE3-NEXT:    shll $16, %ecx
988; SSE2-SSSE3-NEXT:    orl %eax, %ecx
989; SSE2-SSSE3-NEXT:    pmovmskb %xmm2, %eax
990; SSE2-SSSE3-NEXT:    pmovmskb %xmm3, %edx
991; SSE2-SSSE3-NEXT:    shll $16, %edx
992; SSE2-SSSE3-NEXT:    orl %eax, %edx
993; SSE2-SSSE3-NEXT:    shlq $32, %rdx
994; SSE2-SSSE3-NEXT:    orq %rcx, %rdx
995; SSE2-SSSE3-NEXT:    movq %rdx, %xmm0
996; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
997; SSE2-SSSE3-NEXT:    movd %xmm0, %eax
998; SSE2-SSSE3-NEXT:    addl %ecx, %eax
999; SSE2-SSSE3-NEXT:    retq
1000;
1001; SSE41-LABEL: bitcast_v64i8_to_v2i32:
1002; SSE41:       # %bb.0:
1003; SSE41-NEXT:    pmovmskb %xmm2, %eax
1004; SSE41-NEXT:    pmovmskb %xmm3, %ecx
1005; SSE41-NEXT:    shll $16, %ecx
1006; SSE41-NEXT:    orl %eax, %ecx
1007; SSE41-NEXT:    pmovmskb %xmm0, %edx
1008; SSE41-NEXT:    pmovmskb %xmm1, %eax
1009; SSE41-NEXT:    shll $16, %eax
1010; SSE41-NEXT:    orl %edx, %eax
1011; SSE41-NEXT:    addl %ecx, %eax
1012; SSE41-NEXT:    retq
1013;
1014; AVX1-LABEL: bitcast_v64i8_to_v2i32:
1015; AVX1:       # %bb.0:
1016; AVX1-NEXT:    vpmovmskb %xmm1, %eax
1017; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
1018; AVX1-NEXT:    vpmovmskb %xmm1, %ecx
1019; AVX1-NEXT:    shll $16, %ecx
1020; AVX1-NEXT:    orl %eax, %ecx
1021; AVX1-NEXT:    vpmovmskb %xmm0, %edx
1022; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
1023; AVX1-NEXT:    vpmovmskb %xmm0, %eax
1024; AVX1-NEXT:    shll $16, %eax
1025; AVX1-NEXT:    orl %edx, %eax
1026; AVX1-NEXT:    addl %ecx, %eax
1027; AVX1-NEXT:    vzeroupper
1028; AVX1-NEXT:    retq
1029;
1030; AVX2-LABEL: bitcast_v64i8_to_v2i32:
1031; AVX2:       # %bb.0:
1032; AVX2-NEXT:    vpmovmskb %ymm1, %ecx
1033; AVX2-NEXT:    vpmovmskb %ymm0, %eax
1034; AVX2-NEXT:    addl %ecx, %eax
1035; AVX2-NEXT:    vzeroupper
1036; AVX2-NEXT:    retq
1037;
1038; AVX512-LABEL: bitcast_v64i8_to_v2i32:
1039; AVX512:       # %bb.0:
1040; AVX512-NEXT:    vpmovb2m %zmm0, %k0
1041; AVX512-NEXT:    kshiftrq $32, %k0, %k1
1042; AVX512-NEXT:    kmovd %k0, %ecx
1043; AVX512-NEXT:    kmovd %k1, %eax
1044; AVX512-NEXT:    addl %ecx, %eax
1045; AVX512-NEXT:    vzeroupper
1046; AVX512-NEXT:    retq
  ; %1 = 64-bit lane mask from the sign bits of %a0
1047  %1 = icmp slt <64 x i8> %a0, zeroinitializer
1048  %2 = bitcast <64 x i1> %1 to <2 x i32>
1049  %3 = extractelement <2 x i32> %2, i32 0
1050  %4 = extractelement <2 x i32> %2, i32 1
1051  %5 = add i32 %3, %4
1052  ret i32 %5
1053}
1054
; trunc <64 x i8> to <64 x i1> keeps only bit 0 of each byte; the mask is
; bitcast to i64 and compared != 0, i.e. "is any lane's low bit set?"
; Targets reduce this to OR-of-halves plus a ptest/testmd of the low bits.
1055define i1 @trunc_v64i8_cmp(<64 x i8> %a0) nounwind {
1056; SSE2-SSSE3-LABEL: trunc_v64i8_cmp:
1057; SSE2-SSSE3:       # %bb.0:
1058; SSE2-SSSE3-NEXT:    por %xmm3, %xmm1
1059; SSE2-SSSE3-NEXT:    por %xmm2, %xmm0
1060; SSE2-SSSE3-NEXT:    por %xmm1, %xmm0
1061; SSE2-SSSE3-NEXT:    psllw $7, %xmm0
1062; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
1063; SSE2-SSSE3-NEXT:    testl %eax, %eax
1064; SSE2-SSSE3-NEXT:    setne %al
1065; SSE2-SSSE3-NEXT:    retq
1066;
1067; SSE41-LABEL: trunc_v64i8_cmp:
1068; SSE41:       # %bb.0:
1069; SSE41-NEXT:    por %xmm3, %xmm1
1070; SSE41-NEXT:    por %xmm2, %xmm0
1071; SSE41-NEXT:    por %xmm1, %xmm0
1072; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1073; SSE41-NEXT:    setne %al
1074; SSE41-NEXT:    retq
1075;
1076; AVX1-LABEL: trunc_v64i8_cmp:
1077; AVX1:       # %bb.0:
1078; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
1079; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
1080; AVX1-NEXT:    setne %al
1081; AVX1-NEXT:    vzeroupper
1082; AVX1-NEXT:    retq
1083;
1084; AVX2-LABEL: trunc_v64i8_cmp:
1085; AVX2:       # %bb.0:
1086; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
1087; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
1088; AVX2-NEXT:    vptest %ymm1, %ymm0
1089; AVX2-NEXT:    setne %al
1090; AVX2-NEXT:    vzeroupper
1091; AVX2-NEXT:    retq
1092;
1093; AVX512-LABEL: trunc_v64i8_cmp:
1094; AVX512:       # %bb.0:
1095; AVX512-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
1096; AVX512-NEXT:    kortestw %k0, %k0
1097; AVX512-NEXT:    setne %al
1098; AVX512-NEXT:    vzeroupper
1099; AVX512-NEXT:    retq
  ; icmp ne 0: true if any truncated lane bit is 1
1100  %1 = trunc <64 x i8> %a0 to <64 x i1>
1101  %2 = bitcast <64 x i1> %1 to i64
1102  %3 = icmp ne i64 %2, 0
1103  ret i1 %3
1104}
1105
; Sign-bit mask of 128 x i8 bitcast to <2 x i64>: the low and high 64-bit
; halves of the 128-bit mask are extracted and summed. Pre-AVX512 targets
; build each 64-bit half from four (SSE) or two (AVX) pmovmskb results.
1106define i64 @bitcast_v128i8_to_v2i64(<128 x i8> %a0) nounwind {
1107; SSE-LABEL: bitcast_v128i8_to_v2i64:
1108; SSE:       # %bb.0:
1109; SSE-NEXT:    pmovmskb %xmm4, %eax
1110; SSE-NEXT:    pmovmskb %xmm5, %ecx
1111; SSE-NEXT:    shll $16, %ecx
1112; SSE-NEXT:    orl %eax, %ecx
1113; SSE-NEXT:    pmovmskb %xmm6, %eax
1114; SSE-NEXT:    pmovmskb %xmm7, %edx
1115; SSE-NEXT:    shll $16, %edx
1116; SSE-NEXT:    orl %eax, %edx
1117; SSE-NEXT:    shlq $32, %rdx
1118; SSE-NEXT:    orq %rcx, %rdx
1119; SSE-NEXT:    pmovmskb %xmm0, %eax
1120; SSE-NEXT:    pmovmskb %xmm1, %ecx
1121; SSE-NEXT:    shll $16, %ecx
1122; SSE-NEXT:    orl %eax, %ecx
1123; SSE-NEXT:    pmovmskb %xmm2, %esi
1124; SSE-NEXT:    pmovmskb %xmm3, %eax
1125; SSE-NEXT:    shll $16, %eax
1126; SSE-NEXT:    orl %esi, %eax
1127; SSE-NEXT:    shlq $32, %rax
1128; SSE-NEXT:    orq %rcx, %rax
1129; SSE-NEXT:    addq %rdx, %rax
1130; SSE-NEXT:    retq
1131;
1132; AVX1-LABEL: bitcast_v128i8_to_v2i64:
1133; AVX1:       # %bb.0:
1134; AVX1-NEXT:    vpmovmskb %xmm2, %eax
1135; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
1136; AVX1-NEXT:    vpmovmskb %xmm2, %edx
1137; AVX1-NEXT:    shll $16, %edx
1138; AVX1-NEXT:    orl %eax, %edx
1139; AVX1-NEXT:    vpmovmskb %xmm3, %eax
1140; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm2
1141; AVX1-NEXT:    vpmovmskb %xmm2, %ecx
1142; AVX1-NEXT:    shll $16, %ecx
1143; AVX1-NEXT:    orl %eax, %ecx
1144; AVX1-NEXT:    shlq $32, %rcx
1145; AVX1-NEXT:    orq %rdx, %rcx
1146; AVX1-NEXT:    vpmovmskb %xmm0, %eax
1147; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
1148; AVX1-NEXT:    vpmovmskb %xmm0, %edx
1149; AVX1-NEXT:    shll $16, %edx
1150; AVX1-NEXT:    orl %eax, %edx
1151; AVX1-NEXT:    vpmovmskb %xmm1, %esi
1152; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
1153; AVX1-NEXT:    vpmovmskb %xmm0, %eax
1154; AVX1-NEXT:    shll $16, %eax
1155; AVX1-NEXT:    orl %esi, %eax
1156; AVX1-NEXT:    shlq $32, %rax
1157; AVX1-NEXT:    orq %rdx, %rax
1158; AVX1-NEXT:    addq %rcx, %rax
1159; AVX1-NEXT:    vzeroupper
1160; AVX1-NEXT:    retq
1161;
1162; AVX2-LABEL: bitcast_v128i8_to_v2i64:
1163; AVX2:       # %bb.0:
1164; AVX2-NEXT:    vpmovmskb %ymm3, %eax
1165; AVX2-NEXT:    shlq $32, %rax
1166; AVX2-NEXT:    vpmovmskb %ymm2, %ecx
1167; AVX2-NEXT:    orq %rax, %rcx
1168; AVX2-NEXT:    vpmovmskb %ymm1, %edx
1169; AVX2-NEXT:    shlq $32, %rdx
1170; AVX2-NEXT:    vpmovmskb %ymm0, %eax
1171; AVX2-NEXT:    orq %rdx, %rax
1172; AVX2-NEXT:    addq %rcx, %rax
1173; AVX2-NEXT:    vzeroupper
1174; AVX2-NEXT:    retq
1175;
1176; AVX512-LABEL: bitcast_v128i8_to_v2i64:
1177; AVX512:       # %bb.0:
1178; AVX512-NEXT:    vpmovb2m %zmm1, %k0
1179; AVX512-NEXT:    kmovq %k0, %rcx
1180; AVX512-NEXT:    vpmovb2m %zmm0, %k0
1181; AVX512-NEXT:    kmovq %k0, %rax
1182; AVX512-NEXT:    addq %rcx, %rax
1183; AVX512-NEXT:    vzeroupper
1184; AVX512-NEXT:    retq
  ; %1 = 128-bit lane mask from the sign bits of %a0
1185  %1 = icmp slt <128 x i8> %a0, zeroinitializer
1186  %2 = bitcast <128 x i1> %1 to <2 x i64>
1187  %3 = extractelement <2 x i64> %2, i32 0
1188  %4 = extractelement <2 x i64> %2, i32 1
1189  %5 = add i64 %3, %4
1190  ret i64 %5
1191}
1192
; trunc <128 x i8> to <128 x i1> bitcast to i128, compared != -1: "is any
; lane's low bit clear?" All targets shift the low bit into the sign bit
; (psllw $7), gather the mask into two 64-bit halves, and test against
; all-ones via pcmpeqb/ptest.
1193define i1 @trunc_v128i8_cmp(<128 x i8> %a0) nounwind {
1194; SSE2-SSSE3-LABEL: trunc_v128i8_cmp:
1195; SSE2-SSSE3:       # %bb.0:
1196; SSE2-SSSE3-NEXT:    psllw $7, %xmm0
1197; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
1198; SSE2-SSSE3-NEXT:    psllw $7, %xmm1
1199; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %ecx
1200; SSE2-SSSE3-NEXT:    shll $16, %ecx
1201; SSE2-SSSE3-NEXT:    orl %eax, %ecx
1202; SSE2-SSSE3-NEXT:    psllw $7, %xmm2
1203; SSE2-SSSE3-NEXT:    pmovmskb %xmm2, %edx
1204; SSE2-SSSE3-NEXT:    psllw $7, %xmm3
1205; SSE2-SSSE3-NEXT:    pmovmskb %xmm3, %eax
1206; SSE2-SSSE3-NEXT:    shll $16, %eax
1207; SSE2-SSSE3-NEXT:    orl %edx, %eax
1208; SSE2-SSSE3-NEXT:    shlq $32, %rax
1209; SSE2-SSSE3-NEXT:    orq %rcx, %rax
1210; SSE2-SSSE3-NEXT:    psllw $7, %xmm4
1211; SSE2-SSSE3-NEXT:    pmovmskb %xmm4, %ecx
1212; SSE2-SSSE3-NEXT:    psllw $7, %xmm5
1213; SSE2-SSSE3-NEXT:    pmovmskb %xmm5, %edx
1214; SSE2-SSSE3-NEXT:    shll $16, %edx
1215; SSE2-SSSE3-NEXT:    orl %ecx, %edx
1216; SSE2-SSSE3-NEXT:    psllw $7, %xmm6
1217; SSE2-SSSE3-NEXT:    pmovmskb %xmm6, %ecx
1218; SSE2-SSSE3-NEXT:    psllw $7, %xmm7
1219; SSE2-SSSE3-NEXT:    pmovmskb %xmm7, %esi
1220; SSE2-SSSE3-NEXT:    shll $16, %esi
1221; SSE2-SSSE3-NEXT:    orl %ecx, %esi
1222; SSE2-SSSE3-NEXT:    shlq $32, %rsi
1223; SSE2-SSSE3-NEXT:    orq %rdx, %rsi
1224; SSE2-SSSE3-NEXT:    movq %rsi, %xmm0
1225; SSE2-SSSE3-NEXT:    movq %rax, %xmm1
1226; SSE2-SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1227; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm0
1228; SSE2-SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
1229; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
1230; SSE2-SSSE3-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
1231; SSE2-SSSE3-NEXT:    setne %al
1232; SSE2-SSSE3-NEXT:    retq
1233;
1234; SSE41-LABEL: trunc_v128i8_cmp:
1235; SSE41:       # %bb.0:
1236; SSE41-NEXT:    psllw $7, %xmm0
1237; SSE41-NEXT:    pmovmskb %xmm0, %eax
1238; SSE41-NEXT:    psllw $7, %xmm1
1239; SSE41-NEXT:    pmovmskb %xmm1, %ecx
1240; SSE41-NEXT:    shll $16, %ecx
1241; SSE41-NEXT:    orl %eax, %ecx
1242; SSE41-NEXT:    psllw $7, %xmm2
1243; SSE41-NEXT:    pmovmskb %xmm2, %edx
1244; SSE41-NEXT:    psllw $7, %xmm3
1245; SSE41-NEXT:    pmovmskb %xmm3, %eax
1246; SSE41-NEXT:    shll $16, %eax
1247; SSE41-NEXT:    orl %edx, %eax
1248; SSE41-NEXT:    shlq $32, %rax
1249; SSE41-NEXT:    orq %rcx, %rax
1250; SSE41-NEXT:    psllw $7, %xmm4
1251; SSE41-NEXT:    pmovmskb %xmm4, %ecx
1252; SSE41-NEXT:    psllw $7, %xmm5
1253; SSE41-NEXT:    pmovmskb %xmm5, %edx
1254; SSE41-NEXT:    shll $16, %edx
1255; SSE41-NEXT:    orl %ecx, %edx
1256; SSE41-NEXT:    psllw $7, %xmm6
1257; SSE41-NEXT:    pmovmskb %xmm6, %ecx
1258; SSE41-NEXT:    psllw $7, %xmm7
1259; SSE41-NEXT:    pmovmskb %xmm7, %esi
1260; SSE41-NEXT:    shll $16, %esi
1261; SSE41-NEXT:    orl %ecx, %esi
1262; SSE41-NEXT:    shlq $32, %rsi
1263; SSE41-NEXT:    orq %rdx, %rsi
1264; SSE41-NEXT:    movq %rsi, %xmm0
1265; SSE41-NEXT:    movq %rax, %xmm1
1266; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1267; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
1268; SSE41-NEXT:    ptest %xmm0, %xmm1
1269; SSE41-NEXT:    setae %al
1270; SSE41-NEXT:    retq
1271;
1272; AVX1-LABEL: trunc_v128i8_cmp:
1273; AVX1:       # %bb.0:
1274; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm4
1275; AVX1-NEXT:    vpmovmskb %xmm4, %eax
1276; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
1277; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
1278; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
1279; AVX1-NEXT:    shll $16, %ecx
1280; AVX1-NEXT:    orl %eax, %ecx
1281; AVX1-NEXT:    vpsllw $7, %xmm1, %xmm0
1282; AVX1-NEXT:    vpmovmskb %xmm0, %edx
1283; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
1284; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
1285; AVX1-NEXT:    vpmovmskb %xmm0, %eax
1286; AVX1-NEXT:    shll $16, %eax
1287; AVX1-NEXT:    orl %edx, %eax
1288; AVX1-NEXT:    shlq $32, %rax
1289; AVX1-NEXT:    orq %rcx, %rax
1290; AVX1-NEXT:    vpsllw $7, %xmm2, %xmm0
1291; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
1292; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm0
1293; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
1294; AVX1-NEXT:    vpmovmskb %xmm0, %edx
1295; AVX1-NEXT:    shll $16, %edx
1296; AVX1-NEXT:    orl %ecx, %edx
1297; AVX1-NEXT:    vpsllw $7, %xmm3, %xmm0
1298; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
1299; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm0
1300; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
1301; AVX1-NEXT:    vpmovmskb %xmm0, %esi
1302; AVX1-NEXT:    shll $16, %esi
1303; AVX1-NEXT:    orl %ecx, %esi
1304; AVX1-NEXT:    shlq $32, %rsi
1305; AVX1-NEXT:    orq %rdx, %rsi
1306; AVX1-NEXT:    vmovq %rsi, %xmm0
1307; AVX1-NEXT:    vmovq %rax, %xmm1
1308; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1309; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
1310; AVX1-NEXT:    vptest %xmm1, %xmm0
1311; AVX1-NEXT:    setae %al
1312; AVX1-NEXT:    vzeroupper
1313; AVX1-NEXT:    retq
1314;
1315; AVX2-LABEL: trunc_v128i8_cmp:
1316; AVX2:       # %bb.0:
1317; AVX2-NEXT:    vpsllw $7, %ymm1, %ymm1
1318; AVX2-NEXT:    vpmovmskb %ymm1, %eax
1319; AVX2-NEXT:    shlq $32, %rax
1320; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0
1321; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
1322; AVX2-NEXT:    orq %rax, %rcx
1323; AVX2-NEXT:    vpsllw $7, %ymm3, %ymm0
1324; AVX2-NEXT:    vpmovmskb %ymm0, %eax
1325; AVX2-NEXT:    shlq $32, %rax
1326; AVX2-NEXT:    vpsllw $7, %ymm2, %ymm0
1327; AVX2-NEXT:    vpmovmskb %ymm0, %edx
1328; AVX2-NEXT:    orq %rax, %rdx
1329; AVX2-NEXT:    vmovq %rdx, %xmm0
1330; AVX2-NEXT:    vmovq %rcx, %xmm1
1331; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1332; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
1333; AVX2-NEXT:    vptest %xmm1, %xmm0
1334; AVX2-NEXT:    setae %al
1335; AVX2-NEXT:    vzeroupper
1336; AVX2-NEXT:    retq
1337;
1338; AVX512-LABEL: trunc_v128i8_cmp:
1339; AVX512:       # %bb.0:
1340; AVX512-NEXT:    vpsllw $7, %zmm0, %zmm0
1341; AVX512-NEXT:    vpmovb2m %zmm0, %k0
1342; AVX512-NEXT:    kmovq %k0, %rax
1343; AVX512-NEXT:    vpsllw $7, %zmm1, %zmm0
1344; AVX512-NEXT:    vpmovb2m %zmm0, %k0
1345; AVX512-NEXT:    kmovq %k0, %rcx
1346; AVX512-NEXT:    vmovq %rcx, %xmm0
1347; AVX512-NEXT:    vmovq %rax, %xmm1
1348; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1349; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
1350; AVX512-NEXT:    vptest %xmm1, %xmm0
1351; AVX512-NEXT:    setae %al
1352; AVX512-NEXT:    vzeroupper
1353; AVX512-NEXT:    retq
  ; icmp ne -1: true unless every truncated lane bit is 1
1354  %1 = trunc <128 x i8> %a0 to <128 x i1>
1355  %2 = bitcast <128 x i1> %1 to i128
1356  %3 = icmp ne i128 %2, -1
1357  ret i1 %3
1358}
1359
; PR58546: fcmp uno (unordered / NaN test) mask of 16 floats bitcast to
; <2 x i8>, with both bytes returned in an [2 x i8] aggregate. Checks that
; each 8-bit half of the 16-bit mask is extracted into its own return
; register (al / dl) without miscompiling the aggregate return.
1360define [2 x i8] @PR58546(<16 x float> %a0) {
1361; SSE-LABEL: PR58546:
1362; SSE:       # %bb.0:
1363; SSE-NEXT:    xorps %xmm4, %xmm4
1364; SSE-NEXT:    cmpunordps %xmm4, %xmm3
1365; SSE-NEXT:    cmpunordps %xmm4, %xmm2
1366; SSE-NEXT:    packssdw %xmm3, %xmm2
1367; SSE-NEXT:    cmpunordps %xmm4, %xmm1
1368; SSE-NEXT:    cmpunordps %xmm4, %xmm0
1369; SSE-NEXT:    packssdw %xmm1, %xmm0
1370; SSE-NEXT:    packsswb %xmm2, %xmm0
1371; SSE-NEXT:    pmovmskb %xmm0, %eax
1372; SSE-NEXT:    movl %eax, %edx
1373; SSE-NEXT:    shrl $8, %edx
1374; SSE-NEXT:    # kill: def $al killed $al killed $eax
1375; SSE-NEXT:    # kill: def $dl killed $dl killed $edx
1376; SSE-NEXT:    retq
1377;
1378; AVX1-LABEL: PR58546:
1379; AVX1:       # %bb.0:
1380; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1381; AVX1-NEXT:    vcmpunordps %ymm2, %ymm1, %ymm1
1382; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
1383; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
1384; AVX1-NEXT:    vcmpunordps %ymm2, %ymm0, %ymm0
1385; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1386; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
1387; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
1388; AVX1-NEXT:    vpmovmskb %xmm0, %eax
1389; AVX1-NEXT:    movl %eax, %edx
1390; AVX1-NEXT:    shrl $8, %edx
1391; AVX1-NEXT:    # kill: def $al killed $al killed $eax
1392; AVX1-NEXT:    # kill: def $dl killed $dl killed $edx
1393; AVX1-NEXT:    vzeroupper
1394; AVX1-NEXT:    retq
1395;
1396; AVX2-LABEL: PR58546:
1397; AVX2:       # %bb.0:
1398; AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1399; AVX2-NEXT:    vcmpunordps %ymm2, %ymm1, %ymm1
1400; AVX2-NEXT:    vcmpunordps %ymm2, %ymm0, %ymm0
1401; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
1402; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1403; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
1404; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
1405; AVX2-NEXT:    vpmovmskb %xmm0, %eax
1406; AVX2-NEXT:    movl %eax, %edx
1407; AVX2-NEXT:    shrl $8, %edx
1408; AVX2-NEXT:    # kill: def $al killed $al killed $eax
1409; AVX2-NEXT:    # kill: def $dl killed $dl killed $edx
1410; AVX2-NEXT:    vzeroupper
1411; AVX2-NEXT:    retq
1412;
1413; AVX512-LABEL: PR58546:
1414; AVX512:       # %bb.0:
1415; AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1416; AVX512-NEXT:    vcmpunordps %zmm1, %zmm0, %k0
1417; AVX512-NEXT:    kshiftrw $8, %k0, %k1
1418; AVX512-NEXT:    kmovd %k0, %eax
1419; AVX512-NEXT:    kmovd %k1, %edx
1420; AVX512-NEXT:    # kill: def $al killed $al killed $eax
1421; AVX512-NEXT:    # kill: def $dl killed $dl killed $edx
1422; AVX512-NEXT:    vzeroupper
1423; AVX512-NEXT:    retq
  ; fcmp uno x, 0.0 is true where x is NaN
1424  %1 = fcmp uno <16 x float> %a0, zeroinitializer
1425  %2 = bitcast <16 x i1> %1 to <2 x i8>
1426  %3 = extractelement <2 x i8> %2, i64 0
1427  %4 = extractelement <2 x i8> %2, i64 1
1428  %5 = insertvalue [2 x i8] poison, i8 %3, 0
1429  %6 = insertvalue [2 x i8] %5, i8 %4, 1
1430  ret [2 x i8] %6
1431}
1432
; PR59526: select of two <8 x i1> compare masks (eq mask AND sign-of-load
; mask) bitcast to i8 and tested != 0. Checks the whole any-lane reduction
; folds to vtestps / a masked vpcmpgtd rather than materializing the i8.
; Note the <8 x i32> load from %mask is align 1 (unaligned).
1433define i8 @PR59526(<8 x i32> %a, <8 x i32> %b, ptr %mask) {
1434; SSE-LABEL: PR59526:
1435; SSE:       # %bb.0:
1436; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
1437; SSE-NEXT:    pcmpeqd %xmm3, %xmm1
1438; SSE-NEXT:    movdqu (%rdi), %xmm2
1439; SSE-NEXT:    pand %xmm0, %xmm2
1440; SSE-NEXT:    movdqu 16(%rdi), %xmm0
1441; SSE-NEXT:    pand %xmm1, %xmm0
1442; SSE-NEXT:    packssdw %xmm0, %xmm2
1443; SSE-NEXT:    pmovmskb %xmm2, %eax
1444; SSE-NEXT:    testl $43690, %eax # imm = 0xAAAA
1445; SSE-NEXT:    setne %al
1446; SSE-NEXT:    retq
1447;
1448; AVX1-LABEL: PR59526:
1449; AVX1:       # %bb.0:
1450; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1451; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1452; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm3, %xmm2
1453; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1454; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1455; AVX1-NEXT:    vtestps (%rdi), %ymm0
1456; AVX1-NEXT:    setne %al
1457; AVX1-NEXT:    vzeroupper
1458; AVX1-NEXT:    retq
1459;
1460; AVX2-LABEL: PR59526:
1461; AVX2:       # %bb.0:
1462; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
1463; AVX2-NEXT:    vtestps (%rdi), %ymm0
1464; AVX2-NEXT:    setne %al
1465; AVX2-NEXT:    vzeroupper
1466; AVX2-NEXT:    retq
1467;
1468; AVX512-LABEL: PR59526:
1469; AVX512:       # %bb.0:
1470; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1
1471; AVX512-NEXT:    vpxor %xmm0, %xmm0, %xmm0
1472; AVX512-NEXT:    vpcmpgtd (%rdi), %ymm0, %k0 {%k1}
1473; AVX512-NEXT:    kmovd %k0, %eax
1474; AVX512-NEXT:    testb %al, %al
1475; AVX512-NEXT:    setne %al
1476; AVX512-NEXT:    vzeroupper
1477; AVX512-NEXT:    retq
  ; select with zeroinitializer false-arm = lanewise AND of the two masks
1478  %cmp.eq = icmp eq <8 x i32> %a, %b
1479  %load = load <8 x i32>, ptr %mask, align 1
1480  %cmp.slt = icmp slt <8 x i32> %load, zeroinitializer
1481  %sel = select <8 x i1> %cmp.eq, <8 x i1> %cmp.slt, <8 x i1> zeroinitializer
1482  %bc = bitcast <8 x i1> %sel to i8
1483  %cmp = icmp ne i8 %bc, 0
1484  %conv = zext i1 %cmp to i8
1485  ret i8 %conv
1486}
1487