; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

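; The vplzcnt tests below exercise lowering of the generic llvm.ctlz.*
; intrinsics to the AVX512CD/VL VPLZCNTD/VPLZCNTQ instructions. Merge-masked
; variants bitcast the i8 mask argument to <8 x i1>, narrow it to the live
; lanes with a shufflevector where the vector has fewer than 8 elements, and
; select between the result and the passthru operand; zero-masked (maskz)
; variants select against zeroinitializer instead.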
define <4 x i32> @test_int_x86_avx512_mask_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntd %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntd %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x1
  ret <4 x i32> %3
}

define <4 x i32> @test_int_x86_avx512_maskz_vplzcnt_d_128(<4 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vplzcnt_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vplzcnt_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> zeroinitializer
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) #0

define <8 x i32> @test_int_x86_avx512_mask_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
  ret <8 x i32> %3
}
declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1) #0

define <2 x i64> @test_int_x86_avx512_mask_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x1
  ret <2 x i64> %3
}
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) #0

define <4 x i64> @test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x1
  ret <4 x i64> %3
}
declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) #0

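; The remaining tests cover the target-specific
; llvm.x86.avx512.conflict.{d,q}.{128,256} intrinsics, which lower to
; VPCONFLICTD/VPCONFLICTQ in unmasked, merge-masked, and zero-masked forms.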
define <4 x i32> @test_int_x86_avx512_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictd %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %x0)
  ret <4 x i32> %1
}

define <4 x i32> @test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x1
  ret <4 x i32> %3
}

define <4 x i32> @test_int_x86_avx512_maskz_vpconflict_d_128(<4 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> zeroinitializer
  ret <4 x i32> %3
}

define <8 x i32> @test_int_x86_avx512_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictd %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %x0)
  ret <8 x i32> %1
}

define <8 x i32> @test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
  ret <8 x i32> %3
}

define <8 x i32> @test_int_x86_avx512_maskz_vpconflict_d_256(<8 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  %1 = call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
  ret <8 x i32> %3
}

define <2 x i64> @test_int_x86_avx512_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictq %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = call <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %x0)
  ret <2 x i64> %1
}

define <2 x i64> @test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x1
  ret <2 x i64> %3
}

define <2 x i64> @test_int_x86_avx512_maskz_vpconflict_q_128(<2 x i64> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %1 = call <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> zeroinitializer
  ret <2 x i64> %3
}

define <4 x i64> @test_int_x86_avx512_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictq %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = call <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %x0)
  ret <4 x i64> %1
}

define <4 x i64> @test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x1
  ret <4 x i64> %3
}

define <4 x i64> @test_int_x86_avx512_maskz_vpconflict_q_256(<4 x i64> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  %1 = call <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> zeroinitializer
  ret <4 x i64> %3
}

declare <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32>)
declare <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32>)
declare <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64>)
declare <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64>)