; xref: /llvm-project/llvm/test/CodeGen/X86/avx512fp16-arith-vl-intrinsics.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s

; fadd on <16 x half>/<8 x half>: plain, merge-masked (select with %src
; passthru), and zero-masked (select with zeroinitializer) forms, checking
; lowering to vaddph with {%k1} / {%k1}{z} and a folded memory operand.
define <16 x half> @test_int_x86_avx512fp16_add_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fadd <16 x half> %x1, %x2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_add_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %ymm2, %ymm3
; CHECK-NEXT:    vaddph %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT:    vaddph (%rsi), %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vaddph %ymm2, %ymm3, %ymm0
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x half>, ptr %ptr
  %res0 = fadd <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
  %t3 = fadd <16 x half> %x1, %val
  %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
  %res   =  fadd <16 x half> %res1 , %res2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_maskz_add_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_add_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vaddph %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = fadd <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res1
}

define <8 x half> @test_int_x86_avx512fp16_add_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fadd <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_add_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vaddph (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vaddph %xmm2, %xmm3, %xmm0
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x half>, ptr %ptr
  %res0 = fadd <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
  %t3 = fadd <8 x half> %x1, %val
  %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
  %res   =  fadd <8 x half> %res1 , %res2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_maskz_add_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_add_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = fadd <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res1
}

; fsub on <16 x half>/<8 x half>: plain, merge-masked, and zero-masked forms,
; checking lowering to vsubph with {%k1} / {%k1}{z} and a folded memory operand.
define <16 x half> @test_int_x86_avx512fp16_sub_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fsub <16 x half> %x1, %x2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_sub_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %ymm2, %ymm3
; CHECK-NEXT:    vsubph %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT:    vsubph (%rsi), %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vsubph %ymm2, %ymm3, %ymm0
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x half>, ptr %ptr
  %res0 = fsub <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
  %t3 = fsub <16 x half> %x1, %val
  %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
  %res   =  fsub <16 x half> %res1 , %res2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_maskz_sub_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_sub_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsubph %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = fsub <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res1
}

define <8 x half> @test_int_x86_avx512fp16_sub_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fsub <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_sub_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vsubph (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vsubph %xmm2, %xmm3, %xmm0
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x half>, ptr %ptr
  %res0 = fsub <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
  %t3 = fsub <8 x half> %x1, %val
  %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
  %res   =  fsub <8 x half> %res1 , %res2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_maskz_sub_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_sub_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = fsub <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res1
}

; fmul on <16 x half>/<8 x half>: plain, merge-masked, and zero-masked forms,
; checking lowering to vmulph with {%k1} / {%k1}{z} and a folded memory operand.
define <16 x half> @test_int_x86_avx512fp16_mul_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fmul <16 x half> %x1, %x2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_mul_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %ymm2, %ymm3
; CHECK-NEXT:    vmulph %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT:    vmulph (%rsi), %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vmulph %ymm2, %ymm3, %ymm0
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x half>, ptr %ptr
  %res0 = fmul <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
  %t3 = fmul <16 x half> %x1, %val
  %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
  %res   =  fmul <16 x half> %res1 , %res2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_maskz_mul_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_mul_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmulph %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = fmul <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res1
}

define <8 x half> @test_int_x86_avx512fp16_mul_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fmul <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_mul_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vmulph %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vmulph (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmulph %xmm2, %xmm3, %xmm0
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x half>, ptr %ptr
  %res0 = fmul <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
  %t3 = fmul <8 x half> %x1, %val
  %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
  %res   =  fmul <8 x half> %res1 , %res2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_maskz_mul_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_mul_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmulph %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = fmul <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res1
}

; fdiv on <16 x half>/<8 x half>: plain, fast-math (expected to become
; vrcpph + vmulph via reciprocal estimate), merge-masked, and zero-masked
; forms, checking lowering to vdivph with {%k1} / {%k1}{z} and a folded
; memory operand.
define <16 x half> @test_int_x86_avx512fp16_div_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdivph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fdiv <16 x half> %x1, %x2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_div_ph_256_fast(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_256_fast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcpph %ymm1, %ymm1
; CHECK-NEXT:    vmulph %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = fdiv fast <16 x half> %x1, %x2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_div_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %ymm2, %ymm3
; CHECK-NEXT:    vdivph %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT:    vdivph (%rsi), %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vdivph %ymm2, %ymm3, %ymm0
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x half>, ptr %ptr
  %res0 = fdiv <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
  %t3 = fdiv <16 x half> %x1, %val
  %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
  %res   =  fdiv <16 x half> %res1 , %res2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_maskz_div_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_div_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vdivph %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = fdiv <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res1
}

define <8 x half> @test_int_x86_avx512fp16_div_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdivph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fdiv <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_div_ph_128_fast(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_128_fast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcpph %xmm1, %xmm1
; CHECK-NEXT:    vmulph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = fdiv fast <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_div_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vdivph %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vdivph (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vdivph %xmm2, %xmm3, %xmm0
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x half>, ptr %ptr
  %res0 = fdiv <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
  %t3 = fdiv <8 x half> %x1, %val
  %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
  %res   =  fdiv <8 x half> %res1 , %res2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_maskz_div_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_div_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vdivph %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = fdiv <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res1
}

; fcmp olt/ogt + select patterns on <16 x half>/<8 x half>, checking that
; they are matched to single vminph / vmaxph instructions.
define <16 x half> @test_min_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_min_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = fcmp olt <16 x half> %x1, %x2
  %res1 = select <16 x i1> %res0, <16 x half> %x1, <16 x half> %x2
  ret  <16 x half> %res1
}

define <16 x half> @test_max_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_max_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = fcmp ogt <16 x half> %x1, %x2
  %res1 = select <16 x i1> %res0, <16 x half> %x1, <16 x half> %x2
  ret  <16 x half> %res1
}

define <8 x half> @test_min_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_min_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res0 = fcmp olt <8 x half> %x1, %x2
  %res1 = select <8 x i1> %res0, <8 x half> %x1, <8 x half> %x2
  ret  <8 x half> %res1
}

define <8 x half> @test_max_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_max_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res0 = fcmp ogt <8 x half> %x1, %x2
  %res1 = select <8 x i1> %res0, <8 x half> %x1, <8 x half> %x2
  ret  <8 x half> %res1
}

; Direct llvm.x86.avx512fp16.max/min.ph.128/256 intrinsic calls, checking
; lowering to vmaxph / vminph.
declare <8 x half> @llvm.x86.avx512fp16.max.ph.128(<8 x half>, <8 x half>)
declare <16 x half> @llvm.x86.avx512fp16.max.ph.256(<16 x half>, <16 x half>)

define <8 x half> @test_max_ph_128_2(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_max_ph_128_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res0 = call <8 x half> @llvm.x86.avx512fp16.max.ph.128(<8 x half> %x1, <8 x half> %x2)
  ret  <8 x half> %res0
}

define <16 x half> @test_max_ph_256_2(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_max_ph_256_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = call <16 x half> @llvm.x86.avx512fp16.max.ph.256(<16 x half> %x1, <16 x half> %x2)
  ret  <16 x half> %res0
}

declare <8 x half> @llvm.x86.avx512fp16.min.ph.128(<8 x half>, <8 x half>)
declare <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half>, <16 x half>)

define <8 x half> @test_min_ph_128_2(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_min_ph_128_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res0 = call <8 x half> @llvm.x86.avx512fp16.min.ph.128(<8 x half> %x1, <8 x half> %x2)
  ret  <8 x half> %res0
}

define <16 x half> @test_min_ph_256_2(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_min_ph_256_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = call <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half> %x1, <16 x half> %x2)
  ret  <16 x half> %res0
}

; masked vcvtph2pd intrinsics (half -> double), 256- and 128-bit, with and
; without a mask (mask = -1 should drop the {%k1} qualifier).
declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2)
  ret <4 x double> %res
}

define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1)
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1)
  ret <2 x double> %res
}

; masked vcvtpd2ph intrinsics (double -> half), 256- and 128-bit, register
; and load forms (load form should fold to vcvtpd2phy / vcvtpd2phx with a
; memory operand).
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtpd2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256_load(ptr %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtpd2phy (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %x0 = load <4 x double>, ptr %px0, align 32
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtpd2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128_load(ptr %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtpd2phx (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %x0 = load <2 x double>, ptr %px0, align 16
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

; masked vcvtph2udq intrinsics (half -> u32), 128- and 256-bit, unmasked
; (mask = -1), merge-masked, and zero-masked (zeroinitializer passthru) forms.
declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

; masked vcvttph2dq intrinsics (half -> i32, truncating), 128- and 256-bit,
; unmasked, merge-masked, and zero-masked forms.
declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

; masked vcvttph2udq intrinsics (half -> u32, truncating), 128- and 256-bit,
; unmasked, merge-masked, and zero-masked forms.
declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

; masked vcvtph2psx intrinsics (half -> float), 128- and 256-bit, unmasked,
; merge-masked, and zero-masked forms.
declare <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half>, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_cvt_ph2psx_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> undef, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_mask_cvt_ph2psx_128(<8 x half> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> %x1, i8 %x2)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> zeroinitializer, i8 %x2)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half>, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_cvt_ph2psx_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> undef, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_mask_cvt_ph2psx_256(<8 x half> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> %x1, i8 %x2)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> zeroinitializer, i8 %x2)
  ret <8 x float> %res
}

; masked vcvtps2phx intrinsics (float -> half), 128- and 256-bit, unmasked,
; merge-masked, and zero-masked forms (256-bit variants end in vzeroupper).
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_128(<4 x float> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 %x2)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 -1)
  %res2 = fadd <8 x half> %res, %res1
  ret <8 x half> %res2
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_cvt_ps2phx_256(<8 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ps2phx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> undef, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_256(<8 x float> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_256(<8 x float> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ps2phx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> zeroinitializer, i8 %x2)
  ret <8 x half> %res
}
