; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s

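; llvm.x86.avx512fp16.add.ph.512: unmasked, merge-masked, and zero-masked forms
; (the latter also folding a load), plus static rounding. The i32 4 immediate keeps
; the current rounding mode; i32 10 lowers to the {ru-sae} form checked below.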
declare <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half>, <32 x half>, i32)

define <32 x half> @test_int_x86_avx512fp16_add_ph_512(<32 x half> %x1, <32 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_add_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vaddph %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  %res = select <32 x i1> %mask, <32 x half> %res0, <32 x half> %src
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_maskz_add_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_add_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vaddph %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    vaddph (%rsi), %zmm1, %zmm1 {%k1} {z}
; CHECK-NEXT:    vaddph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %val = load <32 x half>, ptr %ptr
  %res0 = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
  %t2 = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %val, i32 4)
  %res2 = select <32 x i1> %mask, <32 x half> %t2, <32 x half> zeroinitializer
  %res3 = fadd <32 x half> %res1, %res2
  ret <32 x half> %res3
}

define <32 x half> @test_int_x86_avx512fp16_add_ph_512_round(<32 x half> %x1, <32 x half> %x2, <32 x half> %src, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_512_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vaddph {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %t1 = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %x2, i32 10)
  %res = select <32 x i1> %mask, <32 x half> %t1, <32 x half> %src
  ret <32 x half> %res
}

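; llvm.x86.avx512fp16.sub.ph.512: same coverage as the add tests above (unmasked,
; merge-masked, zero-masked with a folded load, and {ru-sae} static rounding).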
declare <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half>, <32 x half>, i32)

define <32 x half> @test_int_x86_avx512fp16_sub_ph_512(<32 x half> %x1, <32 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_sub_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsubph %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  %res = select <32 x i1> %mask, <32 x half> %res0, <32 x half> %src
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_maskz_sub_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_sub_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsubph %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    vsubph (%rsi), %zmm1, %zmm1
; CHECK-NEXT:    vsubph %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %val = load <32 x half>, ptr %ptr
  %res0 = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
  %t2 = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %val, i32 4)
  %res2 = select <32 x i1> %mask, <32 x half> %t2, <32 x half> zeroinitializer
  %res3 = fsub <32 x half> %res1, %res2
  ret <32 x half> %res3
}

define <32 x half> @test_int_x86_avx512fp16_sub_ph_512_round(<32 x half> %x1, <32 x half> %x2, <32 x half> %src, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_512_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsubph {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %t1 = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %x2, i32 10)
  %res = select <32 x i1> %mask, <32 x half> %t1, <32 x half> %src
  ret <32 x half> %res
}

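; llvm.x86.avx512fp16.mul.ph.512: unmasked, merge-masked, zero-masked (with a folded
; load), and {ru-sae} static-rounding forms.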
declare <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half>, <32 x half>, i32)

define <32 x half> @test_int_x86_avx512fp16_mul_ph_512(<32 x half> %x1, <32 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_mul_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmulph %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  %res = select <32 x i1> %mask, <32 x half> %res0, <32 x half> %src
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_maskz_mul_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_mul_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmulph %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    vmulph (%rsi), %zmm1, %zmm1 {%k1} {z}
; CHECK-NEXT:    vmulph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %val = load <32 x half>, ptr %ptr
  %res0 = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
  %t2 = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %val, i32 4)
  %res2 = select <32 x i1> %mask, <32 x half> %t2, <32 x half> zeroinitializer
  %res3 = fmul <32 x half> %res1, %res2
  ret <32 x half> %res3
}

define <32 x half> @test_int_x86_avx512fp16_mul_ph_512_round(<32 x half> %x1, <32 x half> %x2, <32 x half> %src, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_512_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmulph {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %t1 = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %x2, i32 10)
  %res = select <32 x i1> %mask, <32 x half> %t1, <32 x half> %src
  ret <32 x half> %res
}

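; llvm.x86.avx512fp16.div.ph.512: unmasked, merge-masked, zero-masked (with a folded
; load), and {ru-sae} static-rounding forms.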
declare <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half>, <32 x half>, i32)

define <32 x half> @test_int_x86_avx512fp16_div_ph_512(<32 x half> %x1, <32 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdivph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_div_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vdivph %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  %res = select <32 x i1> %mask, <32 x half> %res0, <32 x half> %src
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_maskz_div_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_div_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vdivph %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    vdivph (%rsi), %zmm1, %zmm1 {%k1} {z}
; CHECK-NEXT:    vdivph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %val = load <32 x half>, ptr %ptr
  %res0 = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
  %t2 = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %val, i32 4)
  %res2 = select <32 x i1> %mask, <32 x half> %t2, <32 x half> zeroinitializer
  %res3 = fdiv <32 x half> %res1, %res2
  ret <32 x half> %res3
}

define <32 x half> @test_int_x86_avx512fp16_div_ph_512_round(<32 x half> %x1, <32 x half> %x2, <32 x half> %src, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_512_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vdivph {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %t1 = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %x2, i32 10)
  %res = select <32 x i1> %mask, <32 x half> %t1, <32 x half> %src
  ret <32 x half> %res
}

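; Minimum: a generic fcmp olt + select pattern that should still select vminph, plus
; llvm.x86.avx512fp16.min.ph.512 with {sae} (immediate 8), unmasked and zero-masked.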
declare <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half>, <32 x half>, i32)

define <32 x half> @test_min_ph(<32 x half> %x1, <32 x half> %x2) {
; CHECK-LABEL: test_min_ph:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res0 = fcmp olt <32 x half> %x1, %x2
  %res1 = select <32 x i1> %res0, <32 x half> %x1, <32 x half> %x2
  ret <32 x half> %res1
}

define <32 x half> @test_int_x86_avx512fp16_min_ph_512_sae(<32 x half> %x1, <32 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_min_ph_512_sae:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res0 = call <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half> %x1, <32 x half> %x2, i32 8)
  ret <32 x half> %res0
}

define <32 x half> @test_int_x86_avx512fp16_maskz_min_ph_512_sae(<32 x half> %x1, <32 x half> %x2, i32 %msk) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_min_ph_512_sae:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vminph {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half> %x1, <32 x half> %x2, i32 8)
  %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
  ret <32 x half> %res1
}

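; Maximum: the mirror-image fcmp ogt + select pattern for vmaxph, plus
; llvm.x86.avx512fp16.max.ph.512 with {sae}, unmasked and zero-masked.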
declare <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half>, <32 x half>, i32)

define <32 x half> @test_max_ph(<32 x half> %x1, <32 x half> %x2) {
; CHECK-LABEL: test_max_ph:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res0 = fcmp ogt <32 x half> %x1, %x2
  %res1 = select <32 x i1> %res0, <32 x half> %x1, <32 x half> %x2
  ret <32 x half> %res1
}

define <32 x half> @test_int_x86_avx512fp16_max_ph_512_sae(<32 x half> %x1, <32 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_max_ph_512_sae:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res0 = call <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half> %x1, <32 x half> %x2, i32 8)
  ret <32 x half> %res0
}

define <32 x half> @test_int_x86_avx512fp16_maskz_max_ph_512_sae(<32 x half> %x1, <32 x half> %x2, i32 %msk) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_max_ph_512_sae:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmaxph {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half> %x1, <32 x half> %x2, i32 8)
  %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
  ret <32 x half> %res1
}

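; vcvtph2pd (packed half to double): merge-masked, {sae}, unmasked, and load-folded
; forms of llvm.x86.avx512fp16.mask.vcvtph2pd.512.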
declare <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half>, <8 x double>, i8, i32)

define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd(<8 x half> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2pd %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 4)
  ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_sae(<8 x half> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_sae:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2pd {sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 8)
  ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_nomask(<8 x half> %x0, <8 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 -1, i32 4)
  ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_load(ptr %px0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtph2pd (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %x0 = load <8 x half>, ptr %px0, align 16
  %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 4)
  ret <8 x double> %res
}

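; vcvtpd2ph (packed double to half): merge-masked, {rz-sae} rounding (immediate 11),
; and a 512-bit memory source, which selects the vcvtpd2phz form.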
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph(<8 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtpd2ph %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_r(<8 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtpd2ph {rz-sae}, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 11)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_load(ptr %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtpd2phz (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %x0 = load <8 x double>, ptr %px0, align 64
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 4)
  ret <8 x half> %res
}

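; vcvtss2sh (scalar float to half): merge-masked, {rz-sae}, unmasked, and zero-masked
; forms of llvm.x86.avx512fp16.mask.vcvtss2sh.round.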
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half>, <4 x float>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtss2sh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_r(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtss2sh {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3, i32 11)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_nomask(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtss2sh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 -1, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_z(<8 x half> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtss2sh %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> zeroinitializer, i8 %x2, i32 4)
  ret <8 x half> %res
}

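; vcvtsd2sh (scalar double to half): merge-masked, {rz-sae}, unmasked, and zero-masked
; forms of llvm.x86.avx512fp16.mask.vcvtsd2sh.round.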
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half>, <2 x double>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsd2sh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_r(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsd2sh {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3, i32 11)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_nomask(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsd2sh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 -1, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_z(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsd2sh %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> zeroinitializer, i8 %x2, i32 4)
  ret <8 x half> %res
}

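; vcvtsh2ss (scalar half to float): merge-masked, {sae}, unmasked, and zero-masked
; forms of llvm.x86.avx512fp16.mask.vcvtsh2ss.round.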
declare <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float>, <8 x half>, <4 x float>, i8, i32)

define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsh2ss %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3, i32 4)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_r(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsh2ss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3, i32 8)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_nomask(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2ss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 -1, i32 4)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_z(<4 x float> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsh2ss %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> zeroinitializer, i8 %x2, i32 4)
  ret <4 x float> %res
}

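; vcvtsh2sd (scalar half to double): merge-masked, {sae}, unmasked, and zero-masked
; forms of llvm.x86.avx512fp16.mask.vcvtsh2sd.round.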
declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double>, <8 x half>, <2 x double>, i8, i32)

define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsh2sd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3, i32 4)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_r(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsh2sd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3, i32 8)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_nomask(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2sd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 -1, i32 4)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_z(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsh2sd %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> zeroinitializer, i8 %x2, i32 4)
  ret <2 x double> %res
}

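; vcvtph2psx (packed half to float): unmasked, merge-masked, and zero-masked forms,
; each with and without {sae}.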
declare <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half>, <16 x float>, i16, i32)

define <16 x float> @test_int_x86_avx512_cvt_ph2psx_512(<16 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx %ymm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> undef, i16 -1, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_mask_cvt_ph2psx_512(<16 x half> %x0, <16 x float> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> %x1, i16 %x2, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_512(<16 x half> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %ymm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> zeroinitializer, i16 %x2, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_cvt_ph2psx_512r(<16 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_512r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx {sae}, %ymm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> undef, i16 -1, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_mask_cvt_ph2psx_512r(<16 x half> %x0, <16 x float> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_512r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx {sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> %x1, i16 %x2, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_512r(<16 x half> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_512r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx {sae}, %ymm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> zeroinitializer, i16 %x2, i32 8)
  ret <16 x float> %res
}

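; vcvtps2phx (packed float to half): unmasked, merge-masked, zero-masked, and a
; two-conversion test mixing {rd-sae} (immediate 9) and {ru-sae} (immediate 10).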
declare <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float>, <16 x half>, i16, i32)

define <16 x half> @test_int_x86_avx512_cvt_ps2phx_512(<16 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ps2phx_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2phx %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> undef, i16 -1, i32 4)
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_ps2phx_512(<16 x float> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 %x2, i32 4)
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_512(<16 x float> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ps2phx_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> zeroinitializer, i16 %x2, i32 4)
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_ps2phx_512r(<16 x float> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_512r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx {rd-sae}, %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vcvtps2phx {ru-sae}, %zmm0, %ymm0
; CHECK-NEXT:    vaddph %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 %x2, i32 9)
  %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 -1, i32 10)
  %res2 = fadd <16 x half> %res, %res1
  ret <16 x half> %res2
}