; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s
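; This file tests lowering of the legacy masked AVX-512 FMA intrinsics
; (llvm.x86.avx512.mask/mask3/maskz.vfmadd, vfmsub, vfnmadd, vfnmsub,
; vfmaddsub and vfmsubadd, 512-bit ps/pd variants). Each intrinsic takes
; three vector operands, a write mask (i16 for <16 x float>, i8 for
; <8 x double>) and a trailing i32 rounding argument: 4 selects the current
; (MXCSR) rounding mode, while 8-11 encode an embedded rounding mode with
; suppress-all-exceptions (see the *_round_* tests below).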

declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_ps_z:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfnmadd_ps:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd132ps {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_pd_z:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd_pd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd132pd {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmsubps_z:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfnmsub_ps:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132ps {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmsubpd_z:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub_pd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubps_z:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmaddsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; CHECK-LABEL: test_mask_fmaddsub_ps:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub132ps {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubpd_z:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmaddsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmaddsub_pd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

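; The mask, mask3 and maskz variants below differ only in the passthrough for
; masked-off lanes: "mask" keeps the first source (zmm0), "mask3" keeps the
; addend (zmm2, hence the trailing vmovapd/vmovaps to return it in zmm0),
; and "maskz" zeroes those lanes ({z} in the disassembly).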
define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub213pd {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub132ps {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub231ps {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub213ps {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmsubadd231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmsubadd231ps {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

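; Embedded-rounding tests: the trailing i32 arguments 8, 9, 10 and 11 select
; {rn-sae}, {rd-sae}, {ru-sae} and {rz-sae} respectively, while 4 ("current")
; emits the plain instruction with no rounding suffix.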
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 8) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 9) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 10) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132ps {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 11) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132ps {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 8) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 9) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 10) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 11) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmsub231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) - zmm2
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmsub231ps {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) - zmm2
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 8) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 9) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 10) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 11) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 8) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 9) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 10) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 11) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) + zmm2
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213pd {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132ps {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd231ps {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) + zmm2
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213ps {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}


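; The same rounding-mode matrix is repeated for vfnmsub, whose negated form
; carries the -(a * b) - c comment in the no-rounding ("current") case.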
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 8) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 9) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 10) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 11) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 8) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 9) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 10) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 11) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub231pd {{.*#+}} zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132ps {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub231ps {{.*#+}} zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd132pd {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd132ps {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

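; Finally, the fmaddsub/fmsubadd variants with round-to-nearest embedded
; rounding; with an explicit {rn-sae} the assembly keeps the rounding suffix
; instead of the arithmetic comment used above.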
define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512_rne(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 8)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512_rne(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 8)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512_rne(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 8)
  ret <8 x double> %res
}

define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512_rne(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 8)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512_rne(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 8)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512_rne(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 8)
  ret <16 x float> %res
}

define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512_rne(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmsubadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 8)
  ret <8 x double> %res
}

define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512_rne(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmsubadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 8)
  ret <16 x float> %res
}
