; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s

declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_ps_z:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfnmadd_ps:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd132ps {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_pd_z:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd_pd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd132pd {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmsubps_z:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfnmsub_ps:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132ps {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmsubpd_z:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub_pd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubps_z:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmaddsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; CHECK-LABEL: test_mask_fmaddsub_ps:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub132ps {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubpd_z:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmaddsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmaddsub_pd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

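; A few reading notes on the tests in this file. The trailing i32 argument of
; these intrinsics selects the rounding mode, following the _MM_FROUND_*
; encoding: 4 = CUR_DIRECTION (round according to MXCSR; no rounding suffix is
; printed), while 8/9/10/11 request round to nearest/down/up/toward zero
; combined with suppress-all-exceptions, which llc prints below as {rn-sae},
; {rd-sae}, {ru-sae} and {rz-sae}.
;
; The mask/mask3/maskz intrinsic families differ only in what masked-off lanes
; receive: "mask" keeps the lanes of the first operand (the result stays in
; zmm0), "mask3" keeps the lanes of the third operand (the result is produced
; in zmm2 and then copied to zmm0 with vmovapd/vmovaps), and "maskz" zeroes
; them (shown as {z} in the checks).
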
declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub213pd {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub132ps {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub231ps {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub213ps {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmsubadd231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmsubadd231ps {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 8) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 9) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 10) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132ps {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 11) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132ps {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 8) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 9) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 10) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 11) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmsub231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) - zmm2
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmsub231ps {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) - zmm2
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 8) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 9) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 10) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 11) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 8) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 9) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 10) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 11) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) + zmm2
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213pd {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd132ps {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd231ps {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) + zmm2
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213ps {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}


define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 8) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 9) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 10) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 11) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 8) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 9) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 10) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 11) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub231pd {{.*#+}} zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132ps {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub231ps {{.*#+}} zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd132pd {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}

define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd132ps {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512_rne(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 8)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512_rne(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 8)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512_rne(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 8)
  ret <8 x double> %res
}

define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512_rne(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 8)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512_rne(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 8)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512_rne(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 8)
  ret <16 x float> %res
}

define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512_rne(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmsubadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 8)
  ret <8 x double> %res
}

define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512_rne(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmsubadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 8)
  ret <16 x float> %res
}
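
; A closing note on the 132/213/231 suffixes seen in the CHECK lines above:
; with Intel operand order (dst, src2, src3), the 132 form computes
; dst = dst*src3 + src2, the 213 form computes dst = src2*dst + src3, and the
; 231 form computes dst = src2*src3 + dst. The 231 form is what lets the
; mask3 variants accumulate into (and merge with) the third operand, zmm2.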