; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx512f | FileCheck %s

declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss_load0(ptr %x0ptr, <4 x float> %x1, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_load0:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd231ss {{.*#+}} xmm1 = (xmm0 * mem) + xmm1
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %x0 = load <4 x float>, ptr %x0ptr
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss_load1(<4 x float> %x0, ptr %x1ptr, <4 x float> %x2){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_load1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd231ss {{.*#+}} xmm1 = (xmm0 * mem) + xmm1
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %x1 = load <4 x float>, ptr %x1ptr
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
  ret <4 x float> %res
}

define <2 x double> @test_int_x86_avx512_mask3_vfmadd_sd_load0(ptr %x0ptr, <2 x double> %x1, <2 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_sd_load0:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd231sd {{.*#+}} xmm1 = (xmm0 * mem) + xmm1
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    retq
  %x0 = load <2 x double>, ptr %x0ptr
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512_mask3_vfmadd_sd_load1(<2 x double> %x0, ptr %x1ptr, <2 x double> %x2){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_sd_load1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmadd231sd {{.*#+}} xmm1 = (xmm0 * mem) + xmm1
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    retq
  %x1 = load <2 x double>, ptr %x1ptr
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
  ret <2 x double> %res
}

define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ss_load0(ptr %x0ptr, <4 x float> %x1, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ss_load0:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmsub231ss {{.*#+}} xmm1 = (xmm0 * mem) - xmm1
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %x0 = load <4 x float>, ptr %x0ptr
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ss_load1(<4 x float> %x0, ptr %x1ptr, <4 x float> %x2){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ss_load1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmsub231ss {{.*#+}} xmm1 = (xmm0 * mem) - xmm1
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %x1 = load <4 x float>, ptr %x1ptr
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
  ret <4 x float> %res
}

define <2 x double> @test_int_x86_avx512_mask3_vfmsub_sd_load0(ptr %x0ptr, <2 x double> %x1, <2 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_sd_load0:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmsub231sd {{.*#+}} xmm1 = (xmm0 * mem) - xmm1
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    retq
  %x0 = load <2 x double>, ptr %x0ptr
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512_mask3_vfmsub_sd_load1(<2 x double> %x0, ptr %x1ptr, <2 x double> %x2){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_sd_load1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfmsub231sd {{.*#+}} xmm1 = (xmm0 * mem) - xmm1
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    retq
  %x1 = load <2 x double>, ptr %x1ptr
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
  ret <2 x double> %res
}