; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -fp-contract=fast | FileCheck %s
; RUN: llc < %s -mtriple=arm-arm-eabi -mcpu=cortex-m7 -fp-contract=fast | FileCheck %s
; RUN: llc < %s -mtriple=arm-arm-eabi -mcpu=cortex-m4 -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE
; RUN: llc < %s -mtriple=arm-arm-eabi -mcpu=cortex-m33 -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE

; Check generated fused MAC and MLS.

define arm_aapcs_vfpcc double @fusedMACTest1(double %d1, double %d2, double %d3) {
;CHECK-LABEL: fusedMACTest1:
;CHECK: vfma.f64
  %1 = fmul double %d1, %d2
  %2 = fadd double %1, %d3
  ret double %2
}

define arm_aapcs_vfpcc float @fusedMACTest2(float %f1, float %f2, float %f3) {
;CHECK-LABEL: fusedMACTest2:
;CHECK: vfma.f32

;DONT-FUSE-LABEL: fusedMACTest2:
;DONT-FUSE: vmul.f32
;DONT-FUSE-NEXT: vadd.f32

  %1 = fmul float %f1, %f2
  %2 = fadd float %1, %f3
  ret float %2
}

define arm_aapcs_vfpcc double @fusedMACTest3(double %d1, double %d2, double %d3) {
;CHECK-LABEL: fusedMACTest3:
;CHECK: vfms.f64
  %1 = fmul double %d2, %d3
  %2 = fsub double %d1, %1
  ret double %2
}

define arm_aapcs_vfpcc float @fusedMACTest4(float %f1, float %f2, float %f3) {
;CHECK-LABEL: fusedMACTest4:
;CHECK: vfms.f32
  %1 = fmul float %f2, %f3
  %2 = fsub float %f1, %1
  ret float %2
}

define arm_aapcs_vfpcc double @fusedMACTest5(double %d1, double %d2, double %d3) {
;CHECK-LABEL: fusedMACTest5:
;CHECK: vfnma.f64
  %1 = fmul double %d1, %d2
  %2 = fsub double -0.0, %1
  %3 = fsub double %2, %d3
  ret double %3
}

define arm_aapcs_vfpcc float @fusedMACTest6(float %f1, float %f2, float %f3) {
;CHECK-LABEL: fusedMACTest6:
;CHECK: vfnma.f32
  %1 = fmul float %f1, %f2
  %2 = fsub float -0.0, %1
  %3 = fsub float %2, %f3
  ret float %3
}

define arm_aapcs_vfpcc double @fusedMACTest7(double %d1, double %d2, double %d3) {
;CHECK-LABEL: fusedMACTest7:
;CHECK: vfnms.f64
  %1 = fmul double %d1, %d2
  %2 = fsub double %1, %d3
  ret double %2
}

define arm_aapcs_vfpcc float @fusedMACTest8(float %f1, float %f2, float %f3) {
;CHECK-LABEL: fusedMACTest8:
;CHECK: vfnms.f32
  %1 = fmul float %f1, %f2
  %2 = fsub float %1, %f3
  ret float %2
}

define arm_aapcs_vfpcc <2 x float> @fusedMACTest9(<2 x float> %a, <2 x float> %b) {
;CHECK-LABEL: fusedMACTest9:
;CHECK: vfma.f32
  %mul = fmul <2 x float> %a, %b
  %add = fadd <2 x float> %mul, %a
  ret <2 x float> %add
}

define arm_aapcs_vfpcc <2 x float> @fusedMACTest10(<2 x float> %a, <2 x float> %b) {
;CHECK-LABEL: fusedMACTest10:
;CHECK: vfms.f32
  %mul = fmul <2 x float> %a, %b
  %sub = fsub <2 x float> %a, %mul
  ret <2 x float> %sub
}

define arm_aapcs_vfpcc <4 x float> @fusedMACTest11(<4 x float> %a, <4 x float> %b) {
;CHECK-LABEL: fusedMACTest11:
;CHECK: vfma.f32
  %mul = fmul <4 x float> %a, %b
  %add = fadd <4 x float> %mul, %a
  ret <4 x float> %add
}

define arm_aapcs_vfpcc <4 x float> @fusedMACTest12(<4 x float> %a, <4 x float> %b) {
;CHECK-LABEL: fusedMACTest12:
;CHECK: vfms.f32
  %mul = fmul <4 x float> %a, %b
  %sub = fsub <4 x float> %a, %mul
  ret <4 x float> %sub
}

define arm_aapcs_vfpcc float @test_fma_f32(float %a, float %b, float %c) nounwind readnone ssp {
entry:
; CHECK: test_fma_f32
; CHECK: vfma.f32
  %tmp1 = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
  ret float %tmp1
}

define arm_aapcs_vfpcc double @test_fma_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fma_f64
; CHECK: vfma.f64
  %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
  ret double %tmp1
}

define arm_aapcs_vfpcc <2 x float> @test_fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
entry:
; CHECK: test_fma_v2f32
; CHECK: vfma.f32
  %tmp1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
  ret <2 x float> %tmp1
}

define arm_aapcs_vfpcc double @test_fms_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fms_f64
; CHECK: vfms.f64
  %tmp1 = fsub double -0.0, %a
  %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
  ret double %tmp2
}

define arm_aapcs_vfpcc double @test_fms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fms_f64_2
; CHECK: vfms.f64
  %tmp1 = fsub double -0.0, %b
  %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
  ret double %tmp2
}

define arm_aapcs_vfpcc float @test_fnms_f32(float %a, float %b, ptr %c) nounwind readnone ssp {
; CHECK: test_fnms_f32
; CHECK: vfnms.f32
  %tmp1 = load float, ptr %c, align 4
  %tmp2 = fsub float -0.0, %tmp1
  %tmp3 = tail call float @llvm.fma.f32(float %a, float %b, float %tmp2) nounwind readnone
  ret float %tmp3
}

define arm_aapcs_vfpcc double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fnms_f64
; CHECK: vfnms.f64
  %tmp1 = fsub double -0.0, %a
  %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
  %tmp3 = fsub double -0.0, %tmp2
  ret double %tmp3
}

define arm_aapcs_vfpcc double @test_fnms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fnms_f64_2
; CHECK: vfnms.f64
  %tmp1 = fsub double -0.0, %b
  %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
  %tmp3 = fsub double -0.0, %tmp2
  ret double %tmp3
}

define arm_aapcs_vfpcc double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fnma_f64
; CHECK: vfnma.f64
  %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
  %tmp2 = fsub double -0.0, %tmp1
  ret double %tmp2
}

define arm_aapcs_vfpcc double @test_fnma_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fnma_f64_2
; CHECK: vfnma.f64
  %tmp1 = fsub double -0.0, %a
  %tmp2 = fsub double -0.0, %c
  %tmp3 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %tmp2) nounwind readnone
  ret double %tmp3
}

define arm_aapcs_vfpcc float @test_fma_const_fold(float %a, float %b) nounwind {
; CHECK: test_fma_const_fold
; CHECK-NOT: vfma
; CHECK-NOT: vmul
; CHECK: vadd
  %ret = call float @llvm.fma.f32(float %a, float 1.0, float %b)
  ret float %ret
}

define arm_aapcs_vfpcc float @test_fma_canonicalize(float %a, float %b) nounwind {
; CHECK: test_fma_canonicalize
; CHECK: vmov.f32 [[R1:s[0-9]+]], #2.000000e+00
; CHECK: vfma.f32 {{s[0-9]+}}, {{s[0-9]+}}, [[R1]]
  %ret = call float @llvm.fma.f32(float 2.0, float %a, float %b)
  ret float %ret
}

; Check that very wide vector fma's can be split into legal fma's.
define arm_aapcs_vfpcc void @test_fma_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, ptr %p) nounwind readnone ssp {
; CHECK: test_fma_v8f32
; CHECK: vfma.f32
; CHECK: vfma.f32
entry:
  %call = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind readnone
  store <8 x float> %call, ptr %p, align 16
  ret void
}


declare float @llvm.fma.f32(float, float, float) nounwind readnone
declare double @llvm.fma.f64(double, double, double) nounwind readnone
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) nounwind readnone