; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -verify-machineinstrs | FileCheck %s

define double @test1(double %a, double %b) {
; CHECK-LABEL: test1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd d1, d1, d1
; CHECK-NEXT:    fsub d0, d0, d1
; CHECK-NEXT:    ret
  %mul = fmul double %b, -2.000000e+00
  %add1 = fadd double %a, %mul
  ret double %add1
}

; DAGCombine will canonicalize 'a - 2.0*b' to 'a + -2.0*b'

define double @test2(double %a, double %b) {
; CHECK-LABEL: test2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd d1, d1, d1
; CHECK-NEXT:    fsub d0, d0, d1
; CHECK-NEXT:    ret
  %mul = fmul double %b, 2.000000e+00
  %add1 = fsub double %a, %mul
  ret double %add1
}

define double @test3(double %a, double %b, double %c) {
; CHECK-LABEL: test3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul d0, d0, d1
; CHECK-NEXT:    fadd d1, d2, d2
; CHECK-NEXT:    fsub d0, d0, d1
; CHECK-NEXT:    ret
  %mul = fmul double %a, %b
  %mul1 = fmul double %c, 2.000000e+00
  %sub = fsub double %mul, %mul1
  ret double %sub
}

define double @test4(double %a, double %b, double %c) {
; CHECK-LABEL: test4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul d0, d0, d1
; CHECK-NEXT:    fadd d1, d2, d2
; CHECK-NEXT:    fsub d0, d0, d1
; CHECK-NEXT:    ret
  %mul = fmul double %a, %b
  %mul1 = fmul double %c, -2.000000e+00
  %add2 = fadd double %mul, %mul1
  ret double %add2
}

define <4 x float> @fmulnegtwo_vec(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: fmulnegtwo_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd v1.4s, v1.4s, v1.4s
; CHECK-NEXT:    fsub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %mul = fmul <4 x float> %b, <float -2.0, float -2.0, float -2.0, float -2.0>
  %add = fadd <4 x float> %a, %mul
  ret <4 x float> %add
}

define <4 x float> @fmulnegtwo_vec_commute(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: fmulnegtwo_vec_commute:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd v1.4s, v1.4s, v1.4s
; CHECK-NEXT:    fsub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %mul = fmul <4 x float> %b, <float -2.0, float -2.0, float -2.0, float -2.0>
  %add = fadd <4 x float> %mul, %a
  ret <4 x float> %add
}

define <4 x float> @fmulnegtwo_vec_undefs(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: fmulnegtwo_vec_undefs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd v1.4s, v1.4s, v1.4s
; CHECK-NEXT:    fsub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %mul = fmul <4 x float> %b, <float undef, float -2.0, float undef, float -2.0>
  %add = fadd <4 x float> %a, %mul
  ret <4 x float> %add
}

define <4 x float> @fmulnegtwo_vec_commute_undefs(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: fmulnegtwo_vec_commute_undefs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd v1.4s, v1.4s, v1.4s
; CHECK-NEXT:    fsub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %mul = fmul <4 x float> %b, <float -2.0, float undef, float -2.0, float -2.0>
  %add = fadd <4 x float> %mul, %a
  ret <4 x float> %add
}

define <4 x float> @test6(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd v1.4s, v1.4s, v1.4s
; CHECK-NEXT:    fsub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %mul = fmul <4 x float> %b, <float 2.0, float 2.0, float 2.0, float 2.0>
  %add = fsub <4 x float> %a, %mul
  ret <4 x float> %add
}

; Don't fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B)) if the fmul has
; multiple uses.

define double @test7(double %a, double %b) nounwind {
; CHECK-LABEL: test7:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    fmov d2, #-2.00000000
; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fadd d8, d0, d1
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    bl use
; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT:    fmov d0, d8
; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %mul = fmul double %b, -2.000000e+00
  %add1 = fadd double %a, %mul
  call void @use(double %mul)
  ret double %add1
}

define float @fadd_const_multiuse_fmf(float %x) {
; CHECK-LABEL: fadd_const_multiuse_fmf:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1109917696 // =0x42280000
; CHECK-NEXT:    mov w9, #1114374144 // =0x426c0000
; CHECK-NEXT:    fmov s1, w8
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fadd s1, s0, s1
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    fadd s0, s1, s0
; CHECK-NEXT:    ret
  %a1 = fadd float %x, 42.0
  %a2 = fadd nsz reassoc float %a1, 17.0
  %a3 = fadd float %a1, %a2
  ret float %a3
}

; DAGCombiner transforms this into: (x + 42.0) + (x + 59.0).
define float @fadd_const_multiuse_attr(float %x) {
; CHECK-LABEL: fadd_const_multiuse_attr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1109917696 // =0x42280000
; CHECK-NEXT:    mov w9, #1114374144 // =0x426c0000
; CHECK-NEXT:    fmov s1, w8
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fadd s1, s0, s1
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    fadd s0, s1, s0
; CHECK-NEXT:    ret
  %a1 = fadd fast float %x, 42.0
  %a2 = fadd fast float %a1, 17.0
  %a3 = fadd fast float %a1, %a2
  ret float %a3
}

; PR32939 - https://bugs.llvm.org/show_bug.cgi?id=32939

define double @fmul2_negated(double %a, double %b, double %c) {
; CHECK-LABEL: fmul2_negated:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd d1, d1, d1
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fsub d0, d0, d1
; CHECK-NEXT:    ret
  %mul = fmul double %b, 2.0
  %mul1 = fmul double %mul, %c
  %sub = fsub double %a, %mul1
  ret double %sub
}

define <2 x double> @fmul2_negated_vec(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: fmul2_negated_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd v1.2d, v1.2d, v1.2d
; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT:    fsub v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %mul = fmul <2 x double> %b, <double 2.0, double 2.0>
  %mul1 = fmul <2 x double> %mul, %c
  %sub = fsub <2 x double> %a, %mul1
  ret <2 x double> %sub
}

; ((a*b) + (c*d)) + n1 --> (a*b) + ((c*d) + n1)

define double @fadd_fma_fmul_1(double %a, double %b, double %c, double %d, double %n1) nounwind {
; CHECK-LABEL: fadd_fma_fmul_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd d2, d2, d3, d4
; CHECK-NEXT:    fmadd d0, d0, d1, d2
; CHECK-NEXT:    ret
  %m1 = fmul fast double %a, %b
  %m2 = fmul fast double %c, %d
  %a1 = fadd fast double %m1, %m2
  %a2 = fadd fast double %a1, %n1
  ret double %a2
}

; Minimum FMF - the 1st fadd is contracted because that combines
; fmul+fadd as specified by the order of operations; the 2nd fadd
; requires reassociation to fuse with c*d.

define float @fadd_fma_fmul_fmf(float %a, float %b, float %c, float %d, float %n0) nounwind {
; CHECK-LABEL: fadd_fma_fmul_fmf:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd s2, s2, s3, s4
; CHECK-NEXT:    fmadd s0, s0, s1, s2
; CHECK-NEXT:    ret
  %m1 = fmul contract float %a, %b
  %m2 = fmul contract float %c, %d
  %a1 = fadd contract float %m1, %m2
  %a2 = fadd contract reassoc float %n0, %a1
  ret float %a2
}

; Not minimum FMF.

define float @fadd_fma_fmul_2(float %a, float %b, float %c, float %d, float %n0) nounwind {
; CHECK-LABEL: fadd_fma_fmul_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul s2, s2, s3
; CHECK-NEXT:    fmadd s0, s0, s1, s2
; CHECK-NEXT:    fadd s0, s4, s0
; CHECK-NEXT:    ret
  %m1 = fmul float %a, %b
  %m2 = fmul float %c, %d
  %a1 = fadd contract float %m1, %m2
  %a2 = fadd contract float %n0, %a1
  ret float %a2
}

; The final fadd can be folded with either 1 of the leading fmuls.

define <2 x double> @fadd_fma_fmul_3(<2 x double> %x1, <2 x double> %x2, <2 x double> %x3, <2 x double> %x4, <2 x double> %x5, <2 x double> %x6, <2 x double> %x7, <2 x double> %x8) nounwind {
; CHECK-LABEL: fadd_fma_fmul_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul v2.2d, v2.2d, v3.2d
; CHECK-NEXT:    fmla v2.2d, v1.2d, v0.2d
; CHECK-NEXT:    fmla v2.2d, v7.2d, v6.2d
; CHECK-NEXT:    fmla v2.2d, v5.2d, v4.2d
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %m1 = fmul fast <2 x double> %x1, %x2
  %m2 = fmul fast <2 x double> %x3, %x4
  %m3 = fmul fast <2 x double> %x5, %x6
  %m4 = fmul fast <2 x double> %x7, %x8
  %a1 = fadd fast <2 x double> %m1, %m2
  %a2 = fadd fast <2 x double> %m3, %m4
  %a3 = fadd fast <2 x double> %a1, %a2
  ret <2 x double> %a3
}

; negative test

define float @fadd_fma_fmul_extra_use_1(float %a, float %b, float %c, float %d, float %n0, ptr %p) nounwind {
; CHECK-LABEL: fadd_fma_fmul_extra_use_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul s1, s0, s1
; CHECK-NEXT:    fmadd s0, s2, s3, s1
; CHECK-NEXT:    str s1, [x0]
; CHECK-NEXT:    fadd s0, s4, s0
; CHECK-NEXT:    ret
  %m1 = fmul fast float %a, %b
  store float %m1, ptr %p
  %m2 = fmul fast float %c, %d
  %a1 = fadd fast float %m1, %m2
  %a2 = fadd fast float %n0, %a1
  ret float %a2
}

; negative test

define float @fadd_fma_fmul_extra_use_2(float %a, float %b, float %c, float %d, float %n0, ptr %p) nounwind {
; CHECK-LABEL: fadd_fma_fmul_extra_use_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul s2, s2, s3
; CHECK-NEXT:    fmadd s0, s0, s1, s2
; CHECK-NEXT:    str s2, [x0]
; CHECK-NEXT:    fadd s0, s4, s0
; CHECK-NEXT:    ret
  %m1 = fmul fast float %a, %b
  %m2 = fmul fast float %c, %d
  store float %m2, ptr %p
  %a1 = fadd fast float %m1, %m2
  %a2 = fadd fast float %n0, %a1
  ret float %a2
}

; negative test

define float @fadd_fma_fmul_extra_use_3(float %a, float %b, float %c, float %d, float %n0, ptr %p) nounwind {
; CHECK-LABEL: fadd_fma_fmul_extra_use_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul s2, s2, s3
; CHECK-NEXT:    fmadd s1, s0, s1, s2
; CHECK-NEXT:    fadd s0, s4, s1
; CHECK-NEXT:    str s1, [x0]
; CHECK-NEXT:    ret
  %m1 = fmul fast float %a, %b
  %m2 = fmul fast float %c, %d
  %a1 = fadd fast float %m1, %m2
  store float %a1, ptr %p
  %a2 = fadd fast float %n0, %a1
  ret float %a2
}

define float @fmac_sequence_innermost_fmul(float %a, float %b, float %c, float %d, float %e, float %f, float %g) {
; CHECK-LABEL: fmac_sequence_innermost_fmul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd s0, s0, s1, s6
; CHECK-NEXT:    fmadd s0, s2, s3, s0
; CHECK-NEXT:    fmadd s0, s4, s5, s0
; CHECK-NEXT:    ret
  %t0 = fmul float %a, %b
  %t1 = fmul contract float %c, %d
  %t2 = fadd contract float %t0, %t1
  %t3 = fmul contract float %e, %f
  %t4 = fadd contract float %t2, %t3
  %t5 = fadd contract reassoc float %t4, %g
  ret float %t5
}

define float @fmac_sequence_innermost_fmul_intrinsics(float %a, float %b, float %c, float %d, float %e, float %f, float %g) {
; CHECK-LABEL: fmac_sequence_innermost_fmul_intrinsics:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd s0, s0, s1, s6
; CHECK-NEXT:    fmadd s0, s2, s3, s0
; CHECK-NEXT:    fmadd s0, s4, s5, s0
; CHECK-NEXT:    ret
  %t0 = fmul float %a, %b
  %t1 = call float @llvm.fma.f32(float %c, float %d, float %t0)
  %t2 = call float @llvm.fma.f32(float %e, float %f, float %t1)
  %t5 = fadd contract reassoc float %t2, %g
  ret float %t5
}

declare float @llvm.fma.f32(float, float, float)

declare void @use(double)