; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s

; Every function in this file carries attribute group #0 or #1 (both defined at
; the bottom of the file). Both groups set "unsafe-fp-math"="true"; group #1
; additionally enables the "+sve" target feature.
; NOTE(review): the unsafe-fp-math attribute is presumably what permits the
; reciprocal rewrites asserted below - confirm against the combine's
; preconditions before changing the attributes.

; The following test cases check:
;   a / D; b / D; c / D;
;                =>
;   recip = 1.0 / D; a * recip; b * recip; c * recip;

; Scalar f32: three divisions by the same %D. The expected code materialises
; 1.0, performs a single fdiv to form the reciprocal, and replaces the three
; divisions with three fmuls before tail-calling foo_3f.
define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
; CHECK-LABEL: three_fdiv_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov s4, #1.00000000
; CHECK-NEXT:    fdiv s4, s4, s0
; CHECK-NEXT:    fmul s0, s1, s4
; CHECK-NEXT:    fmul s1, s2, s4
; CHECK-NEXT:    fmul s2, s3, s4
; CHECK-NEXT:    b foo_3f
  %div = fdiv float %a, %D
  %div1 = fdiv float %b, %D
  %div2 = fdiv float %c, %D
  tail call void @foo_3f(float %div, float %div1, float %div2)
  ret void
}

; Scalar f64 variant of the test above: one fdiv for the reciprocal, then three
; fmuls, using d-registers.
define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
; CHECK-LABEL: three_fdiv_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d4, #1.00000000
; CHECK-NEXT:    fdiv d4, d4, d0
; CHECK-NEXT:    fmul d0, d1, d4
; CHECK-NEXT:    fmul d1, d2, d4
; CHECK-NEXT:    fmul d2, d3, d4
; CHECK-NEXT:    b foo_3d
  %div = fdiv double %a, %D
  %div1 = fdiv double %b, %D
  %div2 = fdiv double %c, %D
  tail call void @foo_3d(double %div, double %div1, double %div2)
  ret void
}

; Fixed-width <4 x float> variant: the reciprocal is formed with one vector
; fdiv of a 1.0 vector by %D, followed by three vector fmuls.
define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
; CHECK-LABEL: three_fdiv_4xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov v4.4s, #1.00000000
; CHECK-NEXT:    fdiv v4.4s, v4.4s, v0.4s
; CHECK-NEXT:    fmul v0.4s, v1.4s, v4.4s
; CHECK-NEXT:    fmul v1.4s, v2.4s, v4.4s
; CHECK-NEXT:    fmul v2.4s, v3.4s, v4.4s
; CHECK-NEXT:    b foo_3_4xf
  %div = fdiv <4 x float> %a, %D
  %div1 = fdiv <4 x float> %b, %D
  %div2 = fdiv <4 x float> %c, %D
  tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
  ret void
}

; Fixed-width <2 x double> variant: same shape as the <4 x float> test, using
; the .2d arrangement.
define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
; CHECK-LABEL: three_fdiv_2xdouble:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov v4.2d, #1.00000000
; CHECK-NEXT:    fdiv v4.2d, v4.2d, v0.2d
; CHECK-NEXT:    fmul v0.2d, v1.2d, v4.2d
; CHECK-NEXT:    fmul v1.2d, v2.2d, v4.2d
; CHECK-NEXT:    fmul v2.2d, v3.2d, v4.2d
; CHECK-NEXT:    b foo_3_2xd
  %div = fdiv <2 x double> %a, %D
  %div1 = fdiv <2 x double> %b, %D
  %div2 = fdiv <2 x double> %c, %D
  tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)
  ret void
}

; Following test cases check we never combine two FDIVs if neither of them
; calculates a reciprocal.

; Scalar f32 with only two divisions by %D: the expected code keeps both fdiv
; instructions and forms no reciprocal. The fmov only moves the first quotient
; into the first argument register for the tail call.
define void @two_fdiv_float(float %D, float %a, float %b) #0 {
; CHECK-LABEL: two_fdiv_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fdiv s3, s1, s0
; CHECK-NEXT:    fdiv s1, s2, s0
; CHECK-NEXT:    fmov s0, s3
; CHECK-NEXT:    b foo_2f
  %div = fdiv float %a, %D
  %div1 = fdiv float %b, %D
  tail call void @foo_2f(float %div, float %div1)
  ret void
}

; Scalar f64 variant of the two-division test: both fdiv instructions are kept.
define void @two_fdiv_double(double %D, double %a, double %b) #0 {
; CHECK-LABEL: two_fdiv_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fdiv d3, d1, d0
; CHECK-NEXT:    fdiv d1, d2, d0
; CHECK-NEXT:    fmov d0, d3
; CHECK-NEXT:    b foo_2d
  %div = fdiv double %a, %D
  %div1 = fdiv double %b, %D
  tail call void @foo_2d(double %div, double %div1)
  ret void
}

; The remaining test cases divide by a splat of the scalar %D, built with an
; insertelement into lane 0 followed by a shufflevector with an all-zero mask.

; Fixed-width <4 x float>, three divisions by the splat: %D is broadcast with
; dup, the reciprocal is formed with one vector fdiv, and the three divisions
; become three vector fmuls.
define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
; CHECK-LABEL: splat_three_fdiv_4xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    fmov v4.4s, #1.00000000
; CHECK-NEXT:    dup v0.4s, v0.s[0]
; CHECK-NEXT:    fdiv v4.4s, v4.4s, v0.4s
; CHECK-NEXT:    fmul v0.4s, v1.4s, v4.4s
; CHECK-NEXT:    fmul v1.4s, v2.4s, v4.4s
; CHECK-NEXT:    fmul v2.4s, v3.4s, v4.4s
; CHECK-NEXT:    b foo_3_4xf
  %D.ins = insertelement <4 x float> poison, float %D, i64 0
  %splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
  %div = fdiv <4 x float> %a, %splat
  %div1 = fdiv <4 x float> %b, %splat
  %div2 = fdiv <4 x float> %c, %splat
  tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
  ret void
}

; Fixed-width <4 x float>, a single division by the splat, compiled with
; attribute group #1. Even with one division the expected code forms a vector
; reciprocal (dup, vector fdiv of 1.0 by the splat) and multiplies by it.
define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
; CHECK-LABEL: splat_fdiv_v4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    fmov v2.4s, #1.00000000
; CHECK-NEXT:    dup v0.4s, v0.s[0]
; CHECK-NEXT:    fdiv v0.4s, v2.4s, v0.4s
; CHECK-NEXT:    fmul v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    ret
entry:
  %D.ins = insertelement <4 x float> poison, float %D, i64 0
  %splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
  %div = fdiv <4 x float> %a, %splat
  ret <4 x float> %div
}

; Scalable <vscale x 4 x float>, a single division by the splat. The expected
; code takes the reciprocal on the scalar (fdiv s0, s2, s0), broadcasts the
; result into a z-register, and performs a vector fmul instead of a vector
; division.
define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #1 {
; CHECK-LABEL: splat_fdiv_nxv4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov s2, #1.00000000
; CHECK-NEXT:    fdiv s0, s2, s0
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    fmul z0.s, z1.s, z0.s
; CHECK-NEXT:    ret
entry:
  %D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
  %splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %div = fdiv <vscale x 4 x float> %a, %splat
  ret <vscale x 4 x float> %div
}

; Scalable <vscale x 4 x float>, three divisions by the splat: one scalar fdiv
; forms the reciprocal, it is broadcast once, and the three divisions become
; three vector fmuls.
define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #1 {
; CHECK-LABEL: splat_three_fdiv_nxv4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov s4, #1.00000000
; CHECK-NEXT:    fdiv s0, s4, s0
; CHECK-NEXT:    mov z4.s, s0
; CHECK-NEXT:    fmul z0.s, z1.s, z4.s
; CHECK-NEXT:    fmul z1.s, z2.s, z4.s
; CHECK-NEXT:    fmul z2.s, z3.s, z4.s
; CHECK-NEXT:    b foo_3_nxv4f32
entry:
  %D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
  %splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %div = fdiv <vscale x 4 x float> %a, %splat
  %div1 = fdiv <vscale x 4 x float> %b, %splat
  %div2 = fdiv <vscale x 4 x float> %c, %splat
  tail call void @foo_3_nxv4f32(<vscale x 4 x float> %div, <vscale x 4 x float> %div1, <vscale x 4 x float> %div2)
  ret void
}

; Scalable <vscale x 2 x double>, a single division by the splat. Unlike the
; single-division nxv4f32 test, the expected code forms no reciprocal: %D is
; broadcast and one predicated fdivr performs the division directly.
; NOTE(review): the reason for the f32/f64 difference is not visible in this
; file - presumably a backend profitability decision; confirm before relying
; on it.
define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #1 {
; CHECK-LABEL: splat_fdiv_nxv2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z0.d, d0
; CHECK-NEXT:    fdivr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
entry:
  %D.ins = insertelement <vscale x 2 x double> poison, double %D, i64 0
  %splat = shufflevector <vscale x 2 x double> %D.ins, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
  %div = fdiv <vscale x 2 x double> %a, %splat
  ret <vscale x 2 x double> %div
}

; Scalable <vscale x 2 x double>, two divisions by the splat. Here the expected
; code does form a reciprocal: one scalar fdiv, one broadcast, and two vector
; fmuls (contrast with the scalar two_fdiv_* tests, which keep both fdivs).
define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
; CHECK-LABEL: splat_two_fdiv_nxv2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov d3, #1.00000000
; CHECK-NEXT:    fdiv d0, d3, d0
; CHECK-NEXT:    mov z3.d, d0
; CHECK-NEXT:    fmul z0.d, z1.d, z3.d
; CHECK-NEXT:    fmul z1.d, z2.d, z3.d
; CHECK-NEXT:    b foo_2_nxv2f64
entry:
  %D.ins = insertelement <vscale x 2 x double> poison, double %D, i64 0
  %splat = shufflevector <vscale x 2 x double> %D.ins, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
  %div = fdiv <vscale x 2 x double> %a, %splat
  %div1 = fdiv <vscale x 2 x double> %b, %splat
  tail call void @foo_2_nxv2f64(<vscale x 2 x double> %div, <vscale x 2 x double> %div1)
  ret void
}

; External callees. Each void test above passes its quotients to one of these
; through a tail call, which appears as the final branch in the expected code.
declare void @foo_3f(float, float, float)
declare void @foo_3d(double, double, double)
declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
declare void @foo_2f(float, float)
declare void @foo_2d(double, double)
declare void @foo_3_nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @foo_2_nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

; #0: unsafe FP math only (scalar and fixed-width tests).
; #1: unsafe FP math plus SVE (splat_fdiv_v4f32 and the scalable-vector tests).
attributes #0 = { "unsafe-fp-math"="true" }
attributes #1 = { "unsafe-fp-math"="true" "target-features"="+sve" }