xref: /llvm-project/llvm/test/CodeGen/AArch64/fdiv-combine.ll (revision cc82f1290a1e2157a6c0530d78d8cc84d2b8553d)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s

; The following test cases check:
;   a / D; b / D; c / D;
;                =>
;   recip = 1.0 / D; a * recip; b * recip; c * recip;
; Scalar float case: %a, %b and %c are each divided by the same divisor %D.
; The expected output contains a single "fdiv s4, s4, s0" that forms 1.0 / %D,
; followed by three fmul instructions and a tail call (branch) to foo_3f.
define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
; CHECK-LABEL: three_fdiv_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov s4, #1.00000000
; CHECK-NEXT:    fdiv s4, s4, s0
; CHECK-NEXT:    fmul s0, s1, s4
; CHECK-NEXT:    fmul s1, s2, s4
; CHECK-NEXT:    fmul s2, s3, s4
; CHECK-NEXT:    b foo_3f
  %div = fdiv float %a, %D
  %div1 = fdiv float %b, %D
  %div2 = fdiv float %c, %D
  tail call void @foo_3f(float %div, float %div1, float %div2)
  ret void
}

; Scalar double case: three divisions by the same divisor %D.
; The expected output contains a single "fdiv d4, d4, d0" that forms 1.0 / %D,
; followed by three fmul instructions and a tail call (branch) to foo_3d.
define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
; CHECK-LABEL: three_fdiv_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d4, #1.00000000
; CHECK-NEXT:    fdiv d4, d4, d0
; CHECK-NEXT:    fmul d0, d1, d4
; CHECK-NEXT:    fmul d1, d2, d4
; CHECK-NEXT:    fmul d2, d3, d4
; CHECK-NEXT:    b foo_3d
  %div = fdiv double %a, %D
  %div1 = fdiv double %b, %D
  %div2 = fdiv double %c, %D
  tail call void @foo_3d(double %div, double %div1, double %div2)
  ret void
}

; Fixed-width <4 x float> case: three vector divisions by the same divisor %D.
; The expected output contains one vector fdiv of a 1.0 splat by %D (v4.4s),
; followed by three vector fmul instructions and a tail call to foo_3_4xf.
define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
; CHECK-LABEL: three_fdiv_4xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov v4.4s, #1.00000000
; CHECK-NEXT:    fdiv v4.4s, v4.4s, v0.4s
; CHECK-NEXT:    fmul v0.4s, v1.4s, v4.4s
; CHECK-NEXT:    fmul v1.4s, v2.4s, v4.4s
; CHECK-NEXT:    fmul v2.4s, v3.4s, v4.4s
; CHECK-NEXT:    b foo_3_4xf
  %div = fdiv <4 x float> %a, %D
  %div1 = fdiv <4 x float> %b, %D
  %div2 = fdiv <4 x float> %c, %D
  tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
  ret void
}

; Fixed-width <2 x double> case: three vector divisions by the same divisor %D.
; The expected output contains one vector fdiv of a 1.0 splat by %D (v4.2d),
; followed by three vector fmul instructions and a tail call to foo_3_2xd.
define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
; CHECK-LABEL: three_fdiv_2xdouble:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov v4.2d, #1.00000000
; CHECK-NEXT:    fdiv v4.2d, v4.2d, v0.2d
; CHECK-NEXT:    fmul v0.2d, v1.2d, v4.2d
; CHECK-NEXT:    fmul v1.2d, v2.2d, v4.2d
; CHECK-NEXT:    fmul v2.2d, v3.2d, v4.2d
; CHECK-NEXT:    b foo_3_2xd
  %div = fdiv <2 x double> %a, %D
  %div1 = fdiv <2 x double> %b, %D
  %div2 = fdiv <2 x double> %c, %D
  tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)
  ret void
}

; The following test cases check that two FDIVs are never combined when
; neither of them calculates a reciprocal.
; Scalar float case with only two divisions by %D.
; The expected output keeps both fdiv instructions and contains no fmul.
; The first quotient is produced in s3 and copied to s0 with fmov before the
; tail call to foo_2f.
define void @two_fdiv_float(float %D, float %a, float %b) #0 {
; CHECK-LABEL: two_fdiv_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fdiv s3, s1, s0
; CHECK-NEXT:    fdiv s1, s2, s0
; CHECK-NEXT:    fmov s0, s3
; CHECK-NEXT:    b foo_2f
  %div = fdiv float %a, %D
  %div1 = fdiv float %b, %D
  tail call void @foo_2f(float %div, float %div1)
  ret void
}

; Scalar double case with only two divisions by %D.
; The expected output keeps both fdiv instructions and contains no fmul.
; The first quotient is produced in d3 and copied to d0 with fmov before the
; tail call to foo_2d.
define void @two_fdiv_double(double %D, double %a, double %b) #0 {
; CHECK-LABEL: two_fdiv_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fdiv d3, d1, d0
; CHECK-NEXT:    fdiv d1, d2, d0
; CHECK-NEXT:    fmov d0, d3
; CHECK-NEXT:    b foo_2d
  %div = fdiv double %a, %D
  %div1 = fdiv double %b, %D
  tail call void @foo_2d(double %div, double %div1)
  ret void
}

; Three <4 x float> divisions whose divisor is a splat of the scalar %D
; (insertelement into lane 0 + shufflevector with an all-zero mask).
; The expected output broadcasts %D with dup, computes the reciprocal with one
; vector fdiv, and then uses three vector fmul instructions before the tail
; call to foo_3_4xf.
define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
; CHECK-LABEL: splat_three_fdiv_4xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    fmov v4.4s, #1.00000000
; CHECK-NEXT:    dup v0.4s, v0.s[0]
; CHECK-NEXT:    fdiv v4.4s, v4.4s, v0.4s
; CHECK-NEXT:    fmul v0.4s, v1.4s, v4.4s
; CHECK-NEXT:    fmul v1.4s, v2.4s, v4.4s
; CHECK-NEXT:    fmul v2.4s, v3.4s, v4.4s
; CHECK-NEXT:    b foo_3_4xf
  %D.ins = insertelement <4 x float> poison, float %D, i64 0
  %splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
  %div = fdiv <4 x float> %a, %splat
  %div1 = fdiv <4 x float> %b, %splat
  %div2 = fdiv <4 x float> %c, %splat
  tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
  ret void
}

; A single <4 x float> division by a splat of the scalar %D, compiled with
; attribute group #1.
; The expected output still uses a reciprocal: one vector fdiv of a 1.0 splat
; by the broadcast divisor, followed by one vector fmul.
; NOTE(review): presumably one division by a 4-lane splat counts as enough
; repeated uses of %D to trigger the transform - confirm against the DAG
; combiner's repeated-divisor heuristic.
define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
; CHECK-LABEL: splat_fdiv_v4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    fmov v2.4s, #1.00000000
; CHECK-NEXT:    dup v0.4s, v0.s[0]
; CHECK-NEXT:    fdiv v0.4s, v2.4s, v0.4s
; CHECK-NEXT:    fmul v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    ret
entry:
  %D.ins = insertelement <4 x float> poison, float %D, i64 0
  %splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
  %div = fdiv <4 x float> %a, %splat
  ret <4 x float> %div
}

; A single scalable <vscale x 4 x float> division by a splat of the scalar %D.
; The expected output computes the reciprocal with a scalar fdiv
; ("fdiv s0, s2, s0"), broadcasts it into z0.s, and then performs one
; unpredicated vector fmul.
define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #1 {
; CHECK-LABEL: splat_fdiv_nxv4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov s2, #1.00000000
; CHECK-NEXT:    fdiv s0, s2, s0
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    fmul z0.s, z1.s, z0.s
; CHECK-NEXT:    ret
entry:
  %D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
  %splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %div = fdiv <vscale x 4 x float> %a, %splat
  ret <vscale x 4 x float> %div
}

; Three scalable <vscale x 4 x float> divisions by a splat of the scalar %D.
; The expected output computes the reciprocal once with a scalar fdiv,
; broadcasts it into z4.s, and then uses three vector fmul instructions before
; the tail call to foo_3_nxv4f32.
define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #1 {
; CHECK-LABEL: splat_three_fdiv_nxv4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov s4, #1.00000000
; CHECK-NEXT:    fdiv s0, s4, s0
; CHECK-NEXT:    mov z4.s, s0
; CHECK-NEXT:    fmul z0.s, z1.s, z4.s
; CHECK-NEXT:    fmul z1.s, z2.s, z4.s
; CHECK-NEXT:    fmul z2.s, z3.s, z4.s
; CHECK-NEXT:    b foo_3_nxv4f32
entry:
  %D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
  %splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %div = fdiv <vscale x 4 x float> %a, %splat
  %div1 = fdiv <vscale x 4 x float> %b, %splat
  %div2 = fdiv <vscale x 4 x float> %c, %splat
  tail call void @foo_3_nxv4f32(<vscale x 4 x float> %div, <vscale x 4 x float> %div1, <vscale x 4 x float> %div2)
  ret void
}

; A single scalable <vscale x 2 x double> division by a splat of the scalar %D.
; The expected output does NOT use a reciprocal: %D is broadcast into z0.d and
; the quotient is produced by one predicated fdivr under an all-true predicate.
; NOTE(review): presumably a single division by a splat with a minimum of two
; lanes does not provide enough repeated uses of %D to justify the reciprocal
; form - confirm against the DAG combiner's repeated-divisor heuristic.
define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #1 {
; CHECK-LABEL: splat_fdiv_nxv2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z0.d, d0
; CHECK-NEXT:    fdivr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
entry:
  %D.ins = insertelement <vscale x 2 x double> poison, double %D, i64 0
  %splat = shufflevector <vscale x 2 x double> %D.ins, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
  %div = fdiv <vscale x 2 x double> %a, %splat
  ret <vscale x 2 x double> %div
}

; Two scalable <vscale x 2 x double> divisions by a splat of the scalar %D.
; The expected output computes the reciprocal once with a scalar fdiv
; ("fdiv d0, d3, d0"), broadcasts it into z3.d, and then uses two vector fmul
; instructions before the tail call to foo_2_nxv2f64.
define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
; CHECK-LABEL: splat_two_fdiv_nxv2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov d3, #1.00000000
; CHECK-NEXT:    fdiv d0, d3, d0
; CHECK-NEXT:    mov z3.d, d0
; CHECK-NEXT:    fmul z0.d, z1.d, z3.d
; CHECK-NEXT:    fmul z1.d, z2.d, z3.d
; CHECK-NEXT:    b foo_2_nxv2f64
entry:
  %D.ins = insertelement <vscale x 2 x double> poison, double %D, i64 0
  %splat = shufflevector <vscale x 2 x double> %D.ins, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
  %div = fdiv <vscale x 2 x double> %a, %splat
  %div1 = fdiv <vscale x 2 x double> %b, %splat
  tail call void @foo_2_nxv2f64(<vscale x 2 x double> %div, <vscale x 2 x double> %div1)
  ret void
}

; External functions that receive the quotients computed by the void test
; functions in this file; each is the target of a tail call.
declare void @foo_3f(float, float, float)
declare void @foo_3d(double, double, double)
declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
declare void @foo_2f(float, float)
declare void @foo_2d(double, double)
declare void @foo_3_nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @foo_2_nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

; #0 sets "unsafe-fp-math"; #1 sets it as well and additionally enables the
; +sve target feature.
attributes #0 = { "unsafe-fp-math"="true" }
attributes #1 = { "unsafe-fp-math"="true" "target-features"="+sve" }
