; xref: /llvm-project/llvm/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll (revision 3d18c8cd265c0c0bf1d85226c4770a2dd0f86e8f)
; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD
; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD

; The following tests use the balance-fp-ops feature, and should be independent of
; the target cpu.

; RUN: llc < %s -mtriple=aarch64 -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
; RUN: llc < %s -mtriple=aarch64 -mattr=+balance-fp-ops  -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD

; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
; our test strategy is to:
;   * Force the pass to always perform register swapping even if the dest register is of the
;     correct color already (-force-all)
;   * Force the pass to ignore all hints it obtained from regalloc (-deterministic-balance),
;     and run it twice, once where it always hints odd, and once where it always hints even.
;
; We then use regex magic to check that in the two cases the register allocation is
; different; this is what gives us the testing coverage and distinguishes cases where
; the pass has done some work versus accidental regalloc.

; Module-level target description. (Fix: removed line numbers fused onto the
; source by the extraction tool, which made the file invalid LLVM IR.)
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"

; Non-overlapping groups - shouldn't need any changing at all.

; CHECK-LABEL: f1:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[x]]
; CHECK: str [[x]]

define void @f1(ptr nocapture readonly %p, ptr nocapture %q) #0 {
entry:
  ; Load p[0..4].
  %0 = load double, ptr %p, align 8
  %arrayidx1 = getelementptr inbounds double, ptr %p, i64 1
  %1 = load double, ptr %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, ptr %p, i64 2
  %2 = load double, ptr %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, ptr %p, i64 3
  %3 = load double, ptr %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, ptr %p, i64 4
  %4 = load double, ptr %arrayidx4, align 8
  ; First accumulator chain, ending in the store to q[0]. The chain is
  ; finished before the second one starts, so the two groups do not overlap.
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  store double %add9, ptr %q, align 8
  ; Second accumulator chain: loads p[5..7], accumulates into q[1].
  %arrayidx11 = getelementptr inbounds double, ptr %p, i64 5
  %5 = load double, ptr %arrayidx11, align 8
  %arrayidx12 = getelementptr inbounds double, ptr %p, i64 6
  %6 = load double, ptr %arrayidx12, align 8
  %arrayidx13 = getelementptr inbounds double, ptr %p, i64 7
  %7 = load double, ptr %arrayidx13, align 8
  %mul15 = fmul fast double %6, %7
  %mul16 = fmul fast double %0, %5
  %add17 = fadd fast double %mul16, %mul15
  %mul18 = fmul fast double %5, %6
  %add19 = fadd fast double %mul18, %add17
  %arrayidx20 = getelementptr inbounds double, ptr %q, i64 1
  store double %add19, ptr %arrayidx20, align 8
  ret void
}

; Overlapping groups - coloring needed.

; CHECK-LABEL: f2:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-EVEN: fmul [[y:d[0-9]*[13579]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK-ODD: fmul [[y:d[0-9]*[02468]]]
; CHECK: fmadd [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmadd [[x]]
; CHECK: stp [[x]], [[y]]

define void @f2(ptr nocapture readonly %p, ptr nocapture %q) #0 {
entry:
  ; Load p[0..7].
  %0 = load double, ptr %p, align 8
  %arrayidx1 = getelementptr inbounds double, ptr %p, i64 1
  %1 = load double, ptr %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, ptr %p, i64 2
  %2 = load double, ptr %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, ptr %p, i64 3
  %3 = load double, ptr %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, ptr %p, i64 4
  %4 = load double, ptr %arrayidx4, align 8
  %arrayidx5 = getelementptr inbounds double, ptr %p, i64 5
  %5 = load double, ptr %arrayidx5, align 8
  %arrayidx6 = getelementptr inbounds double, ptr %p, i64 6
  %6 = load double, ptr %arrayidx6, align 8
  %arrayidx7 = getelementptr inbounds double, ptr %p, i64 7
  %7 = load double, ptr %arrayidx7, align 8
  ; Two accumulator chains interleaved instruction-by-instruction, so their
  ; live ranges overlap: %add17 is stored to q[0], %add15 to q[1].
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %7
  %mul8 = fmul fast double %5, %6
  %mul9 = fmul fast double %1, %2
  %add10 = fadd fast double %mul9, %add
  %mul11 = fmul fast double %3, %4
  %add12 = fadd fast double %mul11, %mul8
  %mul13 = fmul fast double %1, %3
  %sub = fsub fast double %add10, %mul13
  %mul14 = fmul fast double %4, %5
  %add15 = fadd fast double %mul14, %add12
  %mul16 = fmul fast double %2, %3
  %add17 = fadd fast double %mul16, %sub
  store double %add17, ptr %q, align 8
  %arrayidx19 = getelementptr inbounds double, ptr %q, i64 1
  store double %add15, ptr %arrayidx19, align 8
  ret void
}

; Dest register is live on block exit - fixup needed.

; CHECK-LABEL: f3:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y:d[0-9]+]], {{.*}}, [[x]]
; CHECK: str [[y]]

define void @f3(ptr nocapture readonly %p, ptr nocapture %q) #0 {
entry:
  ; Load p[0..4].
  %0 = load double, ptr %p, align 8
  %arrayidx1 = getelementptr inbounds double, ptr %p, i64 1
  %1 = load double, ptr %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, ptr %p, i64 2
  %2 = load double, ptr %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, ptr %p, i64 3
  %3 = load double, ptr %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, ptr %p, i64 4
  %4 = load double, ptr %arrayidx4, align 8
  ; Accumulator chain whose result (%add9) is used only after the branch,
  ; so its register is live on exit from the entry block.
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  %cmp = fcmp oeq double %3, 0.000000e+00
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  tail call void @g() #2
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  store double %add9, ptr %q, align 8
  ret void
}
declare void @g(...) #1

; Single precision version of f2.

; CHECK-LABEL: f4:
; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
; CHECK-EVEN: fmul [[y:s[0-9]*[13579]]]
; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
; CHECK-ODD: fmul [[y:s[0-9]*[02468]]]
; CHECK: fmadd [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmadd [[x]]
; CHECK: stp [[x]], [[y]]

define void @f4(ptr nocapture readonly %p, ptr nocapture %q) #0 {
entry:
  ; Load p[0..7] (float version of f2).
  %0 = load float, ptr %p, align 4
  %arrayidx1 = getelementptr inbounds float, ptr %p, i64 1
  %1 = load float, ptr %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %p, i64 2
  %2 = load float, ptr %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds float, ptr %p, i64 3
  %3 = load float, ptr %arrayidx3, align 4
  %arrayidx4 = getelementptr inbounds float, ptr %p, i64 4
  %4 = load float, ptr %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds float, ptr %p, i64 5
  %5 = load float, ptr %arrayidx5, align 4
  %arrayidx6 = getelementptr inbounds float, ptr %p, i64 6
  %6 = load float, ptr %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds float, ptr %p, i64 7
  %7 = load float, ptr %arrayidx7, align 4
  ; Two interleaved (overlapping) accumulator chains: %add17 -> q[0],
  ; %add15 -> q[1].
  %mul = fmul fast float %0, %1
  %add = fadd fast float %mul, %7
  %mul8 = fmul fast float %5, %6
  %mul9 = fmul fast float %1, %2
  %add10 = fadd fast float %mul9, %add
  %mul11 = fmul fast float %3, %4
  %add12 = fadd fast float %mul11, %mul8
  %mul13 = fmul fast float %1, %3
  %sub = fsub fast float %add10, %mul13
  %mul14 = fmul fast float %4, %5
  %add15 = fadd fast float %mul14, %add12
  %mul16 = fmul fast float %2, %3
  %add17 = fadd fast float %mul16, %sub
  store float %add17, ptr %q, align 4
  %arrayidx19 = getelementptr inbounds float, ptr %q, i64 1
  store float %add15, ptr %arrayidx19, align 4
  ret void
}

; Single precision version of f3.

; CHECK-LABEL: f5:
; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y:s[0-9]+]], {{.*}}, [[x]]
; CHECK: str [[y]]

define void @f5(ptr nocapture readonly %p, ptr nocapture %q) #0 {
entry:
  ; Load p[0..4] (float version of f3).
  %0 = load float, ptr %p, align 4
  %arrayidx1 = getelementptr inbounds float, ptr %p, i64 1
  %1 = load float, ptr %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %p, i64 2
  %2 = load float, ptr %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds float, ptr %p, i64 3
  %3 = load float, ptr %arrayidx3, align 4
  %arrayidx4 = getelementptr inbounds float, ptr %p, i64 4
  %4 = load float, ptr %arrayidx4, align 4
  ; Accumulator chain; %add9 is only consumed after the branch, so it is
  ; live on entry-block exit.
  %mul = fmul fast float %0, %1
  %add = fadd fast float %mul, %4
  %mul5 = fmul fast float %1, %2
  %add6 = fadd fast float %mul5, %add
  %mul7 = fmul fast float %1, %3
  %sub = fsub fast float %add6, %mul7
  %mul8 = fmul fast float %2, %3
  %add9 = fadd fast float %mul8, %sub
  %cmp = fcmp oeq float %3, 0.000000e+00
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  tail call void @g() #2
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  store float %add9, ptr %q, align 4
  ret void
}

; Test that regmask clobbering stops a chain sequence.

; CHECK-LABEL: f6:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd d0, {{.*}}, [[x]]
; CHECK: bl hh
; CHECK: str d0

define void @f6(ptr nocapture readonly %p, ptr nocapture %q) #0 {
entry:
  ; Load p[0..4].
  %0 = load double, ptr %p, align 8
  %arrayidx1 = getelementptr inbounds double, ptr %p, i64 1
  %1 = load double, ptr %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, ptr %p, i64 2
  %2 = load double, ptr %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, ptr %p, i64 3
  %3 = load double, ptr %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, ptr %p, i64 4
  %4 = load double, ptr %arrayidx4, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  ; The call's register mask clobbers FP registers, ending the chain here.
  %call = tail call double @hh(double %add9) #2
  store double %call, ptr %q, align 8
  ret void
}
declare double @hh(double) #1

; Check that we correctly deal with repeated operands.
; The following testcase creates:
;   %d1 = FADDDrr killed %d0, %d0
; We'll get a crash if we naively look at the first operand, remove it
; from the substitution list then look at the second operand.

; CHECK: fmadd [[x:d[0-9]+]]
; CHECK: fadd d1, [[x]], [[x]]

define void @f7(ptr nocapture readonly %p, ptr nocapture %q) #0 {
entry:
  ; Load p[0..4].
  %0 = load double, ptr %p, align 8
  %arrayidx1 = getelementptr inbounds double, ptr %p, i64 1
  %1 = load double, ptr %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, ptr %p, i64 2
  %2 = load double, ptr %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, ptr %p, i64 3
  %3 = load double, ptr %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, ptr %p, i64 4
  %4 = load double, ptr %arrayidx4, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  ; Repeated operand: %add9 feeds both inputs of the final fadd.
  %add10 = fadd fast double %add9, %add9
  call void @hhh(double 0.0, double %add10)
  ret void
}
declare void @hhh(double, double)
; Attribute groups: #0 = function definitions, #1 = declarations, #2 = call sites.
attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #2 = { nounwind }

