1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2# RUN: llc -mtriple=arm64-apple-ios -o - -run-pass=machine-combiner %s | FileCheck %s
3
4--- |
5  define float @reassoicate_some_inputs_in_different_block(ptr %a, i1 %c) {
6    ret float undef
7  }
8
9  define float @reassoicate_candidates_in_different_blocks(ptr %a, i1 %c) {
10    ret float undef
11  }
12
13  define float @reassoicate_candidates_in_different_blocks_no_sink(ptr %a, i1 %c) {
14    ret float undef
15  }
16
17  define float @no_reassociate_different_block(ptr %a, i1 %c) {
18    ret float undef
19  }
20
21  declare void @use()
22
23
24...
25# The serialized reduction in bb.1 is reassociated to improve parallelism, even though all of its inputs are defined in bb.0.
26---
27name:            reassoicate_some_inputs_in_different_block
28alignment:       4
29tracksRegLiveness: true
30body:             |
31  ; CHECK-LABEL: name: reassoicate_some_inputs_in_different_block
32  ; CHECK: bb.0:
33  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
34  ; CHECK-NEXT:   liveins: $x0, $w1
35  ; CHECK-NEXT: {{  $}}
36  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32 = COPY $w1
37  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
38  ; CHECK-NEXT:   [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4)
39  ; CHECK-NEXT:   [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4)
40  ; CHECK-NEXT:   [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4)
41  ; CHECK-NEXT:   [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 4 :: (load (s128), align 4)
42  ; CHECK-NEXT:   TBZW [[COPY]], 0, %bb.2
43  ; CHECK-NEXT:   B %bb.1
44  ; CHECK-NEXT: {{  $}}
45  ; CHECK-NEXT: bb.1:
46  ; CHECK-NEXT:   [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
47  ; CHECK-NEXT:   [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], [[LDRQui1]], implicit $fpcr
48  ; CHECK-NEXT:   [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 killed [[FADDv4f32_1]], killed [[FADDv4f32_]], implicit $fpcr
49  ; CHECK-NEXT:   [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
50  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
51  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
52  ; CHECK-NEXT:   [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr
53  ; CHECK-NEXT:   $s0 = COPY [[FADDPv2i32p]]
54  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
55  ; CHECK-NEXT: {{  $}}
56  ; CHECK-NEXT: bb.2:
57  ; CHECK-NEXT:   $q0 = COPY [[LDRQui]]
58  ; CHECK-NEXT:   $q1 = COPY [[LDRQui2]]
59  ; CHECK-NEXT:   $q2 = COPY [[LDRQui1]]
60  ; CHECK-NEXT:   $q3 = COPY [[LDRQui3]]
61  ; CHECK-NEXT:   TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
62  bb.0:
63    successors: %bb.1, %bb.2
64    liveins: $x0, $w1
65
66    %5:gpr32 = COPY $w1
67    %4:gpr64common = COPY $x0
68    %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4)
69    %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4)
70    %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4)
71    %3:fpr128 = LDRQui %4, 4 :: (load (s128), align 4)
72    TBZW %5, 0, %bb.2
73    B %bb.1
74
75  bb.1:
76    %6:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr
77    %7:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %6, implicit $fpcr
78    %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %7, implicit $fpcr
79    %9:fpr128 = nofpexcept FADDPv4f32 %8, %8, implicit $fpcr
80    %10:gpr64all = COPY %9.dsub
81    %12:fpr64 = COPY %10
82    %11:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %12, implicit $fpcr
83    $s0 = COPY %11
84    RET_ReallyLR implicit $s0
85
86  bb.2:
87    $q0 = COPY %0
88    $q1 = COPY %2
89    $q2 = COPY %1
90    $q3 = COPY %3
91    TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
92
93...
94# Variation of reassoicate_some_inputs_in_different_block where the candidate
95# instructions are split across 2 blocks (bb.0 and bb.1). No reassociation is performed.
96---
97name:            reassoicate_candidates_in_different_blocks
98alignment:       4
99tracksRegLiveness: true
100body:             |
101  ; CHECK-LABEL: name: reassoicate_candidates_in_different_blocks
102  ; CHECK: bb.0:
103  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
104  ; CHECK-NEXT:   liveins: $x0, $w1
105  ; CHECK-NEXT: {{  $}}
106  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32 = COPY $w1
107  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
108  ; CHECK-NEXT:   [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4)
109  ; CHECK-NEXT:   [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4)
110  ; CHECK-NEXT:   [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4)
111  ; CHECK-NEXT:   [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 4 :: (load (s128), align 4)
112  ; CHECK-NEXT:   [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
113  ; CHECK-NEXT:   [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr
114  ; CHECK-NEXT:   TBZW [[COPY]], 0, %bb.2
115  ; CHECK-NEXT:   B %bb.1
116  ; CHECK-NEXT: {{  $}}
117  ; CHECK-NEXT: bb.1:
118  ; CHECK-NEXT:   [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr
119  ; CHECK-NEXT:   [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
120  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
121  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
122  ; CHECK-NEXT:   [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr
123  ; CHECK-NEXT:   $s0 = COPY [[FADDPv2i32p]]
124  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
125  ; CHECK-NEXT: {{  $}}
126  ; CHECK-NEXT: bb.2:
127  ; CHECK-NEXT:   $q0 = COPY [[LDRQui]]
128  ; CHECK-NEXT:   $q1 = COPY [[LDRQui2]]
129  ; CHECK-NEXT:   $q2 = COPY [[LDRQui1]]
130  ; CHECK-NEXT:   $q3 = COPY [[LDRQui3]]
131  ; CHECK-NEXT:   TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
132  bb.0:
133    successors: %bb.1, %bb.2
134    liveins: $x0, $w1
135
136    %5:gpr32 = COPY $w1
137    %4:gpr64common = COPY $x0
138    %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4)
139    %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4)
140    %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4)
141    %3:fpr128 = LDRQui %4, 4 :: (load (s128), align 4)
142    %6:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr
143    %7:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %6, implicit $fpcr
144    TBZW %5, 0, %bb.2
145    B %bb.1
146
147  bb.1:
148    %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %7, implicit $fpcr
149    %9:fpr128 = nofpexcept FADDPv4f32 %8, %8, implicit $fpcr
150    %10:gpr64all = COPY %9.dsub
151    %12:fpr64 = COPY %10
152    %11:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %12, implicit $fpcr
153    $s0 = COPY %11
154    RET_ReallyLR implicit $s0
155
156  bb.2:
157    $q0 = COPY %0
158    $q1 = COPY %2
159    $q2 = COPY %1
160    $q3 = COPY %3
161    TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
162
163...
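164# Variation of reassoicate_candidates_in_different_blocks where the intermediate sum %7 is also used in bb.2. No reassociation is performed.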
165---
166name:            reassoicate_candidates_in_different_blocks_no_sink
167alignment:       4
168tracksRegLiveness: true
169body:             |
170  ; CHECK-LABEL: name: reassoicate_candidates_in_different_blocks_no_sink
171  ; CHECK: bb.0:
172  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
173  ; CHECK-NEXT:   liveins: $x0, $w1
174  ; CHECK-NEXT: {{  $}}
175  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32 = COPY $w1
176  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
177  ; CHECK-NEXT:   [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4)
178  ; CHECK-NEXT:   [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4)
179  ; CHECK-NEXT:   [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4)
180  ; CHECK-NEXT:   [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 4 :: (load (s128), align 4)
181  ; CHECK-NEXT:   [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
182  ; CHECK-NEXT:   [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr
183  ; CHECK-NEXT:   TBZW [[COPY]], 0, %bb.2
184  ; CHECK-NEXT:   B %bb.1
185  ; CHECK-NEXT: {{  $}}
186  ; CHECK-NEXT: bb.1:
187  ; CHECK-NEXT:   [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr
188  ; CHECK-NEXT:   [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
189  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
190  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
191  ; CHECK-NEXT:   [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr
192  ; CHECK-NEXT:   $s0 = COPY [[FADDPv2i32p]]
193  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
194  ; CHECK-NEXT: {{  $}}
195  ; CHECK-NEXT: bb.2:
196  ; CHECK-NEXT:   $q0 = COPY [[LDRQui]]
197  ; CHECK-NEXT:   $q1 = COPY [[LDRQui2]]
198  ; CHECK-NEXT:   $q2 = COPY [[LDRQui1]]
199  ; CHECK-NEXT:   $q3 = COPY [[FADDv4f32_1]]
200  ; CHECK-NEXT:   TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
201  bb.0:
202    successors: %bb.1, %bb.2
203    liveins: $x0, $w1
204
205    %5:gpr32 = COPY $w1
206    %4:gpr64common = COPY $x0
207    %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4)
208    %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4)
209    %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4)
210    %3:fpr128 = LDRQui %4, 4 :: (load (s128), align 4)
211    %6:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr
212    %7:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %6, implicit $fpcr
213    TBZW %5, 0, %bb.2
214    B %bb.1
215
216  bb.1:
217    %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %7, implicit $fpcr
218    %9:fpr128 = nofpexcept FADDPv4f32 %8, %8, implicit $fpcr
219    %10:gpr64all = COPY %9.dsub
220    %12:fpr64 = COPY %10
221    %11:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %12, implicit $fpcr
222    $s0 = COPY %11
223    RET_ReallyLR implicit $s0
224
225  bb.2:
226    $q0 = COPY %0
227    $q1 = COPY %2
228    $q2 = COPY %1
229    $q3 = COPY %7
230    TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
231
232...
233
234# Reassociation of the reduction in bb.1 is not profitable, because %3 (LDRQui3) is loaded through a chain
235# of two dependent LDRXui loads and therefore has a much larger latency than the other loads.
236---
237name:            no_reassociate_different_block
238alignment:       4
239tracksRegLiveness: true
240body:             |
241  ; CHECK-LABEL: name: no_reassociate_different_block
242  ; CHECK: bb.0:
243  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
244  ; CHECK-NEXT:   liveins: $x0, $w1
245  ; CHECK-NEXT: {{  $}}
246  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32 = COPY $w1
247  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
248  ; CHECK-NEXT:   [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4)
249  ; CHECK-NEXT:   [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4)
250  ; CHECK-NEXT:   [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4)
251  ; CHECK-NEXT:   [[LDRXui:%[0-9]+]]:gpr64common = LDRXui [[COPY1]], 8 :: (load (s64))
252  ; CHECK-NEXT:   [[LDRXui1:%[0-9]+]]:gpr64common = LDRXui killed [[LDRXui]], 0 :: (load (s64))
253  ; CHECK-NEXT:   [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui killed [[LDRXui1]], 0 :: (load (s128), align 4)
254  ; CHECK-NEXT:   TBZW [[COPY]], 0, %bb.2
255  ; CHECK-NEXT:   B %bb.1
256  ; CHECK-NEXT: {{  $}}
257  ; CHECK-NEXT: bb.1:
258  ; CHECK-NEXT:   [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
259  ; CHECK-NEXT:   [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr
260  ; CHECK-NEXT:   [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr
261  ; CHECK-NEXT:   [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
262  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
263  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
264  ; CHECK-NEXT:   [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr
265  ; CHECK-NEXT:   $s0 = COPY [[FADDPv2i32p]]
266  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
267  ; CHECK-NEXT: {{  $}}
268  ; CHECK-NEXT: bb.2:
269  ; CHECK-NEXT:   $q0 = COPY [[LDRQui]]
270  ; CHECK-NEXT:   $q1 = COPY [[LDRQui2]]
271  ; CHECK-NEXT:   $q2 = COPY [[LDRQui1]]
272  ; CHECK-NEXT:   $q3 = COPY [[LDRQui3]]
273  ; CHECK-NEXT:   TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
274  bb.0:
275    successors: %bb.1, %bb.2
276    liveins: $x0, $w1
277
278    %5:gpr32 = COPY $w1
279    %4:gpr64common = COPY $x0
280    %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4)
281    %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4)
282    %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4)
283    %6:gpr64common = LDRXui %4, 8 :: (load (s64))
284    %7:gpr64common = LDRXui killed %6, 0 :: (load (s64))
285    %3:fpr128 = LDRQui killed %7, 0 :: (load (s128), align 4)
286    TBZW %5, 0, %bb.2
287    B %bb.1
288
289  bb.1:
290    %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr
291    %9:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %8, implicit $fpcr
292    %10:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %9, implicit $fpcr
293    %11:fpr128 = nofpexcept FADDPv4f32 %10, %10, implicit $fpcr
294    %12:gpr64all = COPY %11.dsub
295    %14:fpr64 = COPY %12
296    %13:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %14, implicit $fpcr
297    $s0 = COPY %13
298    RET_ReallyLR implicit $s0
299
300  bb.2:
301    $q0 = COPY %0
302    $q1 = COPY %2
303    $q2 = COPY %1
304    $q3 = COPY %3
305    TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
306
307...
308