# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=arm64-apple-ios -o - -run-pass=machine-combiner %s | FileCheck %s

# Tests for machine-combiner reassociation of serialized FADDv4f32 chains
# whose inputs and/or candidate instructions are spread across basic blocks.

--- |
  define float @reassoicate_some_inputs_in_different_block(ptr %a, i1 %c) {
    ret float undef
  }

  define float @reassoicate_candidates_in_different_blocks(ptr %a, i1 %c) {
    ret float undef
  }

  define float @reassoicate_candidates_in_different_blocks_no_sink(ptr %a, i1 %c) {
    ret float undef
  }

  define float @no_reassociate_different_block(ptr %a, i1 %c) {
    ret float undef
  }

  declare void @use()


...
# The serialized reduction in bb.1 is reassociated to improve parallelism, even
# though all of its inputs are loaded in bb.0: the checks expect two independent
# FADDv4f32 whose results are combined by a third one.
---
name:            reassoicate_some_inputs_in_different_block
alignment:       4
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: reassoicate_some_inputs_in_different_block
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT:   liveins: $x0, $w1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32 = COPY $w1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
  ; CHECK-NEXT:   [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 4 :: (load (s128), align 4)
  ; CHECK-NEXT:   TBZW [[COPY]], 0, %bb.2
  ; CHECK-NEXT:   B %bb.1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], [[LDRQui1]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 killed [[FADDv4f32_1]], killed [[FADDv4f32_]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
  ; CHECK-NEXT:   [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr
  ; CHECK-NEXT:   $s0 = COPY [[FADDPv2i32p]]
  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   $q0 = COPY [[LDRQui]]
  ; CHECK-NEXT:   $q1 = COPY [[LDRQui2]]
  ; CHECK-NEXT:   $q2 = COPY [[LDRQui1]]
  ; CHECK-NEXT:   $q3 = COPY [[LDRQui3]]
  ; CHECK-NEXT:   TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $x0, $w1

    %5:gpr32 = COPY $w1
    %4:gpr64common = COPY $x0
    %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4)
    %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4)
    %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4)
    %3:fpr128 = LDRQui %4, 4 :: (load (s128), align 4)
    TBZW %5, 0, %bb.2
    B %bb.1

  bb.1:
    %6:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr
    %7:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %6, implicit $fpcr
    %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %7, implicit $fpcr
    %9:fpr128 = nofpexcept FADDPv4f32 %8, %8, implicit $fpcr
    %10:gpr64all = COPY %9.dsub
    %12:fpr64 = COPY %10
    %11:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %12, implicit $fpcr
    $s0 = COPY %11
    RET_ReallyLR implicit $s0

  bb.2:
    $q0 = COPY %0
    $q1 = COPY %2
    $q2 = COPY %1
    $q3 = COPY %3
    TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3

...
# Variation of reassoicate_some_inputs_in_different_block where the candidate
# instructions are split across 2 blocks: the first two FADDv4f32 are in bb.0
# and the last one is in bb.1. The checks expect the chain to be left unchanged.
---
name:            reassoicate_candidates_in_different_blocks
alignment:       4
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: reassoicate_candidates_in_different_blocks
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT:   liveins: $x0, $w1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32 = COPY $w1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
  ; CHECK-NEXT:   [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 4 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr
  ; CHECK-NEXT:   TBZW [[COPY]], 0, %bb.2
  ; CHECK-NEXT:   B %bb.1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
  ; CHECK-NEXT:   [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr
  ; CHECK-NEXT:   $s0 = COPY [[FADDPv2i32p]]
  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   $q0 = COPY [[LDRQui]]
  ; CHECK-NEXT:   $q1 = COPY [[LDRQui2]]
  ; CHECK-NEXT:   $q2 = COPY [[LDRQui1]]
  ; CHECK-NEXT:   $q3 = COPY [[LDRQui3]]
  ; CHECK-NEXT:   TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $x0, $w1

    %5:gpr32 = COPY $w1
    %4:gpr64common = COPY $x0
    %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4)
    %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4)
    %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4)
    %3:fpr128 = LDRQui %4, 4 :: (load (s128), align 4)
    %6:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr
    %7:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %6, implicit $fpcr
    TBZW %5, 0, %bb.2
    B %bb.1

  bb.1:
    %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %7, implicit $fpcr
    %9:fpr128 = nofpexcept FADDPv4f32 %8, %8, implicit $fpcr
    %10:gpr64all = COPY %9.dsub
    %12:fpr64 = COPY %10
    %11:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %12, implicit $fpcr
    $s0 = COPY %11
    RET_ReallyLR implicit $s0

  bb.2:
    $q0 = COPY %0
    $q1 = COPY %2
    $q2 = COPY %1
    $q3 = COPY %3
    TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3

...

# Variation of reassoicate_candidates_in_different_blocks where the partial sum
# computed in bb.0 (%7) is also used in bb.2, in addition to the FADDv4f32 in
# bb.1. Presumably this is what prevents sinking it into bb.1 (hence "no_sink").
# The checks expect the chain to be left unchanged.
---
name:            reassoicate_candidates_in_different_blocks_no_sink
alignment:       4
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: reassoicate_candidates_in_different_blocks_no_sink
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT:   liveins: $x0, $w1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32 = COPY $w1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
  ; CHECK-NEXT:   [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 4 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr
  ; CHECK-NEXT:   TBZW [[COPY]], 0, %bb.2
  ; CHECK-NEXT:   B %bb.1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
  ; CHECK-NEXT:   [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr
  ; CHECK-NEXT:   $s0 = COPY [[FADDPv2i32p]]
  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   $q0 = COPY [[LDRQui]]
  ; CHECK-NEXT:   $q1 = COPY [[LDRQui2]]
  ; CHECK-NEXT:   $q2 = COPY [[LDRQui1]]
  ; CHECK-NEXT:   $q3 = COPY [[FADDv4f32_1]]
  ; CHECK-NEXT:   TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $x0, $w1

    %5:gpr32 = COPY $w1
    %4:gpr64common = COPY $x0
    %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4)
    %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4)
    %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4)
    %3:fpr128 = LDRQui %4, 4 :: (load (s128), align 4)
    %6:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr
    %7:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %6, implicit $fpcr
    TBZW %5, 0, %bb.2
    B %bb.1

  bb.1:
    %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %7, implicit $fpcr
    %9:fpr128 = nofpexcept FADDPv4f32 %8, %8, implicit $fpcr
    %10:gpr64all = COPY %9.dsub
    %12:fpr64 = COPY %10
    %11:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %12, implicit $fpcr
    $s0 = COPY %11
    RET_ReallyLR implicit $s0

  bb.2:
    $q0 = COPY %0
    $q1 = COPY %2
    $q2 = COPY %1
    $q3 = COPY %7
    TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3

...

# Reassociation of the reduction in bb.1 is not profitable, because LDRQui3
# depends on a chain of two other loads (LDRXui, LDRXui1) and so has a much
# larger latency than the other loads.
---
name:            no_reassociate_different_block
alignment:       4
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: no_reassociate_different_block
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT:   liveins: $x0, $w1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32 = COPY $w1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
  ; CHECK-NEXT:   [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRXui:%[0-9]+]]:gpr64common = LDRXui [[COPY1]], 8 :: (load (s64))
  ; CHECK-NEXT:   [[LDRXui1:%[0-9]+]]:gpr64common = LDRXui killed [[LDRXui]], 0 :: (load (s64))
  ; CHECK-NEXT:   [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui killed [[LDRXui1]], 0 :: (load (s128), align 4)
  ; CHECK-NEXT:   TBZW [[COPY]], 0, %bb.2
  ; CHECK-NEXT:   B %bb.1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
  ; CHECK-NEXT:   [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr
  ; CHECK-NEXT:   $s0 = COPY [[FADDPv2i32p]]
  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   $q0 = COPY [[LDRQui]]
  ; CHECK-NEXT:   $q1 = COPY [[LDRQui2]]
  ; CHECK-NEXT:   $q2 = COPY [[LDRQui1]]
  ; CHECK-NEXT:   $q3 = COPY [[LDRQui3]]
  ; CHECK-NEXT:   TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $x0, $w1

    %5:gpr32 = COPY $w1
    %4:gpr64common = COPY $x0
    %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4)
    %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4)
    %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4)
    %6:gpr64common = LDRXui %4, 8 :: (load (s64))
    %7:gpr64common = LDRXui killed %6, 0 :: (load (s64))
    %3:fpr128 = LDRQui killed %7, 0 :: (load (s128), align 4)
    TBZW %5, 0, %bb.2
    B %bb.1

  bb.1:
    %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr
    %9:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %8, implicit $fpcr
    %10:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %9, implicit $fpcr
    %11:fpr128 = nofpexcept FADDPv4f32 %10, %10, implicit $fpcr
    %12:gpr64all = COPY %11.dsub
    %14:fpr64 = COPY %12
    %13:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %14, implicit $fpcr
    $s0 = COPY %13
    RET_ReallyLR implicit $s0

  bb.2:
    $q0 = COPY %0
    $q1 = COPY %2
    $q2 = COPY %1
    $q3 = COPY %3
    TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3

...