; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD
; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD

; The following tests use the balance-fp-ops feature, and should be independent of
; the target cpu.

; RUN: llc < %s -mtriple=aarch64 -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
; RUN: llc < %s -mtriple=aarch64 -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD

; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
; our test strategy is to:
;   * Force the pass to always perform register swapping even if the dest register is of the
;     correct color already (-force-all)
;   * Force the pass to ignore all hints it obtained from regalloc (-deterministic-balance),
;     and run it twice, once where it always hints odd, and once where it always hints even.
;
; We then use regex magic to check that in the two cases the register allocation is
; different; this is what gives us the testing coverage and distinguishes cases where
; the pass has done some work versus accidental regalloc.
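;
; For example, a pattern such as [[x:d[0-9]*[02468]]] binds the FileCheck
; variable [[x]] to an even-numbered D register (d0, d2, ..., d30), and
; [[x:d[0-9]*[13579]]] binds it to an odd-numbered one (d1, d3, ..., d31);
; later uses of [[x]] then require the rest of the accumulator chain to stay
; on that same register. The s-register patterns below do the same for the
; single-precision tests.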

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"

; Non-overlapping groups - shouldn't need any changing at all.

; CHECK-LABEL: f1:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[x]]
; CHECK: str [[x]]

define void @f1(ptr nocapture readonly %p, ptr nocapture %q) #0 {
entry:
  %0 = load double, ptr %p, align 8
  %arrayidx1 = getelementptr inbounds double, ptr %p, i64 1
  %1 = load double, ptr %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, ptr %p, i64 2
  %2 = load double, ptr %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, ptr %p, i64 3
  %3 = load double, ptr %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, ptr %p, i64 4
  %4 = load double, ptr %arrayidx4, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  store double %add9, ptr %q, align 8
  %arrayidx11 = getelementptr inbounds double, ptr %p, i64 5
  %5 = load double, ptr %arrayidx11, align 8
  %arrayidx12 = getelementptr inbounds double, ptr %p, i64 6
  %6 = load double, ptr %arrayidx12, align 8
  %arrayidx13 = getelementptr inbounds double, ptr %p, i64 7
  %7 = load double, ptr %arrayidx13, align 8
  %mul15 = fmul fast double %6, %7
  %mul16 = fmul fast double %0, %5
  %add17 = fadd fast double %mul16, %mul15
  %mul18 = fmul fast double %5, %6
  %add19 = fadd fast double %mul18, %add17
  %arrayidx20 = getelementptr inbounds double, ptr %q, i64 1
  store double %add19, ptr %arrayidx20, align 8
  ret void
}

; Overlapping groups - coloring needed.

; CHECK-LABEL: f2:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-EVEN: fmul [[y:d[0-9]*[13579]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK-ODD: fmul [[y:d[0-9]*[02468]]]
; CHECK: fmadd [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmadd [[x]]
; CHECK: stp [[x]], [[y]]

define void @f2(ptr nocapture readonly %p, ptr nocapture %q) #0 {
entry:
  %0 = load double, ptr %p, align 8
  %arrayidx1 = getelementptr inbounds double, ptr %p, i64 1
  %1 = load double, ptr %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, ptr %p, i64 2
  %2 = load double, ptr %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, ptr %p, i64 3
  %3 = load double, ptr %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, ptr %p, i64 4
  %4 = load double, ptr %arrayidx4, align 8
  %arrayidx5 = getelementptr inbounds double, ptr %p, i64 5
  %5 = load double, ptr %arrayidx5, align 8
  %arrayidx6 = getelementptr inbounds double, ptr %p, i64 6
  %6 = load double, ptr %arrayidx6, align 8
  %arrayidx7 = getelementptr inbounds double, ptr %p, i64 7
  %7 = load double, ptr %arrayidx7, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %7
  %mul8 = fmul fast double %5, %6
  %mul9 = fmul fast double %1, %2
  %add10 = fadd fast double %mul9, %add
  %mul11 = fmul fast double %3, %4
  %add12 = fadd fast double %mul11, %mul8
  %mul13 = fmul fast double %1, %3
  %sub = fsub fast double %add10, %mul13
  %mul14 = fmul fast double %4, %5
  %add15 = fadd fast double %mul14, %add12
  %mul16 = fmul fast double %2, %3
  %add17 = fadd fast double %mul16, %sub
  store double %add17, ptr %q, align 8
  %arrayidx19 = getelementptr inbounds double, ptr %q, i64 1
  store double %add15, ptr %arrayidx19, align 8
  ret void
}

; Dest register is live on block exit - fixup needed.

; CHECK-LABEL: f3:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y:d[0-9]+]], {{.*}}, [[x]]
; CHECK: str [[y]]

define void @f3(ptr nocapture readonly %p, ptr nocapture %q) #0 {
entry:
  %0 = load double, ptr %p, align 8
  %arrayidx1 = getelementptr inbounds double, ptr %p, i64 1
  %1 = load double, ptr %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, ptr %p, i64 2
  %2 = load double, ptr %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, ptr %p, i64 3
  %3 = load double, ptr %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, ptr %p, i64 4
  %4 = load double, ptr %arrayidx4, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  %cmp = fcmp oeq double %3, 0.000000e+00
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  tail call void @g() #2
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  store double %add9, ptr %q, align 8
  ret void
}

declare void @g(...) #1

; Single precision version of f2.

; CHECK-LABEL: f4:
; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
; CHECK-EVEN: fmul [[y:s[0-9]*[13579]]]
; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
; CHECK-ODD: fmul [[y:s[0-9]*[02468]]]
; CHECK: fmadd [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmadd [[x]]
; CHECK: stp [[x]], [[y]]

define void @f4(ptr nocapture readonly %p, ptr nocapture %q) #0 {
entry:
  %0 = load float, ptr %p, align 4
  %arrayidx1 = getelementptr inbounds float, ptr %p, i64 1
  %1 = load float, ptr %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %p, i64 2
  %2 = load float, ptr %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds float, ptr %p, i64 3
  %3 = load float, ptr %arrayidx3, align 4
  %arrayidx4 = getelementptr inbounds float, ptr %p, i64 4
  %4 = load float, ptr %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds float, ptr %p, i64 5
  %5 = load float, ptr %arrayidx5, align 4
  %arrayidx6 = getelementptr inbounds float, ptr %p, i64 6
  %6 = load float, ptr %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds float, ptr %p, i64 7
  %7 = load float, ptr %arrayidx7, align 4
  %mul = fmul fast float %0, %1
  %add = fadd fast float %mul, %7
  %mul8 = fmul fast float %5, %6
  %mul9 = fmul fast float %1, %2
  %add10 = fadd fast float %mul9, %add
  %mul11 = fmul fast float %3, %4
  %add12 = fadd fast float %mul11, %mul8
  %mul13 = fmul fast float %1, %3
  %sub = fsub fast float %add10, %mul13
  %mul14 = fmul fast float %4, %5
  %add15 = fadd fast float %mul14, %add12
  %mul16 = fmul fast float %2, %3
  %add17 = fadd fast float %mul16, %sub
  store float %add17, ptr %q, align 4
  %arrayidx19 = getelementptr inbounds float, ptr %q, i64 1
  store float %add15, ptr %arrayidx19, align 4
  ret void
}

; Single precision version of f3

; CHECK-LABEL: f5:
; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y:s[0-9]+]], {{.*}}, [[x]]
; CHECK: str [[y]]

define void @f5(ptr nocapture readonly %p, ptr nocapture %q) #0 {
entry:
  %0 = load float, ptr %p, align 4
  %arrayidx1 = getelementptr inbounds float, ptr %p, i64 1
  %1 = load float, ptr %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %p, i64 2
  %2 = load float, ptr %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds float, ptr %p, i64 3
  %3 = load float, ptr %arrayidx3, align 4
  %arrayidx4 = getelementptr inbounds float, ptr %p, i64 4
  %4 = load float, ptr %arrayidx4, align 4
  %mul = fmul fast float %0, %1
  %add = fadd fast float %mul, %4
  %mul5 = fmul fast float %1, %2
  %add6 = fadd fast float %mul5, %add
  %mul7 = fmul fast float %1, %3
  %sub = fsub fast float %add6, %mul7
  %mul8 = fmul fast float %2, %3
  %add9 = fadd fast float %mul8, %sub
  %cmp = fcmp oeq float %3, 0.000000e+00
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  tail call void @g() #2
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  store float %add9, ptr %q, align 4
  ret void
}

; Test that regmask clobbering stops a chain sequence.

; CHECK-LABEL: f6:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd d0, {{.*}}, [[x]]
; CHECK: bl hh
; CHECK: str d0

define void @f6(ptr nocapture readonly %p, ptr nocapture %q) #0 {
entry:
  %0 = load double, ptr %p, align 8
  %arrayidx1 = getelementptr inbounds double, ptr %p, i64 1
  %1 = load double, ptr %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, ptr %p, i64 2
  %2 = load double, ptr %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, ptr %p, i64 3
  %3 = load double, ptr %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, ptr %p, i64 4
  %4 = load double, ptr %arrayidx4, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  %call = tail call double @hh(double %add9) #2
  store double %call, ptr %q, align 8
  ret void
}

declare double @hh(double) #1

; Check that we correctly deal with repeated operands.
; The following testcase creates:
;   %d1 = FADDDrr killed %d0, %d0
; We'll get a crash if we naively look at the first operand, remove it
; from the substitution list then look at the second operand.

; CHECK: fmadd [[x:d[0-9]+]]
; CHECK: fadd d1, [[x]], [[x]]

define void @f7(ptr nocapture readonly %p, ptr nocapture %q) #0 {
entry:
  %0 = load double, ptr %p, align 8
  %arrayidx1 = getelementptr inbounds double, ptr %p, i64 1
  %1 = load double, ptr %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, ptr %p, i64 2
  %2 = load double, ptr %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, ptr %p, i64 3
  %3 = load double, ptr %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, ptr %p, i64 4
  %4 = load double, ptr %arrayidx4, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  %add10 = fadd fast double %add9, %add9
  call void @hhh(double 0.0, double %add10)
  ret void
}

declare void @hhh(double, double)

attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #2 = { nounwind }