1; RUN: llc -mtriple=i686-linux -pre-RA-sched=source < %s | FileCheck %s 2; RUN: opt -disable-output -passes=debugify < %s 3 4declare void @error(i32 %i, i32 %a, i32 %b) 5 6define i32 @test_ifchains(i32 %i, ptr %a, i32 %b) { 7; Test a chain of ifs, where the block guarded by the if is error handling code 8; that is not expected to run. 9; CHECK-LABEL: test_ifchains: 10; CHECK: %entry 11; CHECK-NOT: .p2align 12; CHECK: %else1 13; CHECK-NOT: .p2align 14; CHECK: %else2 15; CHECK-NOT: .p2align 16; CHECK: %else3 17; CHECK-NOT: .p2align 18; CHECK: %else4 19; CHECK-NOT: .p2align 20; CHECK: %exit 21; CHECK: %then1 22; CHECK: %then2 23; CHECK: %then3 24; CHECK: %then4 25; CHECK: %then5 26 27entry: 28 %gep1 = getelementptr i32, ptr %a, i32 1 29 %val1 = load i32, ptr %gep1 30 %cond1 = icmp ugt i32 %val1, 1 31 br i1 %cond1, label %then1, label %else1, !prof !0 32 33then1: 34 call void @error(i32 %i, i32 1, i32 %b) 35 br label %else1 36 37else1: 38 %gep2 = getelementptr i32, ptr %a, i32 2 39 %val2 = load i32, ptr %gep2 40 %cond2 = icmp ugt i32 %val2, 2 41 br i1 %cond2, label %then2, label %else2, !prof !0 42 43then2: 44 call void @error(i32 %i, i32 1, i32 %b) 45 br label %else2 46 47else2: 48 %gep3 = getelementptr i32, ptr %a, i32 3 49 %val3 = load i32, ptr %gep3 50 %cond3 = icmp ugt i32 %val3, 3 51 br i1 %cond3, label %then3, label %else3, !prof !0 52 53then3: 54 call void @error(i32 %i, i32 1, i32 %b) 55 br label %else3 56 57else3: 58 %gep4 = getelementptr i32, ptr %a, i32 4 59 %val4 = load i32, ptr %gep4 60 %cond4 = icmp ugt i32 %val4, 4 61 br i1 %cond4, label %then4, label %else4, !prof !0 62 63then4: 64 call void @error(i32 %i, i32 1, i32 %b) 65 br label %else4 66 67else4: 68 %gep5 = getelementptr i32, ptr %a, i32 3 69 %val5 = load i32, ptr %gep5 70 %cond5 = icmp ugt i32 %val5, 3 71 br i1 %cond5, label %then5, label %exit, !prof !0 72 73then5: 74 call void @error(i32 %i, i32 1, i32 %b) 75 br label %exit 76 77exit: 78 ret i32 %b 79} 80 81define i32 @test_loop_cold_blocks(i32 %i, ptr %a) { 82; Check that we sink cold loop blocks after the hot loop body. 83; CHECK-LABEL: test_loop_cold_blocks: 84; CHECK: %entry 85; CHECK: .p2align 86; CHECK: %body1 87; CHECK: %body2 88; CHECK: %body3 89; CHECK-NOT: .p2align 90; CHECK: %unlikely1 91; CHECK-NOT: .p2align 92; CHECK: %unlikely2 93; CHECK: %exit 94 95entry: 96 br label %body1 97 98body1: 99 %iv = phi i32 [ 0, %entry ], [ %next, %body3 ] 100 %base = phi i32 [ 0, %entry ], [ %sum, %body3 ] 101 %unlikelycond1 = icmp slt i32 %base, 42 102 br i1 %unlikelycond1, label %unlikely1, label %body2, !prof !0 103 104unlikely1: 105 call void @error(i32 %i, i32 1, i32 %base) 106 br label %body2 107 108body2: 109 %unlikelycond2 = icmp sgt i32 %base, 21 110 br i1 %unlikelycond2, label %unlikely2, label %body3, !prof !0 111 112unlikely2: 113 call void @error(i32 %i, i32 2, i32 %base) 114 br label %body3 115 116body3: 117 %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv 118 %0 = load i32, ptr %arrayidx 119 %sum = add nsw i32 %0, %base 120 %next = add i32 %iv, 1 121 %exitcond = icmp eq i32 %next, %i 122 br i1 %exitcond, label %exit, label %body1 123 124exit: 125 ret i32 %sum 126} 127 128!0 = !{!"branch_weights", i32 1, i32 64} 129 130define i32 @test_loop_early_exits(i32 %i, ptr %a) { 131; Check that we sink early exit blocks out of loop bodies. 132; CHECK-LABEL: test_loop_early_exits: 133; CHECK: %entry 134; CHECK: %body1 135; CHECK: %body2 136; CHECK: %body3 137; CHECK: %body4 138; CHECK: %exit 139; CHECK: %bail1 140; CHECK: %bail2 141; CHECK: %bail3 142 143entry: 144 br label %body1 145 146body1: 147 %iv = phi i32 [ 0, %entry ], [ %next, %body4 ] 148 %base = phi i32 [ 0, %entry ], [ %sum, %body4 ] 149 %bailcond1 = icmp eq i32 %base, 42 150 br i1 %bailcond1, label %bail1, label %body2 151 152bail1: 153 ret i32 -1 154 155body2: 156 %bailcond2 = icmp eq i32 %base, 43 157 br i1 %bailcond2, label %bail2, label %body3 158 159bail2: 160 ret i32 -2 161 162body3: 163 %bailcond3 = icmp eq i32 %base, 44 164 br i1 %bailcond3, label %bail3, label %body4 165 166bail3: 167 ret i32 -3 168 169body4: 170 %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv 171 %0 = load i32, ptr %arrayidx 172 %sum = add nsw i32 %0, %base 173 %next = add i32 %iv, 1 174 %exitcond = icmp eq i32 %next, %i 175 br i1 %exitcond, label %exit, label %body1 176 177exit: 178 ret i32 %sum 179} 180 181; Tail duplication during layout can entirely remove body0 by duplicating it 182; into the entry block and into body1. This is a good thing but it isn't what 183; this test is looking for. So to make the blocks longer so they don't get 184; duplicated, we add some calls to dummy. 185declare void @dummy() 186 187define i32 @test_loop_rotate(i32 %i, ptr %a) { 188; Check that we rotate conditional exits from the loop to the bottom of the 189; loop, eliminating unconditional branches to the top. 190; CHECK-LABEL: test_loop_rotate: 191; CHECK: %entry 192; CHECK: %body0 193; CHECK: %body1 194; CHECK: %exit 195 196entry: 197 br label %body0 198 199body0: 200 %iv = phi i32 [ 0, %entry ], [ %next, %body1 ] 201 %base = phi i32 [ 0, %entry ], [ %sum, %body1 ] 202 %next = add i32 %iv, 1 203 %exitcond = icmp eq i32 %next, %i 204 call void @dummy() 205 call void @dummy() 206 br i1 %exitcond, label %exit, label %body1 207 208body1: 209 %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv 210 %0 = load i32, ptr %arrayidx 211 %sum = add nsw i32 %0, %base 212 %bailcond1 = icmp eq i32 %sum, 42 213 br label %body0 214 215exit: 216 ret i32 %base 217} 218 219define i32 @test_no_loop_rotate(i32 %i, ptr %a) { 220; Check that we don't try to rotate a loop which is already laid out with 221; fallthrough opportunities into the top and out of the bottom. 222; CHECK-LABEL: test_no_loop_rotate: 223; CHECK: %entry 224; CHECK: %body0 225; CHECK: %body1 226; CHECK: %exit 227 228entry: 229 br label %body0 230 231body0: 232 %iv = phi i32 [ 0, %entry ], [ %next, %body1 ] 233 %base = phi i32 [ 0, %entry ], [ %sum, %body1 ] 234 %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv 235 %0 = load i32, ptr %arrayidx 236 %sum = add nsw i32 %0, %base 237 %bailcond1 = icmp eq i32 %sum, 42 238 br i1 %bailcond1, label %exit, label %body1 239 240body1: 241 %next = add i32 %iv, 1 242 %exitcond = icmp eq i32 %next, %i 243 br i1 %exitcond, label %exit, label %body0 244 245exit: 246 ret i32 %base 247} 248 249define i32 @test_loop_align(i32 %i, ptr %a) { 250; Check that we provide basic loop body alignment with the block placement 251; pass. 252; CHECK-LABEL: test_loop_align: 253; CHECK: %entry 254; CHECK: .p2align [[ALIGN:[0-9]+]] 255; CHECK-NEXT: %body 256; CHECK: %exit 257 258entry: 259 br label %body 260 261body: 262 %iv = phi i32 [ 0, %entry ], [ %next, %body ] 263 %base = phi i32 [ 0, %entry ], [ %sum, %body ] 264 %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv 265 %0 = load i32, ptr %arrayidx 266 %sum = add nsw i32 %0, %base 267 %next = add i32 %iv, 1 268 %exitcond = icmp eq i32 %next, %i 269 br i1 %exitcond, label %exit, label %body 270 271exit: 272 ret i32 %sum 273} 274 275define i32 @test_nested_loop_align(i32 %i, ptr %a, ptr %b) { 276; Check that we provide nested loop body alignment. 277; CHECK-LABEL: test_nested_loop_align: 278; CHECK: %entry 279; CHECK: .p2align [[ALIGN]] 280; CHECK-NEXT: %loop.body.1 281; CHECK: .p2align [[ALIGN]] 282; CHECK-NEXT: %inner.loop.body 283; CHECK-NOT: .p2align 284; CHECK: %exit 285 286entry: 287 br label %loop.body.1 288 289loop.body.1: 290 %iv = phi i32 [ 0, %entry ], [ %next, %loop.body.2 ] 291 %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv 292 %bidx = load i32, ptr %arrayidx 293 br label %inner.loop.body 294 295inner.loop.body: 296 %inner.iv = phi i32 [ 0, %loop.body.1 ], [ %inner.next, %inner.loop.body ] 297 %base = phi i32 [ 0, %loop.body.1 ], [ %sum, %inner.loop.body ] 298 %scaled_idx = mul i32 %bidx, %iv 299 %inner.arrayidx = getelementptr inbounds i32, ptr %b, i32 %scaled_idx 300 %0 = load i32, ptr %inner.arrayidx 301 %sum = add nsw i32 %0, %base 302 %inner.next = add i32 %iv, 1 303 %inner.exitcond = icmp eq i32 %inner.next, %i 304 br i1 %inner.exitcond, label %loop.body.2, label %inner.loop.body 305 306loop.body.2: 307 %next = add i32 %iv, 1 308 %exitcond = icmp eq i32 %next, %i 309 br i1 %exitcond, label %exit, label %loop.body.1 310 311exit: 312 ret i32 %sum 313} 314 315define void @unnatural_cfg1(i1 %arg) { 316; Test that we can handle a loop with an inner unnatural loop at the end of 317; a function. This is a gross CFG reduced out of the single source GCC. 318; CHECK-LABEL: unnatural_cfg1 319; CHECK: %entry 320; CHECK: %loop.header 321; CHECK: %loop.body5 322 323entry: 324 br label %loop.header 325 326loop.header: 327 br label %loop.body1 328 329loop.body1: 330 br i1 %arg, label %loop.body3, label %loop.body2 331 332loop.body2: 333 %ptr = load ptr, ptr undef, align 4 334 br label %loop.body3 335 336loop.body3: 337 %myptr = phi ptr [ %ptr2, %loop.body5 ], [ %ptr, %loop.body2 ], [ undef, %loop.body1 ] 338 %bcmyptr = bitcast ptr %myptr to ptr 339 %val = load i32, ptr %bcmyptr, align 4 340 %comp = icmp eq i32 %val, 48 341 br i1 %comp, label %loop.body4, label %loop.body5 342 343loop.body4: 344 br i1 %arg, label %loop.header, label %loop.body5 345 346loop.body5: 347 %ptr2 = load ptr, ptr undef, align 4 348 br label %loop.body3 349} 350 351define void @unnatural_cfg2(ptr %p0, i32 %a0, i1 %arg) { 352; Test that we can handle a loop with a nested natural loop *and* an unnatural 353; loop. This was reduced from a crash on block placement when run over 354; single-source GCC. 355; CHECK-LABEL: unnatural_cfg2 356; CHECK: %entry 357; CHECK: %loop.header 358; CHECK: %loop.body1 359; CHECK: %loop.body2 360; CHECK: %loop.body4 361; CHECK: %loop.inner2.begin 362; CHECK: %loop.body3 363; CHECK: %loop.inner1.begin 364; CHECK: %bail 365 366entry: 367 br label %loop.header 368 369loop.header: 370 %comp0 = icmp eq ptr %p0, null 371 br i1 %comp0, label %bail, label %loop.body1 372 373loop.body1: 374 %val0 = load ptr, ptr undef, align 4 375 br i1 %arg, label %loop.body2, label %loop.inner1.begin 376 377loop.body2: 378 br i1 %arg, label %loop.body4, label %loop.body3 379 380loop.body3: 381 %ptr1 = getelementptr inbounds i32, ptr %val0, i32 0 382 %castptr1 = bitcast ptr %ptr1 to ptr 383 %val1 = load ptr, ptr %castptr1, align 4 384 br label %loop.inner1.begin 385 386loop.inner1.begin: 387 %valphi = phi ptr [ %val2, %loop.inner1.end ], [ %val1, %loop.body3 ], [ %val0, %loop.body1 ] 388 %castval = bitcast ptr %valphi to ptr 389 %comp1 = icmp eq i32 %a0, 48 390 br i1 %comp1, label %loop.inner1.end, label %loop.body4 391 392loop.inner1.end: 393 %ptr2 = getelementptr inbounds i32, ptr %valphi, i32 0 394 %castptr2 = bitcast ptr %ptr2 to ptr 395 %val2 = load ptr, ptr %castptr2, align 4 396 br label %loop.inner1.begin 397 398loop.body4.dead: 399 br label %loop.body4 400 401loop.body4: 402 %comp2 = icmp ult i32 %a0, 3 403 br i1 %comp2, label %loop.inner2.begin, label %loop.end 404 405loop.inner2.begin: 406 br i1 false, label %loop.end, label %loop.inner2.end 407 408loop.inner2.end: 409 %comp3 = icmp eq i32 %a0, 1769472 410 br i1 %comp3, label %loop.end, label %loop.inner2.begin 411 412loop.end: 413 br label %loop.header 414 415bail: 416 unreachable 417} 418 419define i32 @problematic_switch() { 420; This function's CFG caused overlow in the machine branch probability 421; calculation, triggering asserts. Make sure we don't crash on it. 422; CHECK: problematic_switch 423 424entry: 425 switch i32 undef, label %exit [ 426 i32 879, label %bogus 427 i32 877, label %step 428 i32 876, label %step 429 i32 875, label %step 430 i32 874, label %step 431 i32 873, label %step 432 i32 872, label %step 433 i32 868, label %step 434 i32 867, label %step 435 i32 866, label %step 436 i32 861, label %step 437 i32 860, label %step 438 i32 856, label %step 439 i32 855, label %step 440 i32 854, label %step 441 i32 831, label %step 442 i32 830, label %step 443 i32 829, label %step 444 i32 828, label %step 445 i32 815, label %step 446 i32 814, label %step 447 i32 811, label %step 448 i32 806, label %step 449 i32 805, label %step 450 i32 804, label %step 451 i32 803, label %step 452 i32 802, label %step 453 i32 801, label %step 454 i32 800, label %step 455 i32 799, label %step 456 i32 798, label %step 457 i32 797, label %step 458 i32 796, label %step 459 i32 795, label %step 460 ] 461bogus: 462 unreachable 463step: 464 br label %exit 465exit: 466 %merge = phi i32 [ 3, %step ], [ 6, %entry ] 467 ret i32 %merge 468} 469 470define void @fpcmp_unanalyzable_branch(i1 %cond, double %a0, i1 %arg) { 471; This function's CFG contains an once-unanalyzable branch (une on floating 472; points). As now it becomes analyzable, we should get best layout in which each 473; edge in 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end' is 474; fall-through. 475; CHECK-LABEL: fpcmp_unanalyzable_branch: 476; CHECK: # %bb.0: # %entry 477; CHECK: # %bb.1: # %entry.if.then_crit_edge 478; CHECK: .LBB10_5: # %if.then 479; CHECK: .LBB10_6: # %if.end 480; CHECK: # %bb.3: # %exit 481; CHECK: jne .LBB10_4 482; CHECK-NEXT: jnp .LBB10_6 483; CHECK: jmp .LBB10_5 484 485entry: 486; Note that this branch must be strongly biased toward 487; 'entry.if.then_crit_edge' to ensure that we would try to form a chain for 488; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end'. 489 br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1 490 491entry.if.then_crit_edge: 492 %.pre14 = load i8, ptr undef, align 1 493 br label %if.then 494 495lor.lhs.false: 496 br i1 %arg, label %if.end, label %exit 497 498exit: 499 %cmp.i = fcmp une double 0.000000e+00, %a0 500 br i1 %cmp.i, label %if.then, label %if.end, !prof !3 501 502if.then: 503 %0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ] 504 %1 = and i8 %0, 1 505 store i8 %1, ptr undef, align 4 506 br label %if.end 507 508if.end: 509 ret void 510} 511 512!1 = !{!"branch_weights", i32 1000, i32 1} 513!3 = !{!"branch_weights", i32 1, i32 1000} 514 515declare i32 @f() 516declare i32 @g() 517declare i32 @h(i32 %x) 518 519define i32 @test_global_cfg_break_profitability(i1 %arg) { 520; Check that our metrics for the profitability of a CFG break are global rather 521; than local. A successor may be very hot, but if the current block isn't, it 522; doesn't matter. Within this test the 'then' block is slightly warmer than the 523; 'else' block, but not nearly enough to merit merging it with the exit block 524; even though the probability of 'then' branching to the 'exit' block is very 525; high. 526; CHECK: test_global_cfg_break_profitability 527; CHECK: calll {{_?}}f 528; CHECK: calll {{_?}}g 529; CHECK: calll {{_?}}h 530; CHECK: ret 531 532entry: 533 br i1 %arg, label %then, label %else, !prof !2 534 535then: 536 %then.result = call i32 @f() 537 br label %exit 538 539else: 540 %else.result = call i32 @g() 541 br label %exit 542 543exit: 544 %result = phi i32 [ %then.result, %then ], [ %else.result, %else ] 545 %result2 = call i32 @h(i32 %result) 546 ret i32 %result 547} 548 549!2 = !{!"branch_weights", i32 3, i32 1} 550 551declare i32 @__gxx_personality_v0(...) 552 553define void @test_eh_lpad_successor() personality ptr @__gxx_personality_v0 { 554; Some times the landing pad ends up as the first successor of an invoke block. 555; When this happens, a strange result used to fall out of updateTerminators: we 556; didn't correctly locate the fallthrough successor, assuming blindly that the 557; first one was the fallthrough successor. As a result, we would add an 558; erroneous jump to the landing pad thinking *that* was the default successor. 559; CHECK-LABEL: test_eh_lpad_successor 560; CHECK: %entry 561; CHECK-NOT: jmp 562; CHECK: %loop 563 564entry: 565 invoke i32 @f() to label %preheader unwind label %lpad 566 567preheader: 568 br label %loop 569 570lpad: 571 %lpad.val = landingpad { ptr, i32 } 572 cleanup 573 resume { ptr, i32 } %lpad.val 574 575loop: 576 br label %loop 577} 578 579declare void @fake_throw() noreturn 580 581define void @test_eh_throw() personality ptr @__gxx_personality_v0 { 582; For blocks containing a 'throw' (or similar functionality), we have 583; a no-return invoke. In this case, only EH successors will exist, and 584; fallthrough simply won't occur. Make sure we don't crash trying to update 585; terminators for such constructs. 586; 587; CHECK-LABEL: test_eh_throw 588; CHECK: %entry 589; CHECK: %cleanup 590 591entry: 592 invoke void @fake_throw() to label %continue unwind label %cleanup 593 594continue: 595 unreachable 596 597cleanup: 598 %0 = landingpad { ptr, i32 } 599 cleanup 600 unreachable 601} 602 603define void @test_unnatural_cfg_backwards_inner_loop(i1 %arg) { 604; Test that when we encounter an unnatural CFG structure after having formed 605; a chain for an inner loop which happened to be laid out backwards we don't 606; attempt to merge onto the wrong end of the inner loop just because we find it 607; first. This was reduced from a crasher in GCC's single source. 608; 609; CHECK-LABEL: test_unnatural_cfg_backwards_inner_loop 610; CHECK: %entry 611; CHECK: %loop2b 612; CHECK: %loop3 613 614entry: 615 br i1 %arg, label %loop2a, label %body 616 617body: 618 br label %loop2a 619 620loop1: 621 %next.load = load ptr, ptr undef 622 br i1 %comp.a, label %loop2a, label %loop2b 623 624loop2a: 625 %var = phi ptr [ null, %entry ], [ null, %body ], [ %next.phi, %loop1 ] 626 %next.var = phi ptr [ null, %entry ], [ undef, %body ], [ %next.load, %loop1 ] 627 %comp.a = icmp eq ptr %var, null 628 br label %loop3 629 630loop2b: 631 %gep = getelementptr inbounds i32, ptr %var.phi, i32 0 632 %next.ptr = bitcast ptr %gep to ptr 633 store ptr %next.phi, ptr %next.ptr 634 br label %loop3 635 636loop3: 637 %var.phi = phi ptr [ %next.phi, %loop2b ], [ %var, %loop2a ] 638 %next.phi = phi ptr [ %next.load, %loop2b ], [ %next.var, %loop2a ] 639 br label %loop1 640} 641 642define void @unanalyzable_branch_to_loop_header(double %a0) { 643; Ensure that we can handle unanalyzable branches into loop headers. We 644; pre-form chains for unanalyzable branches, and will find the tail end of that 645; at the start of the loop. This function uses floating point comparison 646; fallthrough because that happens to always produce unanalyzable branches on 647; x86. 648; 649; CHECK-LABEL: unanalyzable_branch_to_loop_header 650; CHECK: %entry 651; CHECK: %loop 652; CHECK: %exit 653 654entry: 655 %cmp = fcmp une double 0.000000e+00, %a0 656 br i1 %cmp, label %loop, label %exit 657 658loop: 659 %cond = icmp eq i8 undef, 42 660 br i1 %cond, label %exit, label %loop 661 662exit: 663 ret void 664} 665 666define void @unanalyzable_branch_to_best_succ(i1 %cond, double %a0) { 667; Ensure that we can handle unanalyzable branches where the destination block 668; gets selected as the optimal successor to merge. 669; 670; This branch is now analyzable and hence the destination block becomes the 671; hotter one. The right order is entry->bar->exit->foo. 672; 673; CHECK-LABEL: unanalyzable_branch_to_best_succ 674; CHECK: %entry 675; CHECK: %bar 676; CHECK: %exit 677; CHECK: %foo 678 679entry: 680 ; Bias this branch toward bar to ensure we form that chain. 681 br i1 %cond, label %bar, label %foo, !prof !1 682 683foo: 684 %cmp = fcmp une double 0.000000e+00, %a0 685 br i1 %cmp, label %bar, label %exit 686 687bar: 688 call i32 @f() 689 br label %exit 690 691exit: 692 ret void 693} 694 695define void @unanalyzable_branch_to_free_block(float %x, i1 %arg) { 696; Ensure that we can handle unanalyzable branches where the destination block 697; gets selected as the best free block in the CFG. 698; 699; CHECK-LABEL: unanalyzable_branch_to_free_block 700; CHECK: %entry 701; CHECK: %a 702; CHECK: %b 703; CHECK: %c 704; CHECK: %exit 705 706entry: 707 br i1 %arg, label %a, label %b 708 709a: 710 call i32 @f() 711 br label %c 712 713b: 714 %cmp = fcmp une float %x, 0.0 715 br i1 %cmp, label %c, label %exit 716 717c: 718 call i32 @g() 719 br label %exit 720 721exit: 722 ret void 723} 724 725define void @many_unanalyzable_branches() { 726; Ensure that we don't crash as we're building up many unanalyzable branches, 727; blocks, and loops. 728; 729; CHECK-LABEL: many_unanalyzable_branches 730; CHECK: %entry 731; CHECK: %exit 732 733entry: 734 br label %0 735 736 %val0 = load volatile float, ptr undef 737 %cmp0 = fcmp une float %val0, 0.0 738 br i1 %cmp0, label %1, label %0 739 %val1 = load volatile float, ptr undef 740 %cmp1 = fcmp une float %val1, 0.0 741 br i1 %cmp1, label %2, label %1 742 %val2 = load volatile float, ptr undef 743 %cmp2 = fcmp une float %val2, 0.0 744 br i1 %cmp2, label %3, label %2 745 %val3 = load volatile float, ptr undef 746 %cmp3 = fcmp une float %val3, 0.0 747 br i1 %cmp3, label %4, label %3 748 %val4 = load volatile float, ptr undef 749 %cmp4 = fcmp une float %val4, 0.0 750 br i1 %cmp4, label %5, label %4 751 %val5 = load volatile float, ptr undef 752 %cmp5 = fcmp une float %val5, 0.0 753 br i1 %cmp5, label %6, label %5 754 %val6 = load volatile float, ptr undef 755 %cmp6 = fcmp une float %val6, 0.0 756 br i1 %cmp6, label %7, label %6 757 %val7 = load volatile float, ptr undef 758 %cmp7 = fcmp une float %val7, 0.0 759 br i1 %cmp7, label %8, label %7 760 %val8 = load volatile float, ptr undef 761 %cmp8 = fcmp une float %val8, 0.0 762 br i1 %cmp8, label %9, label %8 763 %val9 = load volatile float, ptr undef 764 %cmp9 = fcmp une float %val9, 0.0 765 br i1 %cmp9, label %10, label %9 766 %val10 = load volatile float, ptr undef 767 %cmp10 = fcmp une float %val10, 0.0 768 br i1 %cmp10, label %11, label %10 769 %val11 = load volatile float, ptr undef 770 %cmp11 = fcmp une float %val11, 0.0 771 br i1 %cmp11, label %12, label %11 772 %val12 = load volatile float, ptr undef 773 %cmp12 = fcmp une float %val12, 0.0 774 br i1 %cmp12, label %13, label %12 775 %val13 = load volatile float, ptr undef 776 %cmp13 = fcmp une float %val13, 0.0 777 br i1 %cmp13, label %14, label %13 778 %val14 = load volatile float, ptr undef 779 %cmp14 = fcmp une float %val14, 0.0 780 br i1 %cmp14, label %15, label %14 781 %val15 = load volatile float, ptr undef 782 %cmp15 = fcmp une float %val15, 0.0 783 br i1 %cmp15, label %16, label %15 784 %val16 = load volatile float, ptr undef 785 %cmp16 = fcmp une float %val16, 0.0 786 br i1 %cmp16, label %17, label %16 787 %val17 = load volatile float, ptr undef 788 %cmp17 = fcmp une float %val17, 0.0 789 br i1 %cmp17, label %18, label %17 790 %val18 = load volatile float, ptr undef 791 %cmp18 = fcmp une float %val18, 0.0 792 br i1 %cmp18, label %19, label %18 793 %val19 = load volatile float, ptr undef 794 %cmp19 = fcmp une float %val19, 0.0 795 br i1 %cmp19, label %20, label %19 796 %val20 = load volatile float, ptr undef 797 %cmp20 = fcmp une float %val20, 0.0 798 br i1 %cmp20, label %21, label %20 799 %val21 = load volatile float, ptr undef 800 %cmp21 = fcmp une float %val21, 0.0 801 br i1 %cmp21, label %22, label %21 802 %val22 = load volatile float, ptr undef 803 %cmp22 = fcmp une float %val22, 0.0 804 br i1 %cmp22, label %23, label %22 805 %val23 = load volatile float, ptr undef 806 %cmp23 = fcmp une float %val23, 0.0 807 br i1 %cmp23, label %24, label %23 808 %val24 = load volatile float, ptr undef 809 %cmp24 = fcmp une float %val24, 0.0 810 br i1 %cmp24, label %25, label %24 811 %val25 = load volatile float, ptr undef 812 %cmp25 = fcmp une float %val25, 0.0 813 br i1 %cmp25, label %26, label %25 814 %val26 = load volatile float, ptr undef 815 %cmp26 = fcmp une float %val26, 0.0 816 br i1 %cmp26, label %27, label %26 817 %val27 = load volatile float, ptr undef 818 %cmp27 = fcmp une float %val27, 0.0 819 br i1 %cmp27, label %28, label %27 820 %val28 = load volatile float, ptr undef 821 %cmp28 = fcmp une float %val28, 0.0 822 br i1 %cmp28, label %29, label %28 823 %val29 = load volatile float, ptr undef 824 %cmp29 = fcmp une float %val29, 0.0 825 br i1 %cmp29, label %30, label %29 826 %val30 = load volatile float, ptr undef 827 %cmp30 = fcmp une float %val30, 0.0 828 br i1 %cmp30, label %31, label %30 829 %val31 = load volatile float, ptr undef 830 %cmp31 = fcmp une float %val31, 0.0 831 br i1 %cmp31, label %32, label %31 832 %val32 = load volatile float, ptr undef 833 %cmp32 = fcmp une float %val32, 0.0 834 br i1 %cmp32, label %33, label %32 835 %val33 = load volatile float, ptr undef 836 %cmp33 = fcmp une float %val33, 0.0 837 br i1 %cmp33, label %34, label %33 838 %val34 = load volatile float, ptr undef 839 %cmp34 = fcmp une float %val34, 0.0 840 br i1 %cmp34, label %35, label %34 841 %val35 = load volatile float, ptr undef 842 %cmp35 = fcmp une float %val35, 0.0 843 br i1 %cmp35, label %36, label %35 844 %val36 = load volatile float, ptr undef 845 %cmp36 = fcmp une float %val36, 0.0 846 br i1 %cmp36, label %37, label %36 847 %val37 = load volatile float, ptr undef 848 %cmp37 = fcmp une float %val37, 0.0 849 br i1 %cmp37, label %38, label %37 850 %val38 = load volatile float, ptr undef 851 %cmp38 = fcmp une float %val38, 0.0 852 br i1 %cmp38, label %39, label %38 853 %val39 = load volatile float, ptr undef 854 %cmp39 = fcmp une float %val39, 0.0 855 br i1 %cmp39, label %40, label %39 856 %val40 = load volatile float, ptr undef 857 %cmp40 = fcmp une float %val40, 0.0 858 br i1 %cmp40, label %41, label %40 859 %val41 = load volatile float, ptr undef 860 %cmp41 = fcmp une float %val41, undef 861 br i1 %cmp41, label %42, label %41 862 %val42 = load volatile float, ptr undef 863 %cmp42 = fcmp une float %val42, 0.0 864 br i1 %cmp42, label %43, label %42 865 %val43 = load volatile float, ptr undef 866 %cmp43 = fcmp une float %val43, 0.0 867 br i1 %cmp43, label %44, label %43 868 %val44 = load volatile float, ptr undef 869 %cmp44 = fcmp une float %val44, 0.0 870 br i1 %cmp44, label %45, label %44 871 %val45 = load volatile float, ptr undef 872 %cmp45 = fcmp une float %val45, 0.0 873 br i1 %cmp45, label %46, label %45 874 %val46 = load volatile float, ptr undef 875 %cmp46 = fcmp une float %val46, 0.0 876 br i1 %cmp46, label %47, label %46 877 %val47 = load volatile float, ptr undef 878 %cmp47 = fcmp une float %val47, 0.0 879 br i1 %cmp47, label %48, label %47 880 %val48 = load volatile float, ptr undef 881 %cmp48 = fcmp une float %val48, 0.0 882 br i1 %cmp48, label %49, label %48 883 %val49 = load volatile float, ptr undef 884 %cmp49 = fcmp une float %val49, 0.0 885 br i1 %cmp49, label %50, label %49 886 %val50 = load volatile float, ptr undef 887 %cmp50 = fcmp une float %val50, 0.0 888 br i1 %cmp50, label %51, label %50 889 %val51 = load volatile float, ptr undef 890 %cmp51 = fcmp une float %val51, 0.0 891 br i1 %cmp51, label %52, label %51 892 %val52 = load volatile float, ptr undef 893 %cmp52 = fcmp une float %val52, 0.0 894 br i1 %cmp52, label %53, label %52 895 %val53 = load volatile float, ptr undef 896 %cmp53 = fcmp une float %val53, 0.0 897 br i1 %cmp53, label %54, label %53 898 %val54 = load volatile float, ptr undef 899 %cmp54 = fcmp une float %val54, 0.0 900 br i1 %cmp54, label %55, label %54 901 %val55 = load volatile float, ptr undef 902 %cmp55 = fcmp une float %val55, 0.0 903 br i1 %cmp55, label %56, label %55 904 %val56 = load volatile float, ptr undef 905 %cmp56 = fcmp une float %val56, 0.0 906 br i1 %cmp56, label %57, label %56 907 %val57 = load volatile float, ptr undef 908 %cmp57 = fcmp une float %val57, 0.0 909 br i1 %cmp57, label %58, label %57 910 %val58 = load volatile float, ptr undef 911 %cmp58 = fcmp une float %val58, 0.0 912 br i1 %cmp58, label %59, label %58 913 %val59 = load volatile float, ptr undef 914 %cmp59 = fcmp une float %val59, 0.0 915 br i1 %cmp59, label %60, label %59 916 %val60 = load volatile float, ptr undef 917 %cmp60 = fcmp une float %val60, 0.0 918 br i1 %cmp60, label %61, label %60 919 %val61 = load volatile float, ptr undef 920 %cmp61 = fcmp une float %val61, 0.0 921 br i1 %cmp61, label %62, label %61 922 %val62 = load volatile float, ptr undef 923 %cmp62 = fcmp une float %val62, 0.0 924 br i1 %cmp62, label %63, label %62 925 %val63 = load volatile float, ptr undef 926 %cmp63 = fcmp une float %val63, 0.0 927 br i1 %cmp63, label %64, label %63 928 %val64 = load volatile float, ptr undef 929 %cmp64 = fcmp une float %val64, 0.0 930 br i1 %cmp64, label %65, label %64 931 932 br label %exit 933exit: 934 ret void 935} 936 937define void @benchmark_heapsort(i32 %n, ptr nocapture %ra) { 938; This test case comes from the heapsort benchmark, and exemplifies several 939; important aspects to block placement in the presence of loops: 940; 1) Loop rotation needs to *ensure* that the desired exiting edge can be 941; a fallthrough. 942; 2) The exiting edge from the loop which is rotated to be laid out at the 943; bottom of the loop needs to be exiting into the nearest enclosing loop (to 944; which there is an exit). Otherwise, we force that enclosing loop into 945; strange layouts that are siginificantly less efficient, often times making 946; it discontiguous. 947; 948; CHECK-LABEL: @benchmark_heapsort 949; CHECK: %entry 950; First rotated loop top. 951; CHECK: .p2align 952; CHECK: %while.end 953; %for.cond gets completely tail-duplicated away. 954; CHECK: %if.then 955; CHECK: %if.else 956; CHECK: %if.end10 957; Second rotated loop top 958; CHECK: %while.cond.outer 959; Third rotated loop top 960; CHECK: .p2align 961; CHECK: %if.end20 962; CHECK: %while.cond 963; CHECK: %while.body 964; CHECK: %land.lhs.true 965; CHECK: %if.then19 966; CHECK: %if.then24 967; CHECK: %if.then8 968; CHECK: ret 969 970entry: 971 %shr = ashr i32 %n, 1 972 %add = add nsw i32 %shr, 1 973 %arrayidx3 = getelementptr inbounds double, ptr %ra, i64 1 974 br label %for.cond 975 976for.cond: 977 %ir.0 = phi i32 [ %n, %entry ], [ %ir.1, %while.end ] 978 %l.0 = phi i32 [ %add, %entry ], [ %l.1, %while.end ] 979 %cmp = icmp sgt i32 %l.0, 1 980 br i1 %cmp, label %if.then, label %if.else 981 982if.then: 983 %dec = add nsw i32 %l.0, -1 984 %idxprom = sext i32 %dec to i64 985 %arrayidx = getelementptr inbounds double, ptr %ra, i64 %idxprom 986 %0 = load double, ptr %arrayidx, align 8 987 br label %if.end10 988 989if.else: 990 %idxprom1 = sext i32 %ir.0 to i64 991 %arrayidx2 = getelementptr inbounds double, ptr %ra, i64 %idxprom1 992 %1 = load double, ptr %arrayidx2, align 8 993 %2 = load double, ptr %arrayidx3, align 8 994 store double %2, ptr %arrayidx2, align 8 995 %dec6 = add nsw i32 %ir.0, -1 996 %cmp7 = icmp eq i32 %dec6, 1 997 br i1 %cmp7, label %if.then8, label %if.end10 998 999if.then8: 1000 store double %1, ptr %arrayidx3, align 8 1001 ret void 1002 1003if.end10: 1004 %ir.1 = phi i32 [ %ir.0, %if.then ], [ %dec6, %if.else ] 1005 %l.1 = phi i32 [ %dec, %if.then ], [ %l.0, %if.else ] 1006 %rra.0 = phi double [ %0, %if.then ], [ %1, %if.else ] 1007 %add31 = add nsw i32 %ir.1, 1 1008 br label %while.cond.outer 1009 1010while.cond.outer: 1011 %j.0.ph.in = phi i32 [ %l.1, %if.end10 ], [ %j.1, %if.then24 ] 1012 %j.0.ph = shl i32 %j.0.ph.in, 1 1013 br label %while.cond 1014 1015while.cond: 1016 %j.0 = phi i32 [ %add31, %if.end20 ], [ %j.0.ph, %while.cond.outer ] 1017 %cmp11 = icmp sgt i32 %j.0, %ir.1 1018 br i1 %cmp11, label %while.end, label %while.body 1019 1020while.body: 1021 %cmp12 = icmp slt i32 %j.0, %ir.1 1022 br i1 %cmp12, label %land.lhs.true, label %if.end20 1023 1024land.lhs.true: 1025 %idxprom13 = sext i32 %j.0 to i64 1026 %arrayidx14 = getelementptr inbounds double, ptr %ra, i64 %idxprom13 1027 %3 = load double, ptr %arrayidx14, align 8 1028 %add15 = add nsw i32 %j.0, 1 1029 %idxprom16 = sext i32 %add15 to i64 1030 %arrayidx17 = getelementptr inbounds double, ptr %ra, i64 %idxprom16 1031 %4 = load double, ptr %arrayidx17, align 8 1032 %cmp18 = fcmp olt double %3, %4 1033 br i1 %cmp18, label %if.then19, label %if.end20 1034 1035if.then19: 1036 br label %if.end20 1037 1038if.end20: 1039 %j.1 = phi i32 [ %add15, %if.then19 ], [ %j.0, %land.lhs.true ], [ %j.0, %while.body ] 1040 %idxprom21 = sext i32 %j.1 to i64 1041 %arrayidx22 = getelementptr inbounds double, ptr %ra, i64 %idxprom21 1042 %5 = load double, ptr %arrayidx22, align 8 1043 %cmp23 = fcmp olt double %rra.0, %5 1044 br i1 %cmp23, label %if.then24, label %while.cond 1045 1046if.then24: 1047 %idxprom27 = sext i32 %j.0.ph.in to i64 1048 %arrayidx28 = getelementptr inbounds double, ptr %ra, i64 %idxprom27 1049 store double %5, ptr %arrayidx28, align 8 1050 br label %while.cond.outer 1051 1052while.end: 1053 %idxprom33 = sext i32 %j.0.ph.in to i64 1054 %arrayidx34 = getelementptr inbounds double, ptr %ra, i64 %idxprom33 1055 store double %rra.0, ptr %arrayidx34, align 8 1056 br label %for.cond 1057} 1058 1059declare void @cold_function() cold 1060 1061define i32 @test_cold_calls(ptr %a) { 1062; Test that edges to blocks post-dominated by cold calls are 1063; marked as not expected to be taken. They should be laid out 1064; at the bottom. 1065; CHECK-LABEL: test_cold_calls: 1066; CHECK: %entry 1067; CHECK: %else 1068; CHECK: %exit 1069; CHECK: %then 1070 1071entry: 1072 %gep1 = getelementptr i32, ptr %a, i32 1 1073 %val1 = load i32, ptr %gep1 1074 %cond1 = icmp ugt i32 %val1, 1 1075 br i1 %cond1, label %then, label %else 1076 1077then: 1078 call void @cold_function() 1079 br label %exit 1080 1081else: 1082 %gep2 = getelementptr i32, ptr %a, i32 2 1083 %val2 = load i32, ptr %gep2 1084 br label %exit 1085 1086exit: 1087 %ret = phi i32 [ %val1, %then ], [ %val2, %else ] 1088 ret i32 %ret 1089} 1090 1091; Make sure we put landingpads out of the way. 1092declare i32 @pers(...) 1093 1094declare i32 @foo(); 1095 1096declare i32 @bar(); 1097 1098define i32 @test_lp(i32 %a) personality ptr @pers { 1099; CHECK-LABEL: test_lp: 1100; CHECK: %entry 1101; CHECK: %hot 1102; CHECK: %then 1103; CHECK: %cold 1104; CHECK: %coldlp 1105; CHECK: %hotlp 1106; CHECK: %lpret 1107entry: 1108 %0 = icmp sgt i32 %a, 1 1109 br i1 %0, label %hot, label %cold, !prof !4 1110 1111hot: 1112 %1 = invoke i32 @foo() 1113 to label %then unwind label %hotlp 1114 1115cold: 1116 %2 = invoke i32 @bar() 1117 to label %then unwind label %coldlp 1118 1119then: 1120 %3 = phi i32 [ %1, %hot ], [ %2, %cold ] 1121 ret i32 %3 1122 1123hotlp: 1124 %4 = landingpad { ptr, i32 } 1125 cleanup 1126 br label %lpret 1127 1128coldlp: 1129 %5 = landingpad { ptr, i32 } 1130 cleanup 1131 br label %lpret 1132 1133lpret: 1134 %6 = phi i32 [-1, %hotlp], [-2, %coldlp] 1135 %7 = add i32 %6, 42 1136 ret i32 %7 1137} 1138 1139!4 = !{!"branch_weights", i32 65536, i32 0} 1140 1141; Make sure that ehpad are scheduled from the least probable one 1142; to the most probable one. See selectBestCandidateBlock as to why. 1143declare void @clean(); 1144 1145define void @test_flow_unwind() personality ptr @pers { 1146; CHECK-LABEL: test_flow_unwind: 1147; CHECK: %entry 1148; CHECK: %then 1149; CHECK: %exit 1150; CHECK: %innerlp 1151; CHECK: %outerlp 1152; CHECK: %outercleanup 1153entry: 1154 %0 = invoke i32 @foo() 1155 to label %then unwind label %outerlp 1156 1157then: 1158 %1 = invoke i32 @bar() 1159 to label %exit unwind label %innerlp 1160 1161exit: 1162 ret void 1163 1164innerlp: 1165 %2 = landingpad { ptr, i32 } 1166 cleanup 1167 br label %innercleanup 1168 1169outerlp: 1170 %3 = landingpad { ptr, i32 } 1171 cleanup 1172 br label %outercleanup 1173 1174outercleanup: 1175 %4 = phi { ptr, i32 } [%2, %innercleanup], [%3, %outerlp] 1176 call void @clean() 1177 resume { ptr, i32 } %4 1178 1179innercleanup: 1180 call void @clean() 1181 br label %outercleanup 1182} 1183 1184declare void @hot_function() 1185 1186define void @test_hot_branch(ptr %a) { 1187; Test that a hot branch that has a probability a little larger than 80% will 1188; break CFG constrains when doing block placement. 1189; CHECK-LABEL: test_hot_branch: 1190; CHECK: %entry 1191; CHECK: %then 1192; CHECK: %exit 1193; CHECK: %else 1194 1195entry: 1196 %gep1 = getelementptr i32, ptr %a, i32 1 1197 %val1 = load i32, ptr %gep1 1198 %cond1 = icmp ugt i32 %val1, 1 1199 br i1 %cond1, label %then, label %else, !prof !5 1200 1201then: 1202 call void @hot_function() 1203 br label %exit 1204 1205else: 1206 call void @cold_function() 1207 br label %exit 1208 1209exit: 1210 call void @hot_function() 1211 ret void 1212} 1213 1214define void @test_hot_branch_profile(ptr %a) !prof !6 { 1215; Test that a hot branch that has a probability a little larger than 50% will 1216; break CFG constrains when doing block placement when profile is available. 1217; CHECK-LABEL: test_hot_branch_profile: 1218; CHECK: %entry 1219; CHECK: %then 1220; CHECK: %exit 1221; CHECK: %else 1222 1223entry: 1224 %gep1 = getelementptr i32, ptr %a, i32 1 1225 %val1 = load i32, ptr %gep1 1226 %cond1 = icmp ugt i32 %val1, 1 1227 br i1 %cond1, label %then, label %else, !prof !7 1228 1229then: 1230 call void @hot_function() 1231 br label %exit 1232 1233else: 1234 call void @cold_function() 1235 br label %exit 1236 1237exit: 1238 call void @hot_function() 1239 ret void 1240} 1241 1242define void @test_hot_branch_triangle_profile(ptr %a) !prof !6 { 1243; Test that a hot branch that has a probability a little larger than 80% will 1244; break triangle shaped CFG constrains when doing block placement if profile 1245; is present. 1246; CHECK-LABEL: test_hot_branch_triangle_profile: 1247; CHECK: %entry 1248; CHECK: %exit 1249; CHECK: %then 1250 1251entry: 1252 %gep1 = getelementptr i32, ptr %a, i32 1 1253 %val1 = load i32, ptr %gep1 1254 %cond1 = icmp ugt i32 %val1, 1 1255 br i1 %cond1, label %exit, label %then, !prof !5 1256 1257then: 1258 call void @hot_function() 1259 br label %exit 1260 1261exit: 1262 call void @hot_function() 1263 ret void 1264} 1265 1266define void @test_hot_branch_triangle_profile_topology(ptr %a) !prof !6 { 1267; Test that a hot branch that has a probability between 50% and 66% will not 1268; break triangle shaped CFG constrains when doing block placement if profile 1269; is present. 1270; CHECK-LABEL: test_hot_branch_triangle_profile_topology: 1271; CHECK: %entry 1272; CHECK: %then 1273; CHECK: %exit 1274 1275entry: 1276 %gep1 = getelementptr i32, ptr %a, i32 1 1277 %val1 = load i32, ptr %gep1 1278 %cond1 = icmp ugt i32 %val1, 1 1279 br i1 %cond1, label %exit, label %then, !prof !7 1280 1281then: 1282 call void @hot_function() 1283 br label %exit 1284 1285exit: 1286 call void @hot_function() 1287 ret void 1288} 1289 1290declare void @a() 1291declare void @b() 1292 1293define void @test_forked_hot_diamond(ptr %a) { 1294; Test that a hot-branch with probability > 80% followed by a 50/50 branch 1295; will not place the cold predecessor if the probability for the fallthrough 1296; remains above 80% 1297; CHECK-LABEL: test_forked_hot_diamond 1298; CHECK: %entry 1299; CHECK: %then 1300; CHECK: %fork1 1301; CHECK: %else 1302; CHECK: %fork2 1303; CHECK: %exit 1304entry: 1305 %gep1 = getelementptr i32, ptr %a, i32 1 1306 %val1 = load i32, ptr %gep1 1307 %cond1 = icmp ugt i32 %val1, 1 1308 br i1 %cond1, label %then, label %else, !prof !5 1309 1310then: 1311 call void @hot_function() 1312 %gep2 = getelementptr i32, ptr %a, i32 2 1313 %val2 = load i32, ptr %gep2 1314 %cond2 = icmp ugt i32 %val2, 2 1315 br i1 %cond2, label %fork1, label %fork2, !prof !8 1316 1317else: 1318 call void @cold_function() 1319 %gep3 = getelementptr i32, ptr %a, i32 3 1320 %val3 = load i32, ptr %gep3 1321 %cond3 = icmp ugt i32 %val3, 3 1322 br i1 %cond3, label %fork1, label %fork2, !prof !8 1323 1324fork1: 1325 call void @a() 1326 br label %exit 1327 1328fork2: 1329 call void @b() 1330 br label %exit 1331 1332exit: 1333 call void @hot_function() 1334 ret void 1335} 1336 1337define void @test_forked_hot_diamond_gets_cold(ptr %a) { 1338; Test that a hot-branch with probability > 80% followed by a 50/50 branch 1339; will place the cold predecessor if the probability for the fallthrough 1340; falls below 80% 1341; The probability for both branches is 85%. For then2 vs else1 1342; this results in a compounded probability of 83%. 1343; Neither then2->fork1 nor then2->fork2 has a large enough relative 1344; probability to break the CFG. 1345; Relative probs: 1346; then2 -> fork1 vs else1 -> fork1 = 71% 1347; then2 -> fork2 vs else2 -> fork2 = 74% 1348; CHECK-LABEL: test_forked_hot_diamond_gets_cold 1349; CHECK: %entry 1350; CHECK: %then1 1351; CHECK: %then2 1352; CHECK: %else1 1353; CHECK: %fork1 1354; CHECK: %else2 1355; CHECK: %fork2 1356; CHECK: %exit 1357entry: 1358 %gep1 = getelementptr i32, ptr %a, i32 1 1359 %val1 = load i32, ptr %gep1 1360 %cond1 = icmp ugt i32 %val1, 1 1361 br i1 %cond1, label %then1, label %else1, !prof !9 1362 1363then1: 1364 call void @hot_function() 1365 %gep2 = getelementptr i32, ptr %a, i32 2 1366 %val2 = load i32, ptr %gep2 1367 %cond2 = icmp ugt i32 %val2, 2 1368 br i1 %cond2, label %then2, label %else2, !prof !9 1369 1370else1: 1371 call void @cold_function() 1372 br label %fork1 1373 1374then2: 1375 call void @hot_function() 1376 %gep3 = getelementptr i32, ptr %a, i32 3 1377 %val3 = load i32, ptr %gep2 1378 %cond3 = icmp ugt i32 %val2, 3 1379 br i1 %cond3, label %fork1, label %fork2, !prof !8 1380 1381else2: 1382 call void @cold_function() 1383 br label %fork2 1384 1385fork1: 1386 call void @a() 1387 br label %exit 1388 1389fork2: 1390 call void @b() 1391 br label %exit 1392 1393exit: 1394 call void @hot_function() 1395 ret void 1396} 1397 1398define void @test_forked_hot_diamond_stays_hot(ptr %a) { 1399; Test that a hot-branch with probability > 88.88% (1:8) followed by a 50/50 1400; branch will not place the cold predecessor as the probability for the 1401; fallthrough stays above 80% 1402; (1:8) followed by (1:1) is still (1:4) 1403; Here we use 90% probability because two in a row 1404; have a 89 % probability vs the original branch. 1405; CHECK-LABEL: test_forked_hot_diamond_stays_hot 1406; CHECK: %entry 1407; CHECK: %then1 1408; CHECK: %then2 1409; CHECK: %fork1 1410; CHECK: %else1 1411; CHECK: %else2 1412; CHECK: %fork2 1413; CHECK: %exit 1414entry: 1415 %gep1 = getelementptr i32, ptr %a, i32 1 1416 %val1 = load i32, ptr %gep1 1417 %cond1 = icmp ugt i32 %val1, 1 1418 br i1 %cond1, label %then1, label %else1, !prof !10 1419 1420then1: 1421 call void @hot_function() 1422 %gep2 = getelementptr i32, ptr %a, i32 2 1423 %val2 = load i32, ptr %gep2 1424 %cond2 = icmp ugt i32 %val2, 2 1425 br i1 %cond2, label %then2, label %else2, !prof !10 1426 1427else1: 1428 call void @cold_function() 1429 br label %fork1 1430 1431then2: 1432 call void @hot_function() 1433 %gep3 = getelementptr i32, ptr %a, i32 3 1434 %val3 = load i32, ptr %gep2 1435 %cond3 = icmp ugt i32 %val2, 3 1436 br i1 %cond3, label %fork1, label %fork2, !prof !8 1437 1438else2: 1439 call void @cold_function() 1440 br label %fork2 1441 1442fork1: 1443 call void @a() 1444 br label %exit 1445 1446fork2: 1447 call void @b() 1448 br label %exit 1449 1450exit: 1451 call void @hot_function() 1452 ret void 1453} 1454 1455; Because %endif has a higher frequency than %if, the calculations show we 1456; shouldn't tail-duplicate %endif so that we can place it after %if. We were 1457; previously undercounting the cost by ignoring execution frequency that didn't 1458; come from the %if->%endif path. 1459; CHECK-LABEL: higher_frequency_succ_tail_dup 1460; CHECK: %entry 1461; CHECK: %elseif 1462; CHECK: %else 1463; CHECK: %endif 1464; CHECK: %then 1465; CHECK: %ret 1466define void @higher_frequency_succ_tail_dup(i1 %a, i1 %b, i1 %c) { 1467entry: 1468 br label %if 1469if: ; preds = %entry 1470 call void @effect(i32 0) 1471 br i1 %a, label %elseif, label %endif, !prof !11 ; even 1472 1473elseif: ; preds = %if 1474 call void @effect(i32 1) 1475 br i1 %b, label %else, label %endif, !prof !11 ; even 1476 1477else: ; preds = %elseif 1478 call void @effect(i32 2) 1479 br label %endif 1480 1481endif: ; preds = %if, %elseif, %else 1482 br i1 %c, label %then, label %ret, !prof !12 ; 5 to 3 1483 1484then: ; preds = %endif 1485 call void @effect(i32 3) 1486 br label %ret 1487 1488ret: ; preds = %endif, %then 1489 ret void 1490} 1491 1492define i32 @not_rotate_if_extra_branch(i32 %count) { 1493; Test checks that there is no loop rotation 1494; if it introduces extra branch. 1495; Specifically in this case because best exit is .header 1496; but it has fallthrough to .middle block and last block in 1497; loop chain .slow does not have afallthrough to .header. 1498; CHECK-LABEL: not_rotate_if_extra_branch 1499; CHECK: %.entry 1500; CHECK: %.header 1501; CHECK: %.middle 1502; CHECK: %.backedge 1503; CHECK: %.slow 1504; CHECK: %.bailout 1505; CHECK: %.stop 1506.entry: 1507 %sum.0 = shl nsw i32 %count, 1 1508 br label %.header 1509 1510.header: 1511 %i = phi i32 [ %i.1, %.backedge ], [ 0, %.entry ] 1512 %sum = phi i32 [ %sum.1, %.backedge ], [ %sum.0, %.entry ] 1513 %is_exc = icmp sgt i32 %i, 9000000 1514 br i1 %is_exc, label %.bailout, label %.middle, !prof !13 1515 1516.bailout: 1517 %sum.2 = add nsw i32 %count, 1 1518 br label %.stop 1519 1520.middle: 1521 %pr.1 = and i32 %i, 1023 1522 %pr.2 = icmp eq i32 %pr.1, 0 1523 br i1 %pr.2, label %.slow, label %.backedge, !prof !14 1524 1525.slow: 1526 tail call void @effect(i32 %sum) 1527 br label %.backedge 1528 1529.backedge: 1530 %sum.1 = add nsw i32 %i, %sum 1531 %i.1 = add nsw i32 %i, 1 1532 %end = icmp slt i32 %i.1, %count 1533 br i1 %end, label %.header, label %.stop, !prof !15 1534 1535.stop: 1536 %sum.phi = phi i32 [ %sum.1, %.backedge ], [ %sum.2, %.bailout ] 1537 ret i32 %sum.phi 1538} 1539 1540define i32 @not_rotate_if_extra_branch_regression(i32 %count, i32 %init) { 1541; This is a regression test against patch avoid loop rotation if 1542; it introduce an extra btanch. 1543; CHECK-LABEL: not_rotate_if_extra_branch_regression 1544; CHECK: %.entry 1545; CHECK: %.first_backedge 1546; CHECK: %.second_header 1547; CHECK: %.slow 1548.entry: 1549 %sum.0 = shl nsw i32 %count, 1 1550 br label %.first_header 1551 1552.first_header: 1553 %i = phi i32 [ %i.1, %.first_backedge ], [ 0, %.entry ] 1554 %is_bo1 = icmp sgt i32 %i, 9000000 1555 br i1 %is_bo1, label %.bailout, label %.first_backedge, !prof !14 1556 1557.first_backedge: 1558 %i.1 = add nsw i32 %i, 1 1559 %end = icmp slt i32 %i.1, %count 1560 br i1 %end, label %.first_header, label %.second_header, !prof !13 1561 1562.second_header: 1563 %j = phi i32 [ %j.1, %.second_backedge ], [ %init, %.first_backedge ] 1564 %end.2 = icmp sgt i32 %j, %count 1565 br i1 %end.2, label %.stop, label %.second_middle, !prof !14 1566 1567.second_middle: 1568 %is_slow = icmp sgt i32 %j, 9000000 1569 br i1 %is_slow, label %.slow, label %.second_backedge, !prof !14 1570 1571.slow: 1572 tail call void @effect(i32 %j) 1573 br label %.second_backedge 1574 1575.second_backedge: 1576 %j.1 = add nsw i32 %j, 1 1577 %end.3 = icmp slt i32 %j, 10000000 1578 br i1 %end.3, label %.second_header, label %.stop, !prof !13 1579 1580.stop: 1581 %res = add nsw i32 %j, %i.1 1582 ret i32 %res 1583 1584.bailout: 1585 ret i32 0 1586} 1587 1588declare void @effect(i32) 1589 1590!5 = !{!"branch_weights", i32 84, i32 16} 1591!6 = !{!"function_entry_count", i32 10} 1592!7 = !{!"branch_weights", i32 60, i32 40} 1593!8 = !{!"branch_weights", i32 5001, i32 4999} 1594!9 = !{!"branch_weights", i32 85, i32 15} 1595!10 = !{!"branch_weights", i32 90, i32 10} 1596!11 = !{!"branch_weights", i32 1, i32 1} 1597!12 = !{!"branch_weights", i32 5, i32 3} 1598!13 = !{!"branch_weights", i32 1, i32 1} 1599!14 = !{!"branch_weights", i32 1, i32 1023} 1600!15 = !{!"branch_weights", i32 4095, i32 1} 1601