; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -O2 -o - %s | FileCheck %s
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-grtev4-linux-gnu"

; This file exercises tail duplication during block layout on three small
; control-flow graphs whose branches carry profile weights (!1, !2, !3 at the
; end of the file).

; Tail duplication during layout is expected to copy %test2 into the end of
; %body1, so that %body1 branches directly to %exit or falls through to %body2.
; The layout pinned by the autogenerated assertions below is:
; entry (with test1 merged in)
; test2
; exit
; body1 (with copy of test2)
; body2

define void @tail_dup_break_cfg(i32 %tag) {
; CHECK-LABEL: tail_dup_break_cfg:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr 0
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    .cfi_offset r30, -16
; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
; CHECK-NEXT:    stdu 1, -48(1)
; CHECK-NEXT:    mr 30, 3
; CHECK-NEXT:    std 0, 64(1)
; CHECK-NEXT:    andi. 3, 30, 1
; CHECK-NEXT:    bc 12, 1, .LBB0_3
; CHECK-NEXT:  # %bb.1: # %test2
; CHECK-NEXT:    andi. 3, 30, 2
; CHECK-NEXT:    bne 0, .LBB0_4
; CHECK-NEXT:  .LBB0_2: # %exit
; CHECK-NEXT:    addi 1, 1, 48
; CHECK-NEXT:    ld 0, 16(1)
; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
; CHECK-NEXT:    mtlr 0
; CHECK-NEXT:    blr
; CHECK-NEXT:  .LBB0_3: # %body1
; CHECK-NEXT:    bl a
; CHECK-NEXT:    nop
; CHECK-NEXT:    bl a
; CHECK-NEXT:    nop
; CHECK-NEXT:    bl a
; CHECK-NEXT:    nop
; CHECK-NEXT:    bl a
; CHECK-NEXT:    nop
; CHECK-NEXT:    andi. 3, 30, 2
; CHECK-NEXT:    beq 0, .LBB0_2
; CHECK-NEXT:  .LBB0_4: # %body2
; CHECK-NEXT:    bl b
; CHECK-NEXT:    nop
; CHECK-NEXT:    bl b
; CHECK-NEXT:    nop
; CHECK-NEXT:    bl b
; CHECK-NEXT:    nop
; CHECK-NEXT:    bl b
; CHECK-NEXT:    nop
; CHECK-NEXT:    b .LBB0_2
entry:
  br label %test1
; Bit 0 of %tag selects whether %body1 runs.
test1:
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %body1, !prof !1 ; %test2 more likely
body1:
  call void @a()
  call void @a()
  call void @a()
  call void @a()
  br label %test2
; Bit 1 of %tag selects whether %body2 runs. This is the block that the
; assertions above show duplicated at the end of %body1.
test2:
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %exit, label %body2, !prof !1 ; %exit more likely
body2:
  call void @b()
  call void @b()
  call void @b()
  call void @b()
  br label %exit
exit:
  ret void
}

; The branch weights here hint that we shouldn't tail duplicate in this case.
; Unlike @tail_dup_break_cfg, the branch in %test2 uses !3 and favors %body2,
; and the assertions below keep a single copy of %test2 (.LBB1_2) that is
; reached both by fallthrough from %body1 and by the branch from the entry.
define void @tail_dup_dont_break_cfg(i32 %tag) {
; CHECK-LABEL: tail_dup_dont_break_cfg:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr 0
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    .cfi_offset r30, -16
; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
; CHECK-NEXT:    stdu 1, -48(1)
; CHECK-NEXT:    mr 30, 3
; CHECK-NEXT:    std 0, 64(1)
; CHECK-NEXT:    andi. 3, 30, 1
; CHECK-NEXT:    bc 4, 1, .LBB1_2
; CHECK-NEXT:  # %bb.1: # %body1
; CHECK-NEXT:    bl a
; CHECK-NEXT:    nop
; CHECK-NEXT:    bl a
; CHECK-NEXT:    nop
; CHECK-NEXT:    bl a
; CHECK-NEXT:    nop
; CHECK-NEXT:    bl a
; CHECK-NEXT:    nop
; CHECK-NEXT:  .LBB1_2: # %test2
; CHECK-NEXT:    andi. 3, 30, 2
; CHECK-NEXT:    beq 0, .LBB1_4
; CHECK-NEXT:  # %bb.3: # %body2
; CHECK-NEXT:    bl b
; CHECK-NEXT:    nop
; CHECK-NEXT:    bl b
; CHECK-NEXT:    nop
; CHECK-NEXT:    bl b
; CHECK-NEXT:    nop
; CHECK-NEXT:    bl b
; CHECK-NEXT:    nop
; CHECK-NEXT:  .LBB1_4: # %exit
; CHECK-NEXT:    addi 1, 1, 48
; CHECK-NEXT:    ld 0, 16(1)
; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
; CHECK-NEXT:    mtlr 0
; CHECK-NEXT:    blr
entry:
  br label %test1
test1:
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %body1, !prof !1 ; %test2 more likely
body1:
  call void @a()
  call void @a()
  call void @a()
  call void @a()
  br label %test2
test2:
  %tagbit2 = and i32 %tag, 2
  ; NOTE: despite its name, %tagbit2eq0 is true when bit 1 is SET (icmp ne),
  ; which is why %body2 is the first (favored) successor below.
  %tagbit2eq0 = icmp ne i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %body2, label %exit, !prof !3 ; %body2 more likely
body2:
  call void @b()
  call void @b()
  call void @b()
  call void @b()
  br label %exit
exit:
  ret void
}

; External callees. Their bodies are not part of this file; the calls only
; give each block some content.
declare void @a()
declare void @b()
declare void @c()
declare void @d()

; This function arranges for the successors of %succ to have already been laid
; out. When we consider whether to lay out succ after bb and to tail-duplicate
; it, v and ret have already been placed, so we tail-duplicate as it removes a
; branch and strictly increases fallthrough.
; In the assertions below the copy of %succ appears at the end of %c
; (.LBB2_5), after the two calls to c.
define void @tail_dup_no_succ(i32 %tag) {
; CHECK-LABEL: tail_dup_no_succ:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr 0
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    .cfi_offset r30, -16
; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
; CHECK-NEXT:    stdu 1, -48(1)
; CHECK-NEXT:    andi. 4, 3, 1
; CHECK-NEXT:    std 0, 64(1)
; CHECK-NEXT:    bc 12, 1, .LBB2_3
; CHECK-NEXT:  .LBB2_1: # %v
; CHECK-NEXT:    bl d
; CHECK-NEXT:    nop
; CHECK-NEXT:    bl d
; CHECK-NEXT:    nop
; CHECK-NEXT:  .LBB2_2: # %ret
; CHECK-NEXT:    addi 1, 1, 48
; CHECK-NEXT:    ld 0, 16(1)
; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
; CHECK-NEXT:    mtlr 0
; CHECK-NEXT:    blr
; CHECK-NEXT:  .LBB2_3: # %bb
; CHECK-NEXT:    andi. 4, 3, 2
; CHECK-NEXT:    bne 0, .LBB2_5
; CHECK-NEXT:  # %bb.4: # %succ
; CHECK-NEXT:    andi. 3, 3, 4
; CHECK-NEXT:    beq 0, .LBB2_2
; CHECK-NEXT:    b .LBB2_1
; CHECK-NEXT:  .LBB2_5: # %c
; CHECK-NEXT:    mr 30, 3
; CHECK-NEXT:    bl c
; CHECK-NEXT:    nop
; CHECK-NEXT:    bl c
; CHECK-NEXT:    nop
; CHECK-NEXT:    mr 3, 30
; CHECK-NEXT:    andi. 3, 3, 4
; CHECK-NEXT:    beq 0, .LBB2_2
; CHECK-NEXT:    b .LBB2_1
entry:
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %v, label %bb, !prof !2 ; %v very much more likely
bb:
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %succ, label %c, !prof !3 ; %succ more likely
c:
  call void @c()
  call void @c()
  br label %succ
; Both successors of %succ (%ret and %v) are also reachable from %entry.
succ:
  %tagbit3 = and i32 %tag, 4
  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
  br i1 %tagbit3eq0, label %ret, label %v, !prof !1 ; %ret more likely
v:
  call void @d()
  call void @d()
  br label %ret
ret:
  ret void
}

; Branch weights referenced by the !prof attachments above. The first weight
; belongs to the first listed successor of the branch.
; !1: 5 to 3, mildly favors the first successor.
!1 = !{!"branch_weights", i32 5, i32 3}
; !2: 95 to 5, strongly favors the first successor.
!2 = !{!"branch_weights", i32 95, i32 5}
; !3: 8 to 3, favors the first successor.
!3 = !{!"branch_weights", i32 8, i32 3}