; RUN: opt < %s -passes='print<block-freq>' -disable-output 2>&1 | FileCheck %s --check-prefixes=BFI_BEFORE
; RUN: opt < %s -passes='loop(loop-rotate),print<block-freq>' -disable-output 2>&1 | FileCheck %s --check-prefixes=BFI_AFTER
; RUN: opt < %s -passes='loop(loop-rotate)' -S | FileCheck %s --check-prefixes=IR

; This test prints block frequencies before and after loop rotation and also
; inspects the rotated output, so that the branch weights attached by the
; loop-rotate pass can be compared against the original profile.

@g = global i32 0

; We should get the same "count =" results for "outer_loop_body" and
; "inner_loop_body" before and after the transformation.

; BFI_BEFORE-LABEL: block-frequency-info: func0
; BFI_BEFORE: - entry: {{.*}} count = 1
; BFI_BEFORE: - outer_loop_header: {{.*}} count = 1001
; BFI_BEFORE: - outer_loop_body: {{.*}} count = 1000
; BFI_BEFORE: - inner_loop_header: {{.*}} count = 4000
; BFI_BEFORE: - inner_loop_body: {{.*}} count = 3000
; BFI_BEFORE: - inner_loop_exit: {{.*}} count = 1000
; BFI_BEFORE: - outer_loop_exit: {{.*}} count = 1

; BFI_AFTER-LABEL: block-frequency-info: func0
; BFI_AFTER: - entry: {{.*}} count = 1
; BFI_AFTER: - outer_loop_body: {{.*}} count = 1000
; BFI_AFTER: - inner_loop_body: {{.*}} count = 3000
; BFI_AFTER: - inner_loop_exit: {{.*}} count = 1000
; BFI_AFTER: - outer_loop_exit: {{.*}} count = 1

; IR-LABEL: define void @func0
; IR: inner_loop_body:
; IR: br i1 %cmp1, label %inner_loop_body, label %inner_loop_exit, !prof [[PROF_FUNC0_0:![0-9]+]]
; IR: inner_loop_exit:
; IR: br i1 %cmp0, label %outer_loop_body, label %outer_loop_exit, !prof [[PROF_FUNC0_1:![0-9]+]]
;
; A function with known loop bounds where, after loop rotation, we end up with
; an unconditional branch in the pre-header.
define void @func0() !prof !0 {
entry:
  br label %outer_loop_header

outer_loop_header:
  %i0 = phi i32 [0, %entry], [%i0_inc, %inner_loop_exit]
  %cmp0 = icmp slt i32 %i0, 1000
  br i1 %cmp0, label %outer_loop_body, label %outer_loop_exit, !prof !1

outer_loop_body:
  store volatile i32 %i0, ptr @g, align 4
  br label %inner_loop_header

inner_loop_header:
  %i1 = phi i32 [0, %outer_loop_body], [%i1_inc, %inner_loop_body]
  %cmp1 = icmp slt i32 %i1, 3
  br i1 %cmp1, label %inner_loop_body, label %inner_loop_exit, !prof !2

inner_loop_body:
  store volatile i32 %i1, ptr @g, align 4
  %i1_inc = add i32 %i1, 1
  br label %inner_loop_header

inner_loop_exit:
  %i0_inc = add i32 %i0, 1
  br label %outer_loop_header

outer_loop_exit:
  ret void
}

; BFI_BEFORE-LABEL: block-frequency-info: func1
; BFI_BEFORE: - entry: {{.*}} count = 1024
; BFI_BEFORE: - loop_header: {{.*}} count = 21504
; BFI_BEFORE: - loop_body: {{.*}} count = 20480
; BFI_BEFORE: - loop_exit: {{.*}} count = 1024

; BFI_AFTER-LABEL: block-frequency-info: func1
; BFI_AFTER: - entry: {{.*}} count = 1024
; BFI_AFTER: - loop_body.lr.ph: {{.*}} count = 1016
; BFI_AFTER: - loop_body: {{.*}} count = 20480
; BFI_AFTER: - loop_header.loop_exit_crit_edge: {{.*}} count = 1016
; BFI_AFTER: - loop_exit: {{.*}} count = 1024

; IR-LABEL: define void @func1
; IR: entry:
; IR: br i1 %cmp1, label %loop_body.lr.ph, label %loop_exit, !prof [[PROF_FUNC1_0:![0-9]+]]

; IR: loop_body:
; IR: br i1 %cmp, label %loop_body, label %loop_header.loop_exit_crit_edge, !prof [[PROF_FUNC1_1:![0-9]+]]

; A function with unknown loop bounds, so loop rotation ends up with a
; conditional branch in both the pre-header and the loop body. The branch
; weights show the loop body is taken more often than the loop exit.
define void @func1(i32 %n) !prof !3 {
entry:
  br label %loop_header

loop_header:
  %i = phi i32 [0, %entry], [%i_inc, %loop_body]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %loop_body, label %loop_exit, !prof !4

loop_body:
  store volatile i32 %i, ptr @g, align 4
  %i_inc = add i32 %i, 1
  br label %loop_header

loop_exit:
  ret void
}

; BFI_BEFORE-LABEL: block-frequency-info: func2
; BFI_BEFORE: - entry: {{.*}} count = 1024
; BFI_BEFORE: - loop_header: {{.*}} count = 1056
; BFI_BEFORE: - loop_body: {{.*}} count = 32
; BFI_BEFORE: - loop_exit: {{.*}} count = 1024

; BFI_AFTER-LABEL: block-frequency-info: func2
; BFI_AFTER: - entry: {{.*}} count = 1024
; BFI_AFTER: - loop_body.lr.ph: {{.*}} count = 32
; BFI_AFTER: - loop_body: {{.*}} count = 32
; BFI_AFTER: - loop_header.loop_exit_crit_edge: {{.*}} count = 32
; BFI_AFTER: - loop_exit: {{.*}} count = 1024

; IR-LABEL: define void @func2
; IR: entry:
; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC2_0:![0-9]+]]

; IR: loop_body:
; IR: br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC2_1:![0-9]+]]

; A function with unknown loop bounds, so loop rotation ends up with a
; conditional branch in both the pre-header and the loop body. Similar to
; `func1`, but here the loop-exit count is higher than the backedge count.
define void @func2(i32 %n) !prof !3 {
entry:
  br label %loop_header

loop_header:
  %i = phi i32 [0, %entry], [%i_inc, %loop_body]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %loop_exit, label %loop_body, !prof !5

loop_body:
  store volatile i32 %i, ptr @g, align 4
  %i_inc = add i32 %i, 1
  br label %loop_header

loop_exit:
  ret void
}

; BFI_BEFORE-LABEL: block-frequency-info: func3_zero_branch_weight
; BFI_BEFORE: - entry: {{.*}} count = 1024
; BFI_BEFORE: - loop_header: {{.*}} count = 2199023255552
; BFI_BEFORE: - loop_body: {{.*}} count = 2199023254528
; BFI_BEFORE: - loop_exit: {{.*}} count = 1024

; BFI_AFTER-LABEL: block-frequency-info: func3_zero_branch_weight
; BFI_AFTER: - entry: {{.*}} count = 1024
; BFI_AFTER: - loop_body.lr.ph: {{.*}} count = 1024
; BFI_AFTER: - loop_body: {{.*}} count = 2199023255552
; BFI_AFTER: - loop_header.loop_exit_crit_edge: {{.*}} count = 1024
; BFI_AFTER: - loop_exit: {{.*}} count = 1024

; IR-LABEL: define void @func3_zero_branch_weight
; IR: entry:
; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC3_0:![0-9]+]]

; IR: loop_body:
; IR: br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC3_0]]

; The header branch has a zero weight on the loop-exit edge (weights 0, 1).
; Both branches produced by rotation are expected to carry the same metadata
; node.
define void @func3_zero_branch_weight(i32 %n) !prof !3 {
entry:
  br label %loop_header

loop_header:
  %i = phi i32 [0, %entry], [%i_inc, %loop_body]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %loop_exit, label %loop_body, !prof !6

loop_body:
  store volatile i32 %i, ptr @g, align 4
  %i_inc = add i32 %i, 1
  br label %loop_header

loop_exit:
  ret void
}

; IR-LABEL: define void @func4_zero_branch_weight
; IR: entry:
; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC4_0:![0-9]+]]

; IR: loop_body:
; IR: br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC4_0]]

; The header branch has a zero weight on the loop-body edge (weights 1, 0).
; Both branches produced by rotation are expected to carry the same metadata
; node.
define void @func4_zero_branch_weight(i32 %n) !prof !3 {
entry:
  br label %loop_header

loop_header:
  %i = phi i32 [0, %entry], [%i_inc, %loop_body]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %loop_exit, label %loop_body, !prof !7

loop_body:
  store volatile i32 %i, ptr @g, align 4
  %i_inc = add i32 %i, 1
  br label %loop_header

loop_exit:
  ret void
}

; IR-LABEL: define void @func5_zero_branch_weight
; IR: entry:
; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC5_0:![0-9]+]]

; IR: loop_body:
; IR: br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC5_0]]

; The header branch has zero weights on both edges (weights 0, 0).
; Both branches produced by rotation are expected to carry the same metadata
; node.
define void @func5_zero_branch_weight(i32 %n) !prof !3 {
entry:
  br label %loop_header

loop_header:
  %i = phi i32 [0, %entry], [%i_inc, %loop_body]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %loop_exit, label %loop_body, !prof !8

loop_body:
  store volatile i32 %i, ptr @g, align 4
  %i_inc = add i32 %i, 1
  br label %loop_header

loop_exit:
  ret void
}

; BFI_BEFORE-LABEL: block-frequency-info: func6_inaccurate_branch_weight
; BFI_BEFORE: - entry: {{.*}} count = 1024
; BFI_BEFORE: - loop_header: {{.*}} count = 2047
; BFI_BEFORE: - loop_body: {{.*}} count = 1023
; BFI_BEFORE: - loop_exit: {{.*}} count = 1024

; BFI_AFTER-LABEL: block-frequency-info: func6_inaccurate_branch_weight
; BFI_AFTER: - entry: {{.*}} count = 1024
; BFI_AFTER: - loop_body: {{.*}} count = 1024
; BFI_AFTER: - loop_exit: {{.*}} count = 1024

; IR-LABEL: define void @func6_inaccurate_branch_weight(
; IR: entry:
; IR: br label %loop_body
; IR: loop_body:
; IR: br i1 %cmp, label %loop_body, label %loop_exit, !prof [[PROF_FUNC6_0:![0-9]+]]
; IR: loop_exit:
; IR: ret void

; Branch weights from sample-based PGO may be inaccurate due to sampling.
; In the following case the count for loop_body should not be less than the
; count for loop_exit. However, this may not hold for sample-based PGO.
define void @func6_inaccurate_branch_weight() !prof !3 {
entry:
  br label %loop_header

loop_header:
  %i = phi i32 [0, %entry], [%i_inc, %loop_body]
  %cmp = icmp slt i32 %i, 2
  br i1 %cmp, label %loop_body, label %loop_exit, !prof !9

loop_body:
  store volatile i32 %i, ptr @g, align 4
  %i_inc = add i32 %i, 1
  br label %loop_header

loop_exit:
  ret void
}

!0 = !{!"function_entry_count", i64 1}
!1 = !{!"branch_weights", i32 1000, i32 1}
!2 = !{!"branch_weights", i32 3000, i32 1000}
!3 = !{!"function_entry_count", i64 1024}
!4 = !{!"branch_weights", i32 40, i32 2}
!5 = !{!"branch_weights", i32 10240, i32 320}
!6 = !{!"branch_weights", i32 0, i32 1}
!7 = !{!"branch_weights", i32 1, i32 0}
!8 = !{!"branch_weights", i32 0, i32 0}
!9 = !{!"branch_weights", i32 1023, i32 1024}

; IR: [[PROF_FUNC0_0]] = !{!"branch_weights", i32 2000, i32 1000}
; IR: [[PROF_FUNC0_1]] = !{!"branch_weights", i32 999, i32 1}
; IR: [[PROF_FUNC1_0]] = !{!"branch_weights", i32 127, i32 1}
; IR: [[PROF_FUNC1_1]] = !{!"branch_weights", i32 2433, i32 127}
; IR: [[PROF_FUNC2_0]] = !{!"branch_weights", i32 9920, i32 320}
; IR: [[PROF_FUNC2_1]] = !{!"branch_weights", i32 320, i32 0}
; IR: [[PROF_FUNC3_0]] = !{!"branch_weights", i32 0, i32 1}
; IR: [[PROF_FUNC4_0]] = !{!"branch_weights", i32 1, i32 0}
; IR: [[PROF_FUNC5_0]] = !{!"branch_weights", i32 0, i32 0}
; IR: [[PROF_FUNC6_0]] = !{!"branch_weights", i32 0, i32 1024}