; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-correlation-irreducible-loops.prof -sample-profile-use-profi=0 | opt -passes='print<block-freq>' -disable-output -use-iterative-bfi-inference 2>&1 | FileCheck %s
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-correlation-irreducible-loops.prof -sample-profile-use-profi=0 -S | FileCheck %s --check-prefix=CHECK2
; RUN: opt < %s -passes='print<block-freq>' -use-iterative-bfi-inference -disable-output 2>&1 | FileCheck %s --check-prefix=CHECK3

; The three invocations above exercise, in order:
;   1. sample-profile loading (profi disabled) piped into the block-frequency
;      printer with iterative inference enabled; matched with the default
;      check prefix.
;   2. sample-profile loading alone, emitting textual IR; the second prefix
;      inspects the !prof metadata attached to two conditional branches.
;   3. the block-frequency printer run directly on this file; the third prefix
;      covers @foo1, which already carries !prof metadata in the input.
;
; The C code for this test case is from c-parse.c in 403.gcc (SPEC2006)
; The problem with BFI for the test is solved by applying iterative inference.
; The corresponding CFG graph is shown below, with intended counts for every
; basic block. The hot loop, b3->b4->b2, is not getting proper (large) counts
; unless the -use-iterative-bfi-inference option is specified.
;
; Note: the diagram lists the intended counts (625/625/624 for b2/b3/b4); the
; expectations attached to those blocks below pin 586/586/585 instead.
;
; +-------------------------------------------+
; |                                           |
; |                   +----------+            |
; |                   | b1 [1]   |            |
; |                   +----------+            |
; |                     |                     |
; |                     |                     |
; |                     v                     |
; |                   +----------+            |
; |    +------------> | b2 [625] | -+         |
; |    |              +----------+  |         |
; |    |                |           |         |
; |    |                |           |         |
; |    |                v           |         |
; | +----------+      +----------+  |         |
; | | b4 [624] | <--  | b3 [625] | <+---------+
; | +----------+      +----------+  |
; |                     |           |
; +----+                |           |
;      |                v           v
; +----------+     +--------------------+
; | b8 [1]   | <-- | b7 [2]             |
; +----------+     +--------------------+
;                       |           ^
;                       |           |
;                       v           |
;   +----------+      +----------+  |
;   | b9 [1]   | <--  | b5 [2]   |  |
;   +----------+      +----------+  |
;                       |           |
;                       |           |
;                       v           |
;                     +----------+  |
;                     | b6 [1]   | -+
;                     +----------+

@yydebug = dso_local global i32 0, align 4

; Function Attrs: noinline nounwind uwtable
define dso_local i32 @yyparse_1() #0 {
b1:
  call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 1, i32 0, i64 -1)
  %0 = load i32, ptr @yydebug, align 4
  ; %cmp is the single condition shared by every conditional branch below.
  %cmp = icmp ne i32 %0, 0
  br label %b2
; CHECK: - b1: float = {{.*}}, int = {{.*}}, count = 1

b2:
  call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 2, i32 0, i64 -1)
  br i1 %cmp, label %b7, label %b3
; CHECK: - b2: float = {{.*}}, int = {{.*}}, count = 586

b3:
  call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 3, i32 0, i64 -1)
  br i1 %cmp, label %b7, label %b4
; CHECK: - b3: float = {{.*}}, int = {{.*}}, count = 586
; CHECK2: br i1 %cmp, label %b7, label %b4,
; CHECK2-SAME: !prof ![[END172_PROF:[0-9]+]]

b4:
  call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 4, i32 0, i64 -1)
  br label %b2
; CHECK: - b4: float = {{.*}}, int = {{.*}}, count = 585

b5:
  call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 5, i32 0, i64 -1)
  br i1 %cmp, label %b9, label %b6
; CHECK: - b5: float = {{.*}}, int = {{.*}}, count = 2

b6:
  call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 6, i32 0, i64 -1)
  br label %b7
; CHECK: - b6: float = {{.*}}, int = {{.*}}, count = 1

b7:
  call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 7, i32 0, i64 -1)
  br i1 %cmp, label %b5, label %b8
; CHECK: - b7: float = {{.*}}, int = {{.*}}, count = 2
; CHECK2: br i1 %cmp, label %b5, label %b8,
; CHECK2-SAME: !prof ![[FALSE4858_PROF:[0-9]+]]

b8:
  call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 8, i32 0, i64 -1)
  br label %b3
; CHECK: - b8: float = {{.*}}, int = {{.*}}, count = 1

b9:
  call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 9, i32 0, i64 -1)
  %1 = load i32, ptr @yydebug, align 4
  ret i32 %1
; CHECK: - b9: float = {{.*}}, int = {{.*}}, count = 1

}

; Another difficult (for BFI) instance with irreducible loops,
; containing 'indirectbr'. The corresponding CFG graph is shown below, with
; intended counts for every basic block.
;
;    +-----------+
;    | b1 [1]    |
;    +-----------+
;      |
;      |
;      v
;    +------------------------+
; +- | b2 [86]                | <+
; |  +------------------------+  |
; |    |            |            |
; |    |            |            |
; |    v            |            |
; |  +-----------+  |            |
; |  | b3 [8212] | <+-------+    |
; |  +-----------+  |       |    |
; |    |            |       |    |
; |    |            |       |    |
; |    v            v       |    |
; |  +------------------------+  |
; |  | indirectgoto [17747]   | -+
; |  +------------------------+
; |    |            ^  |
; |    |            +--+
; |    v
; |  +-----------+
; +> | b4 [1]    |
;    +-----------+

; Function Attrs: nounwind uwtable
define dso_local i32 @foo1() #0 !prof !132 {
b1:
  call void @llvm.pseudoprobe(i64 7682762345278052905, i64 1, i32 0, i64 -1)
  %0 = load i32, ptr @yydebug, align 4
  %cmp = icmp ne i32 %0, 0
  br label %b2
; CHECK3: - b1: float = {{.*}}, int = {{.*}}, count = 1

b2:
  call void @llvm.pseudoprobe(i64 7682762345278052905, i64 2, i32 0, i64 -1)
  %1 = load i32, ptr @yydebug, align 4
  switch i32 %1, label %b4 [
    i32 1, label %indirectgoto
    i32 2, label %b3
  ], !prof !133
; CHECK3: - b2: float = {{.*}}, int = {{.*}}, count = 86

b3:
  call void @llvm.pseudoprobe(i64 7682762345278052905, i64 3, i32 0, i64 -1)
  br label %indirectgoto
; CHECK3: - b3: float = {{.*}}, int = {{.*}}, count = 8212

b4:
  call void @llvm.pseudoprobe(i64 7682762345278052905, i64 4, i32 0, i64 -1)
  %2 = load i32, ptr @yydebug, align 4
  ret i32 %2
; CHECK3: - b4: float = {{.*}}, int = {{.*}}, count = 1

indirectgoto:
  %indirect.goto.dest = alloca i8, align 4
  call void @llvm.pseudoprobe(i64 7682762345278052905, i64 5, i32 0, i64 -1)
  indirectbr ptr %indirect.goto.dest, [label %b2, label %indirectgoto, label %b4, label %b3], !prof !134
; CHECK3: - indirectgoto: float = {{.*}}, int = {{.*}}, count = 17747

}

declare void @llvm.pseudoprobe(i64, i64, i32, i64) #1

attributes #0 = { noinline nounwind uwtable "use-sample-profile"}
attributes #1 = { nounwind }

!llvm.pseudo_probe_desc = !{!1079, !4496}
!1079 = !{i64 -7702751003264189226, i64 158496288380146391, !"yyparse_1", null}
!4496 = !{i64 7682762345278052905, i64 404850113186107133, !"foo1", null}
!132 = !{!"function_entry_count", i64 1}
; Weights for the switch in @foo1's b2, in successor order:
; default (b4), case 1 (indirectgoto), case 2 (b3).
!133 = !{!"branch_weights", i32 0, i32 86, i32 0}
; Weights for the indirectbr in @foo1, following its destination list:
; b2, indirectgoto, b4, b3.
!134 = !{!"branch_weights", i32 85, i32 9449, i32 1, i32 8212}

; CHECK2: ![[END172_PROF]] = !{!"branch_weights", i32 1, i32 1003}
; CHECK2: ![[FALSE4858_PROF]] = !{!"branch_weights", i32 2, i32 1}