1; This test contains extremely tricky call graph structures for the inliner to 2; handle correctly. They form cycles where the inliner introduces code that is 3; immediately or can eventually be transformed back into the original code. And 4; each step changes the call graph and so will trigger iteration. This requires 5; some out-of-band way to prevent infinitely re-inlining and re-transforming the 6; code. 7; 8; RUN: opt < %s -passes='cgscc(inline,function(sroa,instcombine))' -inline-threshold=50 -S | FileCheck %s 9 10 11; The `test1_*` collection of functions form a directly cycling pattern. 12 13define void @test1_a(ptr %ptr) { 14; CHECK-LABEL: define void @test1_a( 15entry: 16 call void @test1_b(ptr @test1_b, i1 false, i32 0) 17; Inlining and simplifying this call will reliably produce the exact same call, 18; over and over again. However, each inlining increments the count, and so we 19; expect this test case to stop after one round of inlining with a final 20; argument of '1'. 21; CHECK-NOT: call 22; CHECK: call void @test1_b(ptr nonnull @test1_b, i1 false, i32 1) 23; CHECK-NOT: call 24 25 ret void 26} 27 28define void @test1_b(ptr %arg, i1 %flag, i32 %inline_count) { 29; CHECK-LABEL: define void @test1_b( 30entry: 31 %a = alloca ptr 32 store ptr %arg, ptr %a 33; This alloca and store should remain through any optimization. 34; CHECK: %[[A:.*]] = alloca 35; CHECK: store ptr %arg, ptr %[[A]] 36 37 br i1 %flag, label %bb1, label %bb2 38 39bb1: 40 call void @test1_a(ptr %a) noinline 41 br label %bb2 42 43bb2: 44 %p = load ptr, ptr %a 45 %inline_count_inc = add i32 %inline_count, 1 46 call void %p(ptr %arg, i1 %flag, i32 %inline_count_inc) 47; And we should continue to load and call indirectly through optimization. 48; CHECK: %[[P:.*]] = load ptr, ptr %[[A]] 49; CHECK: call void %[[P]]( 50 51 ret void 52} 53 54define void @test2_a(ptr %ptr) { 55; CHECK-LABEL: define void @test2_a( 56entry: 57 call void @test2_b(ptr @test2_b, ptr @test2_c, i1 false, i32 0) 58; Inlining and simplifying this call will reliably produce the exact same call, 59; but only after doing two rounds if inlining, first from @test2_b then 60; @test2_c. We check the exact number of inlining rounds before we cut off to 61; break the cycle by inspecting the last paramater that gets incremented with 62; each inlined function body. 63; CHECK-NOT: call 64; CHECK: call void @test2_b(ptr nonnull @test2_b, ptr nonnull @test2_c, i1 false, i32 2) 65; CHECK-NOT: call 66 ret void 67} 68 69define void @test2_b(ptr %arg1, ptr %arg2, i1 %flag, i32 %inline_count) { 70; CHECK-LABEL: define void @test2_b( 71entry: 72 %a = alloca ptr 73 store ptr %arg2, ptr %a 74; This alloca and store should remain through any optimization. 75; CHECK: %[[A:.*]] = alloca 76; CHECK: store ptr %arg2, ptr %[[A]] 77 78 br i1 %flag, label %bb1, label %bb2 79 80bb1: 81 call void @test2_a(ptr %a) noinline 82 br label %bb2 83 84bb2: 85 %p = load ptr, ptr %a 86 %inline_count_inc = add i32 %inline_count, 1 87 call void %p(ptr %arg1, ptr %arg2, i1 %flag, i32 %inline_count_inc) 88; And we should continue to load and call indirectly through optimization. 89; CHECK: %[[P:.*]] = load ptr, ptr %[[A]] 90; CHECK: call void %[[P]]( 91 92 ret void 93} 94 95define void @test2_c(ptr %arg1, ptr %arg2, i1 %flag, i32 %inline_count) { 96; CHECK-LABEL: define void @test2_c( 97entry: 98 %a = alloca ptr 99 store ptr %arg1, ptr %a 100; This alloca and store should remain through any optimization. 101; CHECK: %[[A:.*]] = alloca 102; CHECK: store ptr %arg1, ptr %[[A]] 103 104 br i1 %flag, label %bb1, label %bb2 105 106bb1: 107 call void @test2_a(ptr %a) noinline 108 br label %bb2 109 110bb2: 111 %p = load ptr, ptr %a 112 %inline_count_inc = add i32 %inline_count, 1 113 call void %p(ptr %arg1, ptr %arg2, i1 %flag, i32 %inline_count_inc) 114; And we should continue to load and call indirectly through optimization. 115; CHECK: %[[P:.*]] = load ptr, ptr %[[A]] 116; CHECK: call void %[[P]]( 117 118 ret void 119} 120 121; Another infinite inlining case. The initial callgraph is like following: 122; 123; test3_a <---> test3_b 124; | ^ 125; v | 126; test3_c <---> test3_d 127; 128; For all the call edges in the call graph, only test3_c and test3_d can be 129; inlined into test3_a, and no other call edge can be inlined. 130; 131; After test3_c is inlined into test3_a, the original call edge test3_a->test3_c 132; will be removed, a new call edge will be added and the call graph becomes: 133; 134; test3_a <---> test3_b 135; \ ^ 136; v / 137; test3_c <---> test3_d 138; But test3_a, test3_b, test3_c and test3_d still belong to the same SCC. 139; 140; Then after test3_a->test3_d is inlined, when test3_a->test3_d is converted to 141; a ref edge, the original SCC will be split into two: {test3_c, test3_d} and 142; {test3_a, test3_b}, immediately after the newly added ref edge 143; test3_a->test3_c will be converted to a call edge, and the two SCCs will be 144; merged into the original one again. During this cycle, the original SCC will 145; be added into UR.CWorklist again and this creates an infinite loop. 146 147@a = global i64 0 148@b = global i64 0 149 150; Check test3_c is inlined into test3_a once and only once. 151; CHECK-LABEL: @test3_a( 152; CHECK: tail call void @test3_b() 153; CHECK-NEXT: tail call void @test3_d(i32 5) 154; CHECK-NEXT: %[[LD1:.*]] = load i64, ptr @a 155; CHECK-NEXT: %[[ADD1:.*]] = add nsw i64 %[[LD1]], 1 156; CHECK-NEXT: store i64 %[[ADD1]], ptr @a 157; CHECK-NEXT: %[[LD2:.*]] = load i64, ptr @b 158; CHECK-NEXT: %[[ADD2:.*]] = add nsw i64 %[[LD2]], 5 159; CHECK-NEXT: store i64 %[[ADD2]], ptr @b 160; CHECK-NEXT: ret void 161 162; Function Attrs: noinline 163define void @test3_a() #0 { 164entry: 165 tail call void @test3_b() 166 tail call void @test3_c(i32 5) 167 %t0 = load i64, ptr @b 168 %add = add nsw i64 %t0, 5 169 store i64 %add, ptr @b 170 ret void 171} 172 173; Function Attrs: noinline 174define void @test3_b() #0 { 175entry: 176 tail call void @test3_a() 177 %t0 = load i64, ptr @a 178 %add = add nsw i64 %t0, 2 179 store i64 %add, ptr @a 180 ret void 181} 182 183define void @test3_d(i32 %i) { 184entry: 185 %cmp = icmp eq i32 %i, 5 186 br i1 %cmp, label %if.end, label %if.then 187 188if.then: ; preds = %entry 189 %call = tail call i64 @random() 190 %t0 = load i64, ptr @a 191 %add = add nsw i64 %t0, %call 192 store i64 %add, ptr @a 193 br label %if.end 194 195if.end: ; preds = %entry, %if.then 196 tail call void @test3_c(i32 %i) 197 tail call void @test3_b() 198 %t6 = load i64, ptr @a 199 %add79 = add nsw i64 %t6, 3 200 store i64 %add79, ptr @a 201 ret void 202} 203 204define void @test3_c(i32 %i) { 205entry: 206 %cmp = icmp eq i32 %i, 5 207 br i1 %cmp, label %if.end, label %if.then 208 209if.then: ; preds = %entry 210 %call = tail call i64 @random() 211 %t0 = load i64, ptr @a 212 %add = add nsw i64 %t0, %call 213 store i64 %add, ptr @a 214 br label %if.end 215 216if.end: ; preds = %entry, %if.then 217 tail call void @test3_d(i32 %i) 218 %t6 = load i64, ptr @a 219 %add85 = add nsw i64 %t6, 1 220 store i64 %add85, ptr @a 221 ret void 222} 223 224declare i64 @random() 225 226attributes #0 = { noinline } 227