; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -loop-predication-skip-profitability-checks=false -passes='require<scalar-evolution>,loop-mssa(loop-predication)' -verify-memoryssa < %s 2>&1 | FileCheck %s

; The latch block exits to a speculation block that calls
; @llvm.experimental.deoptimize. We account for this since deopt is very
; rarely taken, so we do not predicate this loop using that coarse latch
; check: the guard is expected to keep its original %within.bounds condition.
; LatchExitProbability: 0x04000000 / 0x80000000 = 3.12%
; ExitingBlockProbability: 0x7ffa572a / 0x80000000 = 99.98%
define i64 @donot_predicate(ptr nocapture readonly %arg, i32 %length, ptr nocapture readonly %arg2, ptr nocapture readonly %n_addr, i64 %i) !prof !21 {
; CHECK-LABEL: @donot_predicate(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH:%.*]] to i64
; CHECK-NEXT:    [[N_PRE:%.*]] = load i64, ptr [[N_ADDR:%.*]], align 4
; CHECK-NEXT:    br label [[HEADER:%.*]]
; CHECK:       Header:
; CHECK-NEXT:    [[RESULT_IN3:%.*]] = phi ptr [ [[ARG2:%.*]], [[ENTRY:%.*]] ], [ [[ARG:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
; CHECK-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
; CHECK-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK:       Latch:
; CHECK-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
; CHECK-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[DEOPT:%.*]]
; CHECK:       deopt:
; CHECK-NEXT:    [[COUNTED_SPECULATION_FAILED:%.*]] = call i64 (...) @llvm.experimental.deoptimize.i64(i64 30) [ "deopt"(i32 0) ]
; CHECK-NEXT:    ret i64 [[COUNTED_SPECULATION_FAILED]]
; CHECK:       exit:
; CHECK-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi ptr [ [[RESULT_IN3]], [[HEADER]] ]
; CHECK-NEXT:    [[RESULT_LE:%.*]] = load i64, ptr [[RESULT_IN3_LCSSA]], align 8
; CHECK-NEXT:    ret i64 [[RESULT_LE]]
;
entry:
  %length.ext = zext i32 %length to i64
  %n.pre = load i64, ptr %n_addr, align 4
  br label %Header

Header:                                           ; preds = %entry, %Latch
  %result.in3 = phi ptr [ %arg2, %entry ], [ %arg, %Latch ]
  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
  %within.bounds = icmp ult i64 %j2, %length.ext
  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
  %innercmp = icmp eq i64 %j2, %n.pre
  %j.next = add nuw nsw i64 %j2, 1
  br i1 %innercmp, label %Latch, label %exit, !prof !0

Latch:                                            ; preds = %Header
  %speculate_trip_count = icmp ult i64 %j.next, 1048576
  br i1 %speculate_trip_count, label %Header, label %deopt

deopt:                                            ; preds = %Latch
  %counted_speculation_failed = call i64 (...) @llvm.experimental.deoptimize.i64(i64 30) [ "deopt"(i32 0) ]
  ret i64 %counted_speculation_failed

exit:                                             ; preds = %Header
  %result.in3.lcssa = phi ptr [ %result.in3, %Header ]
  %result.le = load i64, ptr %result.in3.lcssa, align 8
  ret i64 %result.le
}
; Weights for the Header branch of @donot_predicate: 18 towards %Latch,
; 104200 towards %exit.
!0 = !{!"branch_weights", i32 18, i32 104200}

; Predicate the loop since there is no profile information on its branches and
; BPI concluded that all exiting blocks have the same probability of exiting
; from the loop. The guard is expected to be rewritten to a loop-invariant
; condition computed in the entry block.
define i64 @predicate(ptr nocapture readonly %arg, i32 %length, ptr nocapture readonly %arg2, ptr nocapture readonly %n_addr, i64 %i) !prof !21 {
; CHECK-LABEL: @predicate(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH:%.*]] to i64
; CHECK-NEXT:    [[N_PRE:%.*]] = load i64, ptr [[N_ADDR:%.*]], align 4
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule i64 1048576, [[LENGTH_EXT]]
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 0, [[LENGTH_EXT]]
; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = freeze i1 [[TMP2]]
; CHECK-NEXT:    br label [[HEADER:%.*]]
; CHECK:       Header:
; CHECK-NEXT:    [[RESULT_IN3:%.*]] = phi ptr [ [[ARG2:%.*]], [[ENTRY:%.*]] ], [ [[ARG:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP3]], i32 9) [ "deopt"() ]
; CHECK-NEXT:    call void @llvm.assume(i1 [[WITHIN_BOUNDS]])
; CHECK-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
; CHECK-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]]
; CHECK:       Latch:
; CHECK-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
; CHECK-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]]
; CHECK:       exitLatch:
; CHECK-NEXT:    ret i64 1
; CHECK:       exit:
; CHECK-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi ptr [ [[RESULT_IN3]], [[HEADER]] ]
; CHECK-NEXT:    [[RESULT_LE:%.*]] = load i64, ptr [[RESULT_IN3_LCSSA]], align 8
; CHECK-NEXT:    ret i64 [[RESULT_LE]]
;
entry:
  %length.ext = zext i32 %length to i64
  %n.pre = load i64, ptr %n_addr, align 4
  br label %Header

Header:                                           ; preds = %entry, %Latch
  %result.in3 = phi ptr [ %arg2, %entry ], [ %arg, %Latch ]
  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
  %within.bounds = icmp ult i64 %j2, %length.ext
  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
  %innercmp = icmp eq i64 %j2, %n.pre
  %j.next = add nuw nsw i64 %j2, 1
  br i1 %innercmp, label %Latch, label %exit

Latch:                                            ; preds = %Header
  %speculate_trip_count = icmp ult i64 %j.next, 1048576
  br i1 %speculate_trip_count, label %Header, label %exitLatch

exitLatch:                                        ; preds = %Latch
  ret i64 1

exit:                                             ; preds = %Header
  %result.in3.lcssa = phi ptr [ %result.in3, %Header ]
  %result.le = load i64, ptr %result.in3.lcssa, align 8
  ret i64 %result.le
}

; Same as the test above, but with profile data saying that the most probable
; exit from the loop is the header exiting block (not the latch block). So do
; not predicate: the guard is expected to keep its %within.bounds condition.
; LatchExitProbability: 0x000020e1 / 0x80000000 = 0.00%
; ExitingBlockProbability: 0x7ffcbb86 / 0x80000000 = 99.99%
define i64 @donot_predicate_prof(ptr nocapture readonly %arg, i32 %length, ptr nocapture readonly %arg2, ptr nocapture readonly %n_addr, i64 %i) !prof !21 {
; CHECK-LABEL: @donot_predicate_prof(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH:%.*]] to i64
; CHECK-NEXT:    [[N_PRE:%.*]] = load i64, ptr [[N_ADDR:%.*]], align 4
; CHECK-NEXT:    br label [[HEADER:%.*]]
; CHECK:       Header:
; CHECK-NEXT:    [[RESULT_IN3:%.*]] = phi ptr [ [[ARG2:%.*]], [[ENTRY:%.*]] ], [ [[ARG:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
; CHECK-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
; CHECK-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK:       Latch:
; CHECK-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
; CHECK-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]], !prof [[PROF3:![0-9]+]]
; CHECK:       exitLatch:
; CHECK-NEXT:    ret i64 1
; CHECK:       exit:
; CHECK-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi ptr [ [[RESULT_IN3]], [[HEADER]] ]
; CHECK-NEXT:    [[RESULT_LE:%.*]] = load i64, ptr [[RESULT_IN3_LCSSA]], align 8
; CHECK-NEXT:    ret i64 [[RESULT_LE]]
;
entry:
  %length.ext = zext i32 %length to i64
  %n.pre = load i64, ptr %n_addr, align 4
  br label %Header

Header:                                           ; preds = %entry, %Latch
  %result.in3 = phi ptr [ %arg2, %entry ], [ %arg, %Latch ]
  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
  %within.bounds = icmp ult i64 %j2, %length.ext
  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
  %innercmp = icmp eq i64 %j2, %n.pre
  %j.next = add nuw nsw i64 %j2, 1
  br i1 %innercmp, label %Latch, label %exit, !prof !1

Latch:                                            ; preds = %Header
  %speculate_trip_count = icmp ult i64 %j.next, 1048576
  br i1 %speculate_trip_count, label %Header, label %exitLatch, !prof !2

exitLatch:                                        ; preds = %Latch
  ret i64 1

exit:                                             ; preds = %Header
  %result.in3.lcssa = phi ptr [ %result.in3, %Header ]
  %result.le = load i64, ptr %result.in3.lcssa, align 8
  ret i64 %result.le
}
declare i64 @llvm.experimental.deoptimize.i64(...)
declare void @llvm.experimental.guard(i1, ...)

; !1: Header branch of @donot_predicate_prof (104 towards %Latch, 1042861
;     towards %exit).
; !2: Latch branch of @donot_predicate_prof (255129 back to %Header, 1 towards
;     %exitLatch).
; !21: entry count attached to all three functions.
!1 = !{!"branch_weights", i32 104, i32 1042861}
!2 = !{!"branch_weights", i32 255129, i32 1}
!21 = !{!"function_entry_count", i64 20000}