; REQUIRES: aarch64-registered-target
; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' < %s -mtriple=aarch64-none-linux-gnu -S | FileCheck %s

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

; Expect to skip merging the two empty blocks (sw.bb and sw.bb2) into
; sw.epilog, as both of them are unlikely to be executed.
define i32 @f_switch(i32 %c) {
; CHECK-LABEL: @f_switch
; CHECK-LABEL: entry:
; CHECK: i32 10, label %sw.bb
; CHECK: i32 20, label %sw.bb2
entry:
  switch i32 %c, label %sw.default [
    i32 10, label %sw.bb
    i32 20, label %sw.bb2
    i32 30, label %sw.bb3
    i32 40, label %sw.bb4
  ], !prof !0

; Empty block: only forwards to sw.epilog.
sw.bb:                                            ; preds = %entry
  br label %sw.epilog

; Empty block: only forwards to sw.epilog.
sw.bb2:                                           ; preds = %entry
  br label %sw.epilog

sw.bb3:                                           ; preds = %entry
  call void @callcase3()
  br label %sw.epilog

sw.bb4:                                           ; preds = %entry
  call void @callcase4()
  br label %sw.epilog

sw.default:                                       ; preds = %entry
  call void @calldefault()
  br label %sw.epilog

; The phi must still list %sw.bb and %sw.bb2 as incoming blocks, i.e. neither
; empty block was folded into %entry.
; CHECK-LABEL: sw.epilog:
; CHECK: %fp.0 = phi ptr [ @FD, %sw.default ], [ @F4, %sw.bb4 ], [ @F3, %sw.bb3 ], [ @F2, %sw.bb2 ], [ @F1, %sw.bb ]
sw.epilog:                                        ; preds = %sw.default, %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb
  %fp.0 = phi ptr [ @FD, %sw.default ], [ @F4, %sw.bb4 ], [ @F3, %sw.bb3 ], [ @F2, %sw.bb2 ], [ @F1, %sw.bb ]
  call void %fp.0()
  ret i32 0
}

; Expect not to merge sw.bb2 because its incoming value conflicts with the one
; from sw.bb, which has already been merged.
define i32 @f_switch2(i32 %c) {
; CHECK-LABEL: @f_switch2
; CHECK-LABEL: entry:
; CHECK: i32 10, label %sw.epilog
; CHECK: i32 20, label %sw.bb2
entry:
  switch i32 %c, label %sw.default [
    i32 10, label %sw.bb
    i32 20, label %sw.bb2
    i32 30, label %sw.bb3
    i32 40, label %sw.bb4
  ], !prof !1

; Empty block: only forwards to sw.epilog (feeds @F1 into the phi).
sw.bb:                                            ; preds = %entry
  br label %sw.epilog

; Empty block: only forwards to sw.epilog (feeds @F2 into the phi).
sw.bb2:                                           ; preds = %entry
  br label %sw.epilog

sw.bb3:                                           ; preds = %entry
  call void @callcase3()
  br label %sw.epilog

sw.bb4:                                           ; preds = %entry
  call void @callcase4()
  br label %sw.epilog

sw.default:                                       ; preds = %entry
  call void @calldefault()
  br label %sw.epilog

; After the pass, the @F1 edge is expected to come directly from %entry while
; the @F2 edge still comes from %sw.bb2.
; CHECK-LABEL: sw.epilog:
; CHECK: %fp.0 = phi ptr [ @FD, %sw.default ], [ @F4, %sw.bb4 ], [ @F3, %sw.bb3 ], [ @F2, %sw.bb2 ], [ @F1, %entry ]
sw.epilog:                                        ; preds = %sw.default, %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb
  %fp.0 = phi ptr [ @FD, %sw.default ], [ @F4, %sw.bb4 ], [ @F3, %sw.bb3 ], [ @F2, %sw.bb2 ], [ @F1, %sw.bb ]
  call void %fp.0()
  ret i32 0
}

; Multiple empty blocks should be considered together if all incoming values
; from them are the same. We expect to merge both empty blocks (sw.bb and
; sw.bb2) because the sum of their frequencies is higher than the threshold.
define i32 @f_switch3(i32 %c) {
; CHECK-LABEL: @f_switch3
; CHECK-LABEL: entry:
; CHECK: i32 10, label %sw.epilog
; CHECK: i32 20, label %sw.epilog
entry:
  switch i32 %c, label %sw.default [
    i32 10, label %sw.bb
    i32 20, label %sw.bb2
    i32 30, label %sw.bb3
    i32 40, label %sw.bb4
  ], !prof !2

; Empty block: only forwards to sw.epilog (feeds @F1 into the phi).
sw.bb:                                            ; preds = %entry
  br label %sw.epilog

; Empty block: only forwards to sw.epilog (also feeds @F1 into the phi).
sw.bb2:                                           ; preds = %entry
  br label %sw.epilog

sw.bb3:                                           ; preds = %entry
  call void @callcase3()
  br label %sw.epilog

sw.bb4:                                           ; preds = %entry
  call void @callcase4()
  br label %sw.epilog

sw.default:                                       ; preds = %entry
  call void @calldefault()
  br label %sw.epilog

; After the pass, both @F1 edges are expected to come directly from %entry.
; CHECK-LABEL: sw.epilog:
; CHECK: %fp.0 = phi ptr [ @FD, %sw.default ], [ @F4, %sw.bb4 ], [ @F3, %sw.bb3 ], [ @F1, %entry ], [ @F1, %entry ]
sw.epilog:                                        ; preds = %sw.default, %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb
  %fp.0 = phi ptr [ @FD, %sw.default ], [ @F4, %sw.bb4 ], [ @F3, %sw.bb3 ], [ @F1, %sw.bb2 ], [ @F1, %sw.bb ]
  call void %fp.0()
  ret i32 0
}

; Indirect-call targets selected by the phis, and the callees used to keep
; sw.bb3 / sw.bb4 / sw.default non-empty.
declare void @F1(...) local_unnamed_addr
declare void @F2(...) local_unnamed_addr
declare void @F3(...) local_unnamed_addr
declare void @F4(...) local_unnamed_addr
declare void @FD(...) local_unnamed_addr
declare void @callcase3(...) local_unnamed_addr
declare void @callcase4(...) local_unnamed_addr
declare void @calldefault(...) local_unnamed_addr

; Branch weights for the three switches above. For a switch, the first weight
; belongs to the default destination and the remaining ones follow the case
; order (10, 20, 30, 40).
;   !0 (@f_switch):  cases 10 and 20 have weight 1; every other edge has 5.
;   !1 (@f_switch2): case 10 has weight 5; every other edge has 1.
;   !2 (@f_switch3): case 10 has weight 4; every other edge has 1.
!0 = !{!"branch_weights", i32 5, i32 1, i32 1,i32 5, i32 5}
!1 = !{!"branch_weights", i32 1 , i32 5, i32 1,i32 1, i32 1}
!2 = !{!"branch_weights", i32 1 , i32 4, i32 1,i32 1, i32 1}


; This tests that BFI/BPI is created without any assertion in
; isMergingEmptyBlockProfitable() in the case where empty blocks are removed
; before creating BFI/BPI.
@b = common global i32 0, align 4
@a = common global ptr null, align 8

; No FileCheck directives target this function: it only has to make it through
; the pass pipeline on the RUN line without tripping an assertion.
define i32 @should_not_assert(i32 %i) local_unnamed_addr {
entry:
  %0 = load i32, ptr @b, align 4
  %cond = icmp eq i32 %0, 6
  br i1 %cond, label %while.cond.preheader, label %sw.epilog

while.cond.preheader:                             ; preds = %entry
  %1 = load ptr, ptr @a, align 8
  %magicptr = ptrtoint ptr %1 to i64
  %arrayidx = getelementptr inbounds i32, ptr %1, i64 1
  br label %while.cond

; Both switch cases go to the same empty block (while.cond2.loopexit).
while.cond:                                       ; preds = %while.cond.preheader, %land.rhs
  switch i64 %magicptr, label %land.rhs [
    i64 32, label %while.cond2.loopexit
    i64 0, label %while.cond2.loopexit
  ]

land.rhs:                                         ; preds = %while.cond
  %2 = load i32, ptr %arrayidx, align 4
  %tobool1 = icmp eq i32 %2, 0
  br i1 %tobool1, label %while.cond2thread-pre-split.loopexit, label %while.cond

; Empty block: contains only an unconditional branch.
while.cond2thread-pre-split.loopexit:             ; preds = %land.rhs
  br label %while.cond2thread-pre-split

while.cond2thread-pre-split:                      ; preds = %while.cond2thread-pre-split.loopexit, %while.body4
  %.pr = phi ptr [ %.pr.pre, %while.body4 ], [ %1, %while.cond2thread-pre-split.loopexit ]
  br label %while.cond2

; Empty block: contains only an unconditional branch.
while.cond2.loopexit:                             ; preds = %while.cond, %while.cond
  br label %while.cond2

while.cond2:                                      ; preds = %while.cond2.loopexit, %while.cond2thread-pre-split
  %3 = phi ptr [ %.pr, %while.cond2thread-pre-split ], [ %1, %while.cond2.loopexit ]
  %tobool3 = icmp eq ptr %3, null
  br i1 %tobool3, label %sw.epilog, label %while.body4

while.body4:                                      ; preds = %while.cond2
  tail call void @fn2()
  %.pr.pre = load ptr, ptr @a, align 8
  br label %while.cond2thread-pre-split

sw.epilog:                                        ; preds = %while.cond2, %entry
  ret i32 undef
}


declare void @fn2(...) local_unnamed_addr