; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s

; These tests exercise CodeGenPrepare's address-mode sinking: a GEP computed in
; one block is re-materialised (as an i8 GEP with byte offset 40 = 5 * 8) next
; to the memory instruction that consumes it, so instruction selection can fold
; it into the addressing mode.

target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"

@x = external global [1 x [2 x <4 x float>]]

; Can we sink single addressing mode computation to use?
define void @test1(i1 %cond, ptr %base) {
; CHECK-LABEL: @test1
; CHECK: getelementptr inbounds i8, {{.+}} 40
entry:
  %addr = getelementptr inbounds i64, ptr %base, i64 5
  br i1 %cond, label %if.then, label %fallthrough

if.then:
  %v = load i32, ptr %addr, align 4
  br label %fallthrough

fallthrough:
  ret void
}

declare void @foo(i32)

; Make sure sinking two copies of addressing mode into different blocks works
define void @test2(i1 %cond, ptr %base) {
; CHECK-LABEL: @test2
entry:
  %addr = getelementptr inbounds i64, ptr %base, i64 5
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %v1 = load i32, ptr %addr, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %next, label %fallthrough

next:
; CHECK-LABEL: next:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %v2 = load i32, ptr %addr, align 4
  call void @foo(i32 %v2)
  br label %fallthrough

fallthrough:
  ret void
}

; If we have two loads in the same block, only need one copy of addressing mode
; - instruction selection will duplicate if needed
define void @test3(i1 %cond, ptr %base) {
; CHECK-LABEL: @test3
entry:
  %addr = getelementptr inbounds i64, ptr %base, i64 5
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %v1 = load i32, ptr %addr, align 4
  call void @foo(i32 %v1)
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
  %v2 = load i32, ptr %addr, align 4
  call void @foo(i32 %v2)
  br label %fallthrough

fallthrough:
  ret void
}

; Can we still sink addressing mode if there's a cold use of the
; address itself?
define void @test4(i1 %cond, ptr %base) {
; CHECK-LABEL: @test4
entry:
  %addr = getelementptr inbounds i64, ptr %base, i64 5
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %v1 = load i32, ptr %addr, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %rare.1, label %fallthrough

fallthrough:
  ret void

rare.1:
; CHECK-LABEL: rare.1:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  call void @slowpath(i32 %v1, ptr %addr) cold
  br label %fallthrough
}

; Negative test - don't want to duplicate addressing into hot path
define void @test5(i1 %cond, ptr %base) {
; CHECK-LABEL: @test5
entry:
; CHECK: %addr = getelementptr inbounds
  %addr = getelementptr inbounds i64, ptr %base, i64 5
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
  %v1 = load i32, ptr %addr, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %rare.1, label %fallthrough

fallthrough:
  ret void

rare.1:
  call void @slowpath(i32 %v1, ptr %addr) ;; NOT COLD
  br label %fallthrough
}

; Negative test - opt for size (function carries the minsize attribute)
define void @test6(i1 %cond, ptr %base) minsize {
; CHECK-LABEL: @test6
entry:
; CHECK: %addr = getelementptr
  %addr = getelementptr inbounds i64, ptr %base, i64 5
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
  %v1 = load i32, ptr %addr, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %rare.1, label %fallthrough

fallthrough:
  ret void

rare.1:
  call void @slowpath(i32 %v1, ptr %addr) cold
  br label %fallthrough
}

; Negative test - opt for size, driven by profile data: the function has an
; entry count of 0 (!14) under the module's profile summary.
define void @test6_pgso(i1 %cond, ptr %base) !prof !14 {
; CHECK-LABEL: @test6_pgso
entry:
; CHECK: %addr = getelementptr
  %addr = getelementptr inbounds i64, ptr %base, i64 5
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
  %v1 = load i32, ptr %addr, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %rare.1, label %fallthrough

fallthrough:
  ret void

rare.1:
  call void @slowpath(i32 %v1, ptr %addr) cold
  br label %fallthrough
}

; Make sure sinking two copies of addressing mode into different blocks works
; when there are cold paths for each.
define void @test7(i1 %cond, ptr %base) {
; CHECK-LABEL: @test7
entry:
  %addr = getelementptr inbounds i64, ptr %base, i64 5
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %v1 = load i32, ptr %addr, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %rare.1, label %next

next:
; CHECK-LABEL: next:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %v2 = load i32, ptr %addr, align 4
  call void @foo(i32 %v2)
  %cmp2 = icmp eq i32 %v2, 0
  ; The second hot block takes its own cold path (rare.2) so that block is
  ; actually reachable and each hot block has a distinct cold successor.
  br i1 %cmp2, label %rare.2, label %fallthrough

fallthrough:
  ret void

rare.1:
; CHECK-LABEL: rare.1:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  call void @slowpath(i32 %v1, ptr %addr) cold
  br label %next

rare.2:
; CHECK-LABEL: rare.2:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  call void @slowpath(i32 %v2, ptr %addr) cold
  br label %fallthrough
}

declare void @slowpath(i32, ptr)

; Make sure we don't end up in an infinite loop after we fail to sink.
; CHECK-LABEL: define void @test8
; CHECK: %ptr = getelementptr i8, ptr %aFOO_load_ptr2int_2void, i32 undef
define void @test8() {
allocas:
  %aFOO_load = load ptr, ptr undef
  %aFOO_load_ptr2int = ptrtoint ptr %aFOO_load to i64
  %aFOO_load_ptr2int_broadcast_init = insertelement <4 x i64> undef, i64 %aFOO_load_ptr2int, i32 0
  %aFOO_load_ptr2int_2void = inttoptr i64 %aFOO_load_ptr2int to ptr
  %ptr = getelementptr i8, ptr %aFOO_load_ptr2int_2void, i32 undef
  br label %load.i145

load.i145:
  %valall.i144 = load <4 x float>, ptr %ptr, align 4
  br label %pl_loop.i.i122

pl_loop.i.i122:
  br label %pl_loop.i.i122
}

; Make sure we can sink address computation even
; if there is a cycle in phi nodes.
define void @test9(i1 %cond, ptr %base) {
; CHECK-LABEL: @test9
entry:
  %addr = getelementptr inbounds i64, ptr %base, i64 5
  br label %header

header:
  %iv = phi i32 [0, %entry], [%iv.inc, %backedge]
  %casted.loop = phi ptr [%addr, %entry], [%casted.merged, %backedge]
  br i1 %cond, label %if.then, label %backedge

if.then:
  call void @foo(i32 %iv)
  %addr.1 = getelementptr inbounds i64, ptr %base, i64 5
  br label %backedge

backedge:
; CHECK-LABEL: backedge:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %casted.merged = phi ptr [%casted.loop, %header], [%addr.1, %if.then]
  %v = load i32, ptr %casted.merged, align 4
  call void @foo(i32 %v)
  %iv.inc = add i32 %iv, 1
  %cmp = icmp slt i32 %iv.inc, 1000
  br i1 %cmp, label %header, label %exit

exit:
  ret void
}

; Make sure we can eliminate a select when both arguments perform equivalent
; address computation (5 * 8 bytes and 10 * 4 bytes are both offset 40).
define void @test10(i1 %cond, ptr %base) {
; CHECK-LABEL: @test10
; CHECK: getelementptr inbounds i8, {{.+}} 40
; CHECK-NOT: select
entry:
  %gep1 = getelementptr inbounds i64, ptr %base, i64 5
  %gep2 = getelementptr inbounds i32, ptr %base, i64 10
  %casted.merged = select i1 %cond, ptr %gep1, ptr %gep2
  %v = load i32, ptr %casted.merged, align 4
  call void @foo(i32 %v)
  ret void
}

; Found by fuzzer, getSExtValue of > 64 bit constant.
; No output pattern is verified beyond the function's presence; the label
; below also bounds the preceding negative check to test10.
define void @i96_mul(ptr %base, i96 %offset) {
; CHECK-LABEL: @i96_mul
BB:
  ;; RHS = 0x7FFFFFFFFFFFFFFFFFFFFFFF
  %B84 = mul i96 %offset, 39614081257132168796771975167
  %G23 = getelementptr i1, ptr %base, i96 %B84
  store i1 false, ptr %G23
  ret void
}

; Module-level profile summary consumed by require<profile-summary>; !14 is the
; zero entry count attached to @test6_pgso.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}