; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -passes=early-cse -earlycse-debug-hash | FileCheck %s
; RUN: opt < %s -S -passes='early-cse<memssa>' | FileCheck %s
; RUN: opt < %s -S -passes=early-cse | FileCheck %s

declare void @llvm.assume(i1) nounwind

;; Dead instruction removal, constant folding, and CSE of identical zext/add
;; instructions. The expected output also reuses the plain add for the
;; 'add nuw' (%G), and keeps all five volatile stores.
define void @test1(i8 %V, ptr%P) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:    store i32 23, ptr [[P:%.*]], align 4
; CHECK-NEXT:    [[C:%.*]] = zext i8 [[V:%.*]] to i32
; CHECK-NEXT:    store volatile i32 [[C]], ptr [[P]], align 4
; CHECK-NEXT:    store volatile i32 [[C]], ptr [[P]], align 4
; CHECK-NEXT:    [[E:%.*]] = add i32 [[C]], [[C]]
; CHECK-NEXT:    store volatile i32 [[E]], ptr [[P]], align 4
; CHECK-NEXT:    store volatile i32 [[E]], ptr [[P]], align 4
; CHECK-NEXT:    store volatile i32 [[E]], ptr [[P]], align 4
; CHECK-NEXT:    ret void
;
  %A = bitcast i64 42 to double  ;; dead
  %B = add i32 4, 19             ;; constant folds
  store i32 %B, ptr %P

  %C = zext i8 %V to i32
  %D = zext i8 %V to i32  ;; CSE
  store volatile i32 %C, ptr %P
  store volatile i32 %D, ptr %P

  %E = add i32 %C, %C
  %F = add i32 %C, %C
  store volatile i32 %E, ptr %P
  store volatile i32 %F, ptr %P

  %G = add nuw i32 %C, %C
  store volatile i32 %G, ptr %P
  ret void
}


;; Simple load value numbering.
define i32 @test2(ptr%P) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 4
; CHECK-NEXT:    ret i32 0
;
  %V1 = load i32, ptr %P
  %V2 = load i32, ptr %P
  %Diff = sub i32 %V1, %V2
  ret i32 %Diff
}

;; Same as test2, but with an llvm.assume call between the two loads. The
;; expected output keeps the assume and still folds the result to 0.
define i32 @test2a(ptr%P, i1 %b) {
; CHECK-LABEL: @test2a(
; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 4
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[B:%.*]])
; CHECK-NEXT:    ret i32 0
;
  %V1 = load i32, ptr %P
  tail call void @llvm.assume(i1 %b)
  %V2 = load i32, ptr %P
  %Diff = sub i32 %V1, %V2
  ret i32 %Diff
}

;; Cross block load value numbering.
define i32 @test3(ptr%P, i1 %Cond) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 4
; CHECK-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
; CHECK:       T:
; CHECK-NEXT:    store i32 4, ptr [[P]], align 4
; CHECK-NEXT:    ret i32 42
; CHECK:       F:
; CHECK-NEXT:    ret i32 0
;
  %V1 = load i32, ptr %P
  br i1 %Cond, label %T, label %F
T:
  store i32 4, ptr %P
  ret i32 42
F:
  %V2 = load i32, ptr %P
  %Diff = sub i32 %V1, %V2
  ret i32 %Diff
}

;; Same as test3, but with an llvm.assume call ahead of the second load in
;; block F. The expected output keeps the assume and still returns 0.
define i32 @test3a(ptr%P, i1 %Cond, i1 %b) {
; CHECK-LABEL: @test3a(
; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 4
; CHECK-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
; CHECK:       T:
; CHECK-NEXT:    store i32 4, ptr [[P]], align 4
; CHECK-NEXT:    ret i32 42
; CHECK:       F:
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[B:%.*]])
; CHECK-NEXT:    ret i32 0
;
  %V1 = load i32, ptr %P
  br i1 %Cond, label %T, label %F
T:
  store i32 4, ptr %P
  ret i32 42
F:
  tail call void @llvm.assume(i1 %b)
  %V2 = load i32, ptr %P
  %Diff = sub i32 %V1, %V2
  ret i32 %Diff
}

;; Cross block load value numbering stops when stores happen.
;; The expected output forwards the stored 42 to %V2 but keeps %V1 in the sub.
define i32 @test4(ptr%P, i1 %Cond) {
; CHECK-LABEL: @test4(
; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 4
; CHECK-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
; CHECK:       T:
; CHECK-NEXT:    ret i32 42
; CHECK:       F:
; CHECK-NEXT:    store i32 42, ptr [[P]], align 4
; CHECK-NEXT:    [[DIFF:%.*]] = sub i32 [[V1]], 42
; CHECK-NEXT:    ret i32 [[DIFF]]
;
  %V1 = load i32, ptr %P
  br i1 %Cond, label %T, label %F
T:
  ret i32 42
F:
  ; Clobbers V1
  store i32 42, ptr %P

  %V2 = load i32, ptr %P
  %Diff = sub i32 %V1, %V2
  ret i32 %Diff
}

declare i32 @func(ptr%P) readonly

;; Simple call CSE'ing.
define i32 @test5(ptr%P) {
; CHECK-LABEL: @test5(
; CHECK-NEXT:    [[V1:%.*]] = call i32 @func(ptr [[P:%.*]]), !prof !0
; CHECK-NEXT:    ret i32 0
;
  %V1 = call i32 @func(ptr %P), !prof !0
  %V2 = call i32 @func(ptr %P), !prof !1
  %Diff = sub i32 %V1, %V2
  ret i32 %Diff
}

;; !0 and !1 are separate metadata nodes with identical contents; the two
;; calls in test5 differ only in which of them they reference.
!0 = !{!"branch_weights", i32 95}
!1 = !{!"branch_weights", i32 95}

;; Trivial Store->load forwarding
define i32 @test6(ptr%P) {
; CHECK-LABEL: @test6(
; CHECK-NEXT:    store i32 42, ptr [[P:%.*]], align 4
; CHECK-NEXT:    ret i32 42
;
  store i32 42, ptr %P
  %V1 = load i32, ptr %P
  ret i32 %V1
}

;; Same as test6, but with an llvm.assume call between the store and the load.
;; The expected output keeps the assume and still forwards 42.
define i32 @test6a(ptr%P, i1 %b) {
; CHECK-LABEL: @test6a(
; CHECK-NEXT:    store i32 42, ptr [[P:%.*]], align 4
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[B:%.*]])
; CHECK-NEXT:    ret i32 42
;
  store i32 42, ptr %P
  tail call void @llvm.assume(i1 %b)
  %V1 = load i32, ptr %P
  ret i32 %V1
}

;; Trivial dead store elimination.
define void @test7(ptr%P) {
; CHECK-LABEL: @test7(
; CHECK-NEXT:    store i32 45, ptr [[P:%.*]], align 4
; CHECK-NEXT:    ret void
;
  store i32 42, ptr %P
  store i32 45, ptr %P
  ret void
}

;; Readnone functions aren't invalidated by stores.
define i32 @test8(ptr%P) {
; CHECK-LABEL: @test8(
; CHECK-NEXT:    [[V1:%.*]] = call i32 @func(ptr [[P:%.*]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT:    store i32 4, ptr [[P]], align 4
; CHECK-NEXT:    ret i32 0
;
  %V1 = call i32 @func(ptr %P) readnone
  store i32 4, ptr %P
  %V2 = call i32 @func(ptr %P) readnone
  %Diff = sub i32 %V1, %V2
  ret i32 %Diff
}

;; Trivial DSE can't be performed across a readonly call.  The call
;; can observe the earlier write.
define i32 @test9(ptr%P) {
; CHECK-LABEL: @test9(
; CHECK-NEXT:    store i32 4, ptr [[P:%.*]], align 4
; CHECK-NEXT:    [[V1:%.*]] = call i32 @func(ptr [[P]]) #[[ATTR1:[0-9]+]]
; CHECK-NEXT:    store i32 5, ptr [[P]], align 4
; CHECK-NEXT:    ret i32 [[V1]]
;
  store i32 4, ptr %P
  %V1 = call i32 @func(ptr %P) readonly
  store i32 5, ptr %P
  ret i32 %V1
}

;; Trivial DSE can be performed across a readnone call.
define i32 @test10(ptr%P) {
; CHECK-LABEL: @test10(
; CHECK-NEXT:    [[V1:%.*]] = call i32 @func(ptr [[P:%.*]]) #[[ATTR2]]
; CHECK-NEXT:    store i32 5, ptr [[P]], align 4
; CHECK-NEXT:    ret i32 [[V1]]
;
  store i32 4, ptr %P
  %V1 = call i32 @func(ptr %P) readnone
  store i32 5, ptr %P
  ret i32 %V1
}

;; Trivial dead store elimination - should work for an entire series of dead stores too.
define void @test11(ptr%P) {
; CHECK-LABEL: @test11(
; CHECK-NEXT:    store i32 45, ptr [[P:%.*]], align 4
; CHECK-NEXT:    ret void
;
  store i32 42, ptr %P
  store i32 43, ptr %P
  store i32 44, ptr %P
  store i32 45, ptr %P
  ret void
}

;; The second load of %P1 sits after a seq_cst atomic load of %P2. The
;; expected output keeps both loads of %P1 rather than merging them.
define i32 @test12(i1 %B, ptr %P1, ptr %P2) {
; CHECK-LABEL: @test12(
; CHECK-NEXT:    [[LOAD0:%.*]] = load i32, ptr [[P1:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, ptr [[P2:%.*]] seq_cst, align 4
; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[P1]], align 4
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[B:%.*]], i32 [[LOAD0]], i32 [[LOAD1]]
; CHECK-NEXT:    ret i32 [[SEL]]
;
  %load0 = load i32, ptr %P1
  %1 = load atomic i32, ptr %P2 seq_cst, align 4
  %load1 = load i32, ptr %P1
  %sel = select i1 %B, i32 %load0, i32 %load1
  ret i32 %sel
}

;; Storing back the value that was just loaded from the same pointer: the
;; expected output drops the store.
define void @dse1(ptr%P) {
; CHECK-LABEL: @dse1(
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[P:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %v = load i32, ptr %P
  store i32 %v, ptr %P
  ret void
}

;; Same as dse1, with a seq_cst atomic load feeding a plain store.
define void @dse2(ptr%P) {
; CHECK-LABEL: @dse2(
; CHECK-NEXT:    [[V:%.*]] = load atomic i32, ptr [[P:%.*]] seq_cst, align 4
; CHECK-NEXT:    ret void
;
  %v = load atomic i32, ptr %P seq_cst, align 4
  store i32 %v, ptr %P
  ret void
}

;; Same as dse2, but the store being dropped is an unordered atomic store.
define void @dse3(ptr%P) {
; CHECK-LABEL: @dse3(
; CHECK-NEXT:    [[V:%.*]] = load atomic i32, ptr [[P:%.*]] unordered, align 4
; CHECK-NEXT:    ret void
;
  %v = load atomic i32, ptr %P seq_cst, align 4
  store atomic i32 %v, ptr %P unordered, align 4
  ret void
}

;; An unordered atomic load/store-back pair on %P sits between two plain loads
;; of %Q. The expected output has neither the store nor the second load of %Q,
;; and returns 0.
define i32 @dse4(ptr%P, ptr%Q) {
; CHECK-LABEL: @dse4(
; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[Q:%.*]], align 4
; CHECK-NEXT:    [[V:%.*]] = load atomic i32, ptr [[P:%.*]] unordered, align 4
; CHECK-NEXT:    ret i32 0
;
  %a = load i32, ptr %Q
  %v = load atomic i32, ptr %P unordered, align 4
  store atomic i32 %v, ptr %P unordered, align 4
  %b = load i32, ptr %Q
  %res = sub i32 %a, %b
  ret i32 %res
}

; Note that in this example, %P and %Q could in fact be the same
; pointer.  %v could be different from the value observed for %a
; and that's okay because we're using relaxed memory ordering.
; The only guarantee we have to provide is that each of the loads
; has to observe some value written to that location.  We do
; not have to respect the order in which those writes were done.
define i32 @dse5(ptr%P, ptr%Q) {
; CHECK-LABEL: @dse5(
; CHECK-NEXT:    [[V:%.*]] = load atomic i32, ptr [[P:%.*]] unordered, align 4
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[Q:%.*]] unordered, align 4
; CHECK-NEXT:    ret i32 0
;
  %v = load atomic i32, ptr %P unordered, align 4
  %a = load atomic i32, ptr %Q unordered, align 4
  store atomic i32 %v, ptr %P unordered, align 4
  %b = load atomic i32, ptr %Q unordered, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}


;; Negative case: the stored value (5) is not the value that was loaded, so
;; the expected output keeps the store. Only the unused load disappears.
define void @dse_neg1(ptr%P) {
; CHECK-LABEL: @dse_neg1(
; CHECK-NEXT:    store i32 5, ptr [[P:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %v = load i32, ptr %P
  store i32 5, ptr %P
  ret void
}

; Could remove the store, but only if ordering was somehow
; encoded.
define void @dse_neg2(ptr%P) {
; CHECK-LABEL: @dse_neg2(
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[P:%.*]], align 4
; CHECK-NEXT:    store atomic i32 [[V]], ptr [[P]] seq_cst, align 4
; CHECK-NEXT:    ret void
;
  %v = load i32, ptr %P
  store atomic i32 %v, ptr %P seq_cst, align 4
  ret void
}

@c = external global i32, align 4
declare i32 @reads_c(i32 returned)

;; Two stores to @c separated by a call to @reads_c, which is declared without
;; any memory attribute. The expected output keeps both stores around the call
;; and drops only the unused leading load.
define void @pr28763() {
; CHECK-LABEL: @pr28763(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 0, ptr @c, align 4
; CHECK-NEXT:    [[CALL:%.*]] = call i32 @reads_c(i32 0)
; CHECK-NEXT:    store i32 2, ptr @c, align 4
; CHECK-NEXT:    ret void
;
entry:
  %load = load i32, ptr @c, align 4
  store i32 0, ptr @c, align 4
  %call = call i32 @reads_c(i32 0)
  store i32 2, ptr @c, align 4
  ret void
}

;; Two freezes of the same operand: the expected output has a single freeze,
;; returned directly, with no 'and' left.
define i1 @cse_freeze(i1 %a) {
; CHECK-LABEL: @cse_freeze(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[B:%.*]] = freeze i1 [[A:%.*]]
; CHECK-NEXT:    ret i1 [[B]]
;
entry:
  %b = freeze i1 %a
  %c = freeze i1 %a
  %and = and i1 %b, %c
  ret i1 %and
}