; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S < %s -passes=instcombine | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"

; Check transforms involving atomic operations

define i32 @test1(ptr %p) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, ptr [[P:%.*]] seq_cst, align 4
; CHECK-NEXT:    [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT:    ret i32 [[Z]]
;
  %x = load atomic i32, ptr %p seq_cst, align 4
  %y = load i32, ptr %p, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

define i32 @test2(ptr %p) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:    [[X:%.*]] = load volatile i32, ptr [[P:%.*]], align 4
; CHECK-NEXT:    [[Y:%.*]] = load volatile i32, ptr [[P]], align 4
; CHECK-NEXT:    [[Z:%.*]] = add i32 [[X]], [[Y]]
; CHECK-NEXT:    ret i32 [[Z]]
;
  %x = load volatile i32, ptr %p, align 4
  %y = load volatile i32, ptr %p, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

; The exact semantics of mixing volatile and non-volatile on the same
; memory location are a bit unclear, but conservatively, we know we don't
; want to remove the volatile.
define i32 @test3(ptr %p) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:    [[X:%.*]] = load volatile i32, ptr [[P:%.*]], align 4
; CHECK-NEXT:    [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT:    ret i32 [[Z]]
;
  %x = load volatile i32, ptr %p, align 4
  %y = load i32, ptr %p, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

; Forwarding from a stronger ordered atomic is fine
define i32 @test4(ptr %p) {
; CHECK-LABEL: @test4(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, ptr [[P:%.*]] seq_cst, align 4
; CHECK-NEXT:    [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT:    ret i32 [[Z]]
;
  %x = load atomic i32, ptr %p seq_cst, align 4
  %y = load atomic i32, ptr %p unordered, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

; Forwarding from a non-atomic is not. (The earlier load
; could in principle be promoted to atomic and then forwarded,
; but we can't just drop the atomic from the load.)
define i32 @test5(ptr %p) {
; CHECK-LABEL: @test5(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, ptr [[P:%.*]] unordered, align 4
; CHECK-NEXT:    [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT:    ret i32 [[Z]]
;
  %x = load atomic i32, ptr %p unordered, align 4
  %y = load i32, ptr %p, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

; Forwarding atomic to atomic is fine
define i32 @test6(ptr %p) {
; CHECK-LABEL: @test6(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, ptr [[P:%.*]] unordered, align 4
; CHECK-NEXT:    [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT:    ret i32 [[Z]]
;
  %x = load atomic i32, ptr %p unordered, align 4
  %y = load atomic i32, ptr %p unordered, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

; FIXME: we currently don't do anything for monotonic
define i32 @test7(ptr %p) {
; CHECK-LABEL: @test7(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, ptr [[P:%.*]] seq_cst, align 4
; CHECK-NEXT:    [[Y:%.*]] = load atomic i32, ptr [[P]] monotonic, align 4
; CHECK-NEXT:    [[Z:%.*]] = add i32 [[X]], [[Y]]
; CHECK-NEXT:    ret i32 [[Z]]
;
  %x = load atomic i32, ptr %p seq_cst, align 4
  %y = load atomic i32, ptr %p monotonic, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

; FIXME: We could forward in racy code
define i32 @test8(ptr %p) {
; CHECK-LABEL: @test8(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, ptr [[P:%.*]] seq_cst, align 4
; CHECK-NEXT:    [[Y:%.*]] = load atomic i32, ptr [[P]] acquire, align 4
; CHECK-NEXT:    [[Z:%.*]] = add i32 [[X]], [[Y]]
; CHECK-NEXT:    ret i32 [[Z]]
;
  %x = load atomic i32, ptr %p seq_cst, align 4
  %y = load atomic i32, ptr %p acquire, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

; An unordered access to null is still unreachable. There's no
; ordering imposed.
define i32 @test9() {
; CHECK-LABEL: @test9(
; CHECK-NEXT:    store i1 true, ptr poison, align 1
; CHECK-NEXT:    ret i32 poison
;
  %x = load atomic i32, ptr null unordered, align 4
  ret i32 %x
}

define i32 @test9_no_null_opt() #0 {
; CHECK-LABEL: @test9_no_null_opt(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, ptr null unordered, align 4
; CHECK-NEXT:    ret i32 [[X]]
;
  %x = load atomic i32, ptr null unordered, align 4
  ret i32 %x
}

; FIXME: Could also fold
define i32 @test10() {
; CHECK-LABEL: @test10(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, ptr null monotonic, align 4
; CHECK-NEXT:    ret i32 [[X]]
;
  %x = load atomic i32, ptr null monotonic, align 4
  ret i32 %x
}

define i32 @test10_no_null_opt() #0 {
; CHECK-LABEL: @test10_no_null_opt(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, ptr null monotonic, align 4
; CHECK-NEXT:    ret i32 [[X]]
;
  %x = load atomic i32, ptr null monotonic, align 4
  ret i32 %x
}

; Would this be legal to fold? Probably?
define i32 @test11() {
; CHECK-LABEL: @test11(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, ptr null seq_cst, align 4
; CHECK-NEXT:    ret i32 [[X]]
;
  %x = load atomic i32, ptr null seq_cst, align 4
  ret i32 %x
}

define i32 @test11_no_null_opt() #0 {
; CHECK-LABEL: @test11_no_null_opt(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, ptr null seq_cst, align 4
; CHECK-NEXT:    ret i32 [[X]]
;
  %x = load atomic i32, ptr null seq_cst, align 4
  ret i32 %x
}

; An unordered access to null is still unreachable. There's no
; ordering imposed.
define i32 @test12() {
; CHECK-LABEL: @test12(
; CHECK-NEXT:    store atomic i32 poison, ptr null unordered, align 4
; CHECK-NEXT:    ret i32 0
;
  store atomic i32 0, ptr null unordered, align 4
  ret i32 0
}

define i32 @test12_no_null_opt() #0 {
; CHECK-LABEL: @test12_no_null_opt(
; CHECK-NEXT:    store atomic i32 0, ptr null unordered, align 4
; CHECK-NEXT:    ret i32 0
;
  store atomic i32 0, ptr null unordered, align 4
  ret i32 0
}

; FIXME: Could also fold
define i32 @test13() {
; CHECK-LABEL: @test13(
; CHECK-NEXT:    store atomic i32 0, ptr null monotonic, align 4
; CHECK-NEXT:    ret i32 0
;
  store atomic i32 0, ptr null monotonic, align 4
  ret i32 0
}

define i32 @test13_no_null_opt() #0 {
; CHECK-LABEL: @test13_no_null_opt(
; CHECK-NEXT:    store atomic i32 0, ptr null monotonic, align 4
; CHECK-NEXT:    ret i32 0
;
  store atomic i32 0, ptr null monotonic, align 4
  ret i32 0
}

; Would this be legal to fold? Probably?
define i32 @test14() {
; CHECK-LABEL: @test14(
; CHECK-NEXT:    store atomic i32 0, ptr null seq_cst, align 4
; CHECK-NEXT:    ret i32 0
;
  store atomic i32 0, ptr null seq_cst, align 4
  ret i32 0
}

define i32 @test14_no_null_opt() #0 {
; CHECK-LABEL: @test14_no_null_opt(
; CHECK-NEXT:    store atomic i32 0, ptr null seq_cst, align 4
; CHECK-NEXT:    ret i32 0
;
  store atomic i32 0, ptr null seq_cst, align 4
  ret i32 0
}

@a = external global i32
@b = external global i32

define i32 @test15(i1 %cnd) {
; CHECK-LABEL: @test15(
; CHECK-NEXT:    [[A_VAL:%.*]] = load atomic i32, ptr @a unordered, align 4
; CHECK-NEXT:    [[B_VAL:%.*]] = load atomic i32, ptr @b unordered, align 4
; CHECK-NEXT:    [[X:%.*]] = select i1 [[CND:%.*]], i32 [[A_VAL]], i32 [[B_VAL]]
; CHECK-NEXT:    ret i32 [[X]]
;
  %addr = select i1 %cnd, ptr @a, ptr @b
  %x = load atomic i32, ptr %addr unordered, align 4
  ret i32 %x
}

; FIXME: This would be legal to transform
define i32 @test16(i1 %cnd) {
; CHECK-LABEL: @test16(
; CHECK-NEXT:    [[ADDR:%.*]] = select i1 [[CND:%.*]], ptr @a, ptr @b
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, ptr [[ADDR]] monotonic, align 4
; CHECK-NEXT:    ret i32 [[X]]
;
  %addr = select i1 %cnd, ptr @a, ptr @b
  %x = load atomic i32, ptr %addr monotonic, align 4
  ret i32 %x
}

; FIXME: This would be legal to transform
define i32 @test17(i1 %cnd) {
; CHECK-LABEL: @test17(
; CHECK-NEXT:    [[ADDR:%.*]] = select i1 [[CND:%.*]], ptr @a, ptr @b
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, ptr [[ADDR]] seq_cst, align 4
; CHECK-NEXT:    ret i32 [[X]]
;
  %addr = select i1 %cnd, ptr @a, ptr @b
  %x = load atomic i32, ptr %addr seq_cst, align 4
  ret i32 %x
}

define i32 @test22(i1 %cnd) {
; CHECK-LABEL: @test22(
; CHECK-NEXT:    br i1 [[CND:%.*]], label [[BLOCK1:%.*]], label [[BLOCK2:%.*]]
; CHECK:       block1:
; CHECK-NEXT:    br label [[MERGE:%.*]]
; CHECK:       block2:
; CHECK-NEXT:    br label [[MERGE]]
; CHECK:       merge:
; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi i32 [ 2, [[BLOCK2]] ], [ 1, [[BLOCK1]] ]
; CHECK-NEXT:    store atomic i32 [[STOREMERGE]], ptr @a unordered, align 4
; CHECK-NEXT:    ret i32 0
;
  br i1 %cnd, label %block1, label %block2

block1:
  store atomic i32 1, ptr @a unordered, align 4
  br label %merge
block2:
  store atomic i32 2, ptr @a unordered, align 4
  br label %merge

merge:
  ret i32 0
}

; TODO: probably also legal here
define i32 @test23(i1 %cnd) {
; CHECK-LABEL: @test23(
; CHECK-NEXT:    br i1 [[CND:%.*]], label [[BLOCK1:%.*]], label [[BLOCK2:%.*]]
; CHECK:       block1:
; CHECK-NEXT:    store atomic i32 1, ptr @a monotonic, align 4
; CHECK-NEXT:    br label [[MERGE:%.*]]
; CHECK:       block2:
; CHECK-NEXT:    store atomic i32 2, ptr @a monotonic, align 4
; CHECK-NEXT:    br label [[MERGE]]
; CHECK:       merge:
; CHECK-NEXT:    ret i32 0
;
  br i1 %cnd, label %block1, label %block2

block1:
  store atomic i32 1, ptr @a monotonic, align 4
  br label %merge
block2:
  store atomic i32 2, ptr @a monotonic, align 4
  br label %merge

merge:
  ret i32 0
}

declare void @clobber()

define i32 @test18(ptr %p) {
; CHECK-LABEL: @test18(
; CHECK-NEXT:    [[X:%.*]] = load atomic float, ptr [[P:%.*]] unordered, align 4
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    store atomic float [[X]], ptr [[P]] unordered, align 4
; CHECK-NEXT:    ret i32 0
;
  %x = load atomic float, ptr %p unordered, align 4
  call void @clobber() ;; keep the load around
  store atomic float %x, ptr %p unordered, align 4
  ret i32 0
}

; TODO: probably also legal in this case
define i32 @test19(ptr %p) {
; CHECK-LABEL: @test19(
; CHECK-NEXT:    [[X:%.*]] = load atomic float, ptr [[P:%.*]] seq_cst, align 4
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    store atomic float [[X]], ptr [[P]] seq_cst, align 4
; CHECK-NEXT:    ret i32 0
;
  %x = load atomic float, ptr %p seq_cst, align 4
  call void @clobber() ;; keep the load around
  store atomic float %x, ptr %p seq_cst, align 4
  ret i32 0
}

define i32 @test20(ptr %p, ptr %v) {
; CHECK-LABEL: @test20(
; CHECK-NEXT:    store atomic ptr [[V:%.*]], ptr [[P:%.*]] unordered, align 4
; CHECK-NEXT:    ret i32 0
;
  store atomic ptr %v, ptr %p unordered, align 4
  ret i32 0
}

define i32 @test21(ptr %p, ptr %v) {
; CHECK-LABEL: @test21(
; CHECK-NEXT:    store atomic ptr [[V:%.*]], ptr [[P:%.*]] monotonic, align 4
; CHECK-NEXT:    ret i32 0
;
  store atomic ptr %v, ptr %p monotonic, align 4
  ret i32 0
}

define void @pr27490a(ptr %p1, ptr %p2) {
; CHECK-LABEL: @pr27490a(
; CHECK-NEXT:    [[L:%.*]] = load ptr, ptr [[P1:%.*]], align 8
; CHECK-NEXT:    store volatile ptr [[L]], ptr [[P2:%.*]], align 8
; CHECK-NEXT:    ret void
;
  %l = load ptr, ptr %p1
  store volatile ptr %l, ptr %p2
  ret void
}

define void @pr27490b(ptr %p1, ptr %p2) {
; CHECK-LABEL: @pr27490b(
; CHECK-NEXT:    [[L:%.*]] = load ptr, ptr [[P1:%.*]], align 8
; CHECK-NEXT:    store atomic ptr [[L]], ptr [[P2:%.*]] seq_cst, align 8
; CHECK-NEXT:    ret void
;
  %l = load ptr, ptr %p1
  store atomic ptr %l, ptr %p2 seq_cst, align 8
  ret void
}

;; At the moment, we can't form atomic vectors by folding since these are
;; not representable in the IR. This was pr29121. The right long term
;; solution is to extend the IR to handle this case.
define <2 x float> @no_atomic_vector_load(ptr %p) {
; CHECK-LABEL: @no_atomic_vector_load(
; CHECK-NEXT:    [[LOAD:%.*]] = load atomic i64, ptr [[P:%.*]] unordered, align 8
; CHECK-NEXT:    [[DOTCAST:%.*]] = bitcast i64 [[LOAD]] to <2 x float>
; CHECK-NEXT:    ret <2 x float> [[DOTCAST]]
;
  %load = load atomic i64, ptr %p unordered, align 8
  %.cast = bitcast i64 %load to <2 x float>
  ret <2 x float> %.cast
}

define void @no_atomic_vector_store(<2 x float> %p, ptr %p2) {
; CHECK-LABEL: @no_atomic_vector_store(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x float> [[P:%.*]] to i64
; CHECK-NEXT:    store atomic i64 [[TMP1]], ptr [[P2:%.*]] unordered, align 8
; CHECK-NEXT:    ret void
;
  %1 = bitcast <2 x float> %p to i64
  store atomic i64 %1, ptr %p2 unordered, align 8
  ret void
}

@c = constant i32 42
@g = global i32 42

define i32 @atomic_load_from_constant_global() {
; CHECK-LABEL: @atomic_load_from_constant_global(
; CHECK-NEXT:    ret i32 42
;
  %v = load atomic i32, ptr @c seq_cst, align 4
  ret i32 %v
}

define i8 @atomic_load_from_constant_global_bitcast() {
; CHECK-LABEL: @atomic_load_from_constant_global_bitcast(
; CHECK-NEXT:    ret i8 42
;
  %v = load atomic i8, ptr @c seq_cst, align 1
  ret i8 %v
}

define void @atomic_load_from_non_constant_global() {
; CHECK-LABEL: @atomic_load_from_non_constant_global(
; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, ptr @g seq_cst, align 4
; CHECK-NEXT:    ret void
;
  load atomic i32, ptr @g seq_cst, align 4
  ret void
}

define void @volatile_load_from_constant_global() {
; CHECK-LABEL: @volatile_load_from_constant_global(
; CHECK-NEXT:    [[TMP1:%.*]] = load volatile i32, ptr @c, align 4
; CHECK-NEXT:    ret void
;
  load volatile i32, ptr @c, align 4
  ret void
}

attributes #0 = { null_pointer_is_valid }