; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S < %s -instcombine | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"

; Check transforms involving atomic operations

define i32 @test1(i32* %p) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4
; CHECK-NEXT:    [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT:    ret i32 [[Z]]
;
  %x = load atomic i32, i32* %p seq_cst, align 4
  %y = load i32, i32* %p, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

define i32 @test2(i32* %p) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:    [[X:%.*]] = load volatile i32, i32* [[P:%.*]], align 4
; CHECK-NEXT:    [[Y:%.*]] = load volatile i32, i32* [[P]], align 4
; CHECK-NEXT:    [[Z:%.*]] = add i32 [[X]], [[Y]]
; CHECK-NEXT:    ret i32 [[Z]]
;
  %x = load volatile i32, i32* %p, align 4
  %y = load volatile i32, i32* %p, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

; The exact semantics of mixing volatile and non-volatile on the same
; memory location are a bit unclear, but conservatively, we know we don't
; want to remove the volatile.
define i32 @test3(i32* %p) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:    [[X:%.*]] = load volatile i32, i32* [[P:%.*]], align 4
; CHECK-NEXT:    [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT:    ret i32 [[Z]]
;
  %x = load volatile i32, i32* %p, align 4
  %y = load i32, i32* %p, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

; Forwarding from a stronger ordered atomic is fine
define i32 @test4(i32* %p) {
; CHECK-LABEL: @test4(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4
; CHECK-NEXT:    [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT:    ret i32 [[Z]]
;
  %x = load atomic i32, i32* %p seq_cst, align 4
  %y = load atomic i32, i32* %p unordered, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

; Forwarding from a non-atomic load is not. (The earlier load
; could in principle be promoted to atomic and then forwarded,
; but we can't just drop the atomic from the load.)
define i32 @test5(i32* %p) {
; CHECK-LABEL: @test5(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT:    [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT:    ret i32 [[Z]]
;
  %x = load atomic i32, i32* %p unordered, align 4
  %y = load i32, i32* %p, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

; Forwarding atomic to atomic is fine
define i32 @test6(i32* %p) {
; CHECK-LABEL: @test6(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT:    [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT:    ret i32 [[Z]]
;
  %x = load atomic i32, i32* %p unordered, align 4
  %y = load atomic i32, i32* %p unordered, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

; FIXME: we currently don't do anything for monotonic
define i32 @test7(i32* %p) {
; CHECK-LABEL: @test7(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4
; CHECK-NEXT:    [[Y:%.*]] = load atomic i32, i32* [[P]] monotonic, align 4
; CHECK-NEXT:    [[Z:%.*]] = add i32 [[X]], [[Y]]
; CHECK-NEXT:    ret i32 [[Z]]
;
  %x = load atomic i32, i32* %p seq_cst, align 4
  %y = load atomic i32, i32* %p monotonic, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

; FIXME: We could forward in racy code
define i32 @test8(i32* %p) {
; CHECK-LABEL: @test8(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4
; CHECK-NEXT:    [[Y:%.*]] = load atomic i32, i32* [[P]] acquire, align 4
; CHECK-NEXT:    [[Z:%.*]] = add i32 [[X]], [[Y]]
; CHECK-NEXT:    ret i32 [[Z]]
;
  %x = load atomic i32, i32* %p seq_cst, align 4
  %y = load atomic i32, i32* %p acquire, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

; An unordered access to null is still unreachable. There's no
; ordering imposed.
define i32 @test9() {
; CHECK-LABEL: @test9(
; CHECK-NEXT:    store i32 undef, i32* null, align 536870912
; CHECK-NEXT:    ret i32 undef
;
  %x = load atomic i32, i32* null unordered, align 4
  ret i32 %x
}

define i32 @test9_no_null_opt() #0 {
; CHECK-LABEL: @test9_no_null_opt(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, i32* null unordered, align 536870912
; CHECK-NEXT:    ret i32 [[X]]
;
  %x = load atomic i32, i32* null unordered, align 4
  ret i32 %x
}

; FIXME: Could also fold
define i32 @test10() {
; CHECK-LABEL: @test10(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, i32* null monotonic, align 536870912
; CHECK-NEXT:    ret i32 [[X]]
;
  %x = load atomic i32, i32* null monotonic, align 4
  ret i32 %x
}

define i32 @test10_no_null_opt() #0 {
; CHECK-LABEL: @test10_no_null_opt(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, i32* null monotonic, align 536870912
; CHECK-NEXT:    ret i32 [[X]]
;
  %x = load atomic i32, i32* null monotonic, align 4
  ret i32 %x
}

; Would this be legal to fold? Probably?
define i32 @test11() {
; CHECK-LABEL: @test11(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, i32* null seq_cst, align 536870912
; CHECK-NEXT:    ret i32 [[X]]
;
  %x = load atomic i32, i32* null seq_cst, align 4
  ret i32 %x
}

define i32 @test11_no_null_opt() #0 {
; CHECK-LABEL: @test11_no_null_opt(
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, i32* null seq_cst, align 536870912
; CHECK-NEXT:    ret i32 [[X]]
;
  %x = load atomic i32, i32* null seq_cst, align 4
  ret i32 %x
}

; An unordered access to null is still unreachable. There's no
; ordering imposed.
define i32 @test12() {
; CHECK-LABEL: @test12(
; CHECK-NEXT:    store atomic i32 undef, i32* null unordered, align 536870912
; CHECK-NEXT:    ret i32 0
;
  store atomic i32 0, i32* null unordered, align 4
  ret i32 0
}

define i32 @test12_no_null_opt() #0 {
; CHECK-LABEL: @test12_no_null_opt(
; CHECK-NEXT:    store atomic i32 0, i32* null unordered, align 536870912
; CHECK-NEXT:    ret i32 0
;
  store atomic i32 0, i32* null unordered, align 4
  ret i32 0
}

; FIXME: Could also fold
define i32 @test13() {
; CHECK-LABEL: @test13(
; CHECK-NEXT:    store atomic i32 0, i32* null monotonic, align 536870912
; CHECK-NEXT:    ret i32 0
;
  store atomic i32 0, i32* null monotonic, align 4
  ret i32 0
}

define i32 @test13_no_null_opt() #0 {
; CHECK-LABEL: @test13_no_null_opt(
; CHECK-NEXT:    store atomic i32 0, i32* null monotonic, align 536870912
; CHECK-NEXT:    ret i32 0
;
  store atomic i32 0, i32* null monotonic, align 4
  ret i32 0
}

; Would this be legal to fold? Probably?
define i32 @test14() {
; CHECK-LABEL: @test14(
; CHECK-NEXT:    store atomic i32 0, i32* null seq_cst, align 536870912
; CHECK-NEXT:    ret i32 0
;
  store atomic i32 0, i32* null seq_cst, align 4
  ret i32 0
}

define i32 @test14_no_null_opt() #0 {
; CHECK-LABEL: @test14_no_null_opt(
; CHECK-NEXT:    store atomic i32 0, i32* null seq_cst, align 536870912
; CHECK-NEXT:    ret i32 0
;
  store atomic i32 0, i32* null seq_cst, align 4
  ret i32 0
}

@a = external global i32
@b = external global i32

define i32 @test15(i1 %cnd) {
; CHECK-LABEL: @test15(
; CHECK-NEXT:    [[A_VAL:%.*]] = load atomic i32, i32* @a unordered, align 4
; CHECK-NEXT:    [[B_VAL:%.*]] = load atomic i32, i32* @b unordered, align 4
; CHECK-NEXT:    [[X:%.*]] = select i1 [[CND:%.*]], i32 [[A_VAL]], i32 [[B_VAL]]
; CHECK-NEXT:    ret i32 [[X]]
;
  %addr = select i1 %cnd, i32* @a, i32* @b
  %x = load atomic i32, i32* %addr unordered, align 4
  ret i32 %x
}

; FIXME: This would be legal to transform
define i32 @test16(i1 %cnd) {
; CHECK-LABEL: @test16(
; CHECK-NEXT:    [[ADDR:%.*]] = select i1 [[CND:%.*]], i32* @a, i32* @b
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, i32* [[ADDR]] monotonic, align 4
; CHECK-NEXT:    ret i32 [[X]]
;
  %addr = select i1 %cnd, i32* @a, i32* @b
  %x = load atomic i32, i32* %addr monotonic, align 4
  ret i32 %x
}

; FIXME: This would be legal to transform
define i32 @test17(i1 %cnd) {
; CHECK-LABEL: @test17(
; CHECK-NEXT:    [[ADDR:%.*]] = select i1 [[CND:%.*]], i32* @a, i32* @b
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, i32* [[ADDR]] seq_cst, align 4
; CHECK-NEXT:    ret i32 [[X]]
;
  %addr = select i1 %cnd, i32* @a, i32* @b
  %x = load atomic i32, i32* %addr seq_cst, align 4
  ret i32 %x
}

define i32 @test22(i1 %cnd) {
; CHECK-LABEL: @test22(
; CHECK-NEXT:    br i1 [[CND:%.*]], label [[BLOCK1:%.*]], label [[BLOCK2:%.*]]
; CHECK:       block1:
; CHECK-NEXT:    br label [[MERGE:%.*]]
; CHECK:       block2:
; CHECK-NEXT:    br label [[MERGE]]
; CHECK:       merge:
; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi i32 [ 2, [[BLOCK2]] ], [ 1, [[BLOCK1]] ]
; CHECK-NEXT:    store atomic i32 [[STOREMERGE]], i32* @a unordered, align 4
; CHECK-NEXT:    ret i32 0
;
  br i1 %cnd, label %block1, label %block2

block1:
  store atomic i32 1, i32* @a unordered, align 4
  br label %merge
block2:
  store atomic i32 2, i32* @a unordered, align 4
  br label %merge

merge:
  ret i32 0
}

; TODO: probably also legal here
define i32 @test23(i1 %cnd) {
; CHECK-LABEL: @test23(
; CHECK-NEXT:    br i1 [[CND:%.*]], label [[BLOCK1:%.*]], label [[BLOCK2:%.*]]
; CHECK:       block1:
; CHECK-NEXT:    store atomic i32 1, i32* @a monotonic, align 4
; CHECK-NEXT:    br label [[MERGE:%.*]]
; CHECK:       block2:
; CHECK-NEXT:    store atomic i32 2, i32* @a monotonic, align 4
; CHECK-NEXT:    br label [[MERGE]]
; CHECK:       merge:
; CHECK-NEXT:    ret i32 0
;
  br i1 %cnd, label %block1, label %block2

block1:
  store atomic i32 1, i32* @a monotonic, align 4
  br label %merge
block2:
  store atomic i32 2, i32* @a monotonic, align 4
  br label %merge

merge:
  ret i32 0
}

declare void @clobber()

define i32 @test18(float* %p) {
; CHECK-LABEL: @test18(
; CHECK-NEXT:    [[X:%.*]] = load atomic float, float* [[P:%.*]] unordered, align 4
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    store atomic float [[X]], float* [[P]] unordered, align 4
; CHECK-NEXT:    ret i32 0
;
  %x = load atomic float, float* %p unordered, align 4
  call void @clobber() ;; keep the load around
  store atomic float %x, float* %p unordered, align 4
  ret i32 0
}

; TODO: probably also legal in this case
define i32 @test19(float* %p) {
; CHECK-LABEL: @test19(
; CHECK-NEXT:    [[X:%.*]] = load atomic float, float* [[P:%.*]] seq_cst, align 4
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    store atomic float [[X]], float* [[P]] seq_cst, align 4
; CHECK-NEXT:    ret i32 0
;
  %x = load atomic float, float* %p seq_cst, align 4
  call void @clobber() ;; keep the load around
  store atomic float %x, float* %p seq_cst, align 4
  ret i32 0
}

define i32 @test20(i32** %p, i8* %v) {
; CHECK-LABEL: @test20(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32** [[P:%.*]] to i8**
; CHECK-NEXT:    store atomic i8* [[V:%.*]], i8** [[TMP1]] unordered, align 4
; CHECK-NEXT:    ret i32 0
;
  %cast = bitcast i8* %v to i32*
  store atomic i32* %cast, i32** %p unordered, align 4
  ret i32 0
}

define i32 @test21(i32** %p, i8* %v) {
; CHECK-LABEL: @test21(
; CHECK-NEXT:    [[CAST:%.*]] = bitcast i8* [[V:%.*]] to i32*
; CHECK-NEXT:    store atomic i32* [[CAST]], i32** [[P:%.*]] monotonic, align 4
; CHECK-NEXT:    ret i32 0
;
  %cast = bitcast i8* %v to i32*
  store atomic i32* %cast, i32** %p monotonic, align 4
  ret i32 0
}

define void @pr27490a(i8** %p1, i8** %p2) {
; CHECK-LABEL: @pr27490a(
; CHECK-NEXT:    [[L:%.*]] = load i8*, i8** [[P1:%.*]], align 8
; CHECK-NEXT:    store volatile i8* [[L]], i8** [[P2:%.*]], align 8
; CHECK-NEXT:    ret void
;
  %l = load i8*, i8** %p1
  store volatile i8* %l, i8** %p2
  ret void
}

define void @pr27490b(i8** %p1, i8** %p2) {
; CHECK-LABEL: @pr27490b(
; CHECK-NEXT:    [[L:%.*]] = load i8*, i8** [[P1:%.*]], align 8
; CHECK-NEXT:    store atomic i8* [[L]], i8** [[P2:%.*]] seq_cst, align 8
; CHECK-NEXT:    ret void
;
  %l = load i8*, i8** %p1
  store atomic i8* %l, i8** %p2 seq_cst, align 8
  ret void
}

;; At the moment, we can't form atomic vectors by folding since these are
;; not representable in the IR. This was pr29121. The right long term
;; solution is to extend the IR to handle this case.
define <2 x float> @no_atomic_vector_load(i64* %p) {
; CHECK-LABEL: @no_atomic_vector_load(
; CHECK-NEXT:    [[LOAD:%.*]] = load atomic i64, i64* [[P:%.*]] unordered, align 8
; CHECK-NEXT:    [[DOTCAST:%.*]] = bitcast i64 [[LOAD]] to <2 x float>
; CHECK-NEXT:    ret <2 x float> [[DOTCAST]]
;
  %load = load atomic i64, i64* %p unordered, align 8
  %.cast = bitcast i64 %load to <2 x float>
  ret <2 x float> %.cast
}

define void @no_atomic_vector_store(<2 x float> %p, i8* %p2) {
; CHECK-LABEL: @no_atomic_vector_store(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x float> [[P:%.*]] to i64
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[P2:%.*]] to i64*
; CHECK-NEXT:    store atomic i64 [[TMP1]], i64* [[TMP2]] unordered, align 8
; CHECK-NEXT:    ret void
;
  %1 = bitcast <2 x float> %p to i64
  %2 = bitcast i8* %p2 to i64*
  store atomic i64 %1, i64* %2 unordered, align 8
  ret void
}

attributes #0 = { null_pointer_is_valid }