; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -passes=gvn -S < %s | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"

@x = common global i32 0, align 4
@y = common global i32 0, align 4

; GVN across unordered store (allowed)
define i32 @test1() nounwind uwtable ssp {
; CHECK-LABEL: define i32 @test1
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[X:%.*]] = load i32, ptr @y, align 4
; CHECK-NEXT:    store atomic i32 [[X]], ptr @x unordered, align 4
; CHECK-NEXT:    [[Z:%.*]] = add i32 [[X]], [[X]]
; CHECK-NEXT:    ret i32 [[Z]]
;
entry:
  %x = load i32, ptr @y
  store atomic i32 %x, ptr @x unordered, align 4
  %y = load i32, ptr @y
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN across unordered load (allowed)
define i32 @test3() nounwind uwtable ssp {
; CHECK-LABEL: define i32 @test3
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[X:%.*]] = load i32, ptr @y, align 4
; CHECK-NEXT:    [[Y:%.*]] = load atomic i32, ptr @x unordered, align 4
; CHECK-NEXT:    [[A:%.*]] = add i32 [[X]], [[X]]
; CHECK-NEXT:    [[B:%.*]] = add i32 [[Y]], [[A]]
; CHECK-NEXT:    ret i32 [[B]]
;
entry:
  %x = load i32, ptr @y
  %y = load atomic i32, ptr @x unordered, align 4
  %z = load i32, ptr @y
  %a = add i32 %x, %z
  %b = add i32 %y, %a
  ret i32 %b
}

; GVN load to unordered load (allowed)
define i32 @test5() nounwind uwtable ssp {
; CHECK-LABEL: define i32 @test5
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, ptr @x unordered, align 4
; CHECK-NEXT:    [[Z:%.*]] = add i32 [[X]], [[X]]
; CHECK-NEXT:    ret i32 [[Z]]
;
entry:
  %x = load atomic i32, ptr @x unordered, align 4
  %y = load i32, ptr @x
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN unordered load to load (unordered load must not be removed)
define i32 @test6() nounwind uwtable ssp {
; CHECK-LABEL: define i32 @test6
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[X:%.*]] = load i32, ptr @x, align 4
; CHECK-NEXT:    [[X2:%.*]] = load atomic i32, ptr @x unordered, align 4
; CHECK-NEXT:    [[X3:%.*]] = add i32 [[X]], [[X2]]
; CHECK-NEXT:    ret i32 [[X3]]
;
entry:
  %x = load i32, ptr @x
  %x2 = load atomic i32, ptr @x unordered, align 4
  %x3 = add i32 %x, %x2
  ret i32 %x3
}

; GVN across release-acquire pair (forbidden)
define i32 @test7() nounwind uwtable ssp {
; CHECK-LABEL: define i32 @test7
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[X:%.*]] = load i32, ptr @y, align 4
; CHECK-NEXT:    store atomic i32 [[X]], ptr @x release, align 4
; CHECK-NEXT:    [[W:%.*]] = load atomic i32, ptr @x acquire, align 4
; CHECK-NEXT:    [[Y:%.*]] = load i32, ptr @y, align 4
; CHECK-NEXT:    [[Z:%.*]] = add i32 [[X]], [[Y]]
; CHECK-NEXT:    ret i32 [[Z]]
;
entry:
  %x = load i32, ptr @y
  store atomic i32 %x, ptr @x release, align 4
  %w = load atomic i32, ptr @x acquire, align 4
  %y = load i32, ptr @y
  %z = add i32 %x, %y
  ret i32 %z
}
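
; Rationale for @test7: the release store / acquire load pair on @x can form
; a synchronizes-with edge with another thread, so the second load of @y may
; legitimately observe a value that thread published before its release store.
; Replacing %y with %x would hide such an update, hence the forwarding is
; forbidden here.
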
; GVN across monotonic store (allowed)
define i32 @test9() nounwind uwtable ssp {
; CHECK-LABEL: define i32 @test9
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[X:%.*]] = load i32, ptr @y, align 4
; CHECK-NEXT:    store atomic i32 [[X]], ptr @x monotonic, align 4
; CHECK-NEXT:    [[Z:%.*]] = add i32 [[X]], [[X]]
; CHECK-NEXT:    ret i32 [[Z]]
;
entry:
  %x = load i32, ptr @y
  store atomic i32 %x, ptr @x monotonic, align 4
  %y = load i32, ptr @y
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN of an unordered load across a monotonic load (not allowed)
define i32 @test10() nounwind uwtable ssp {
; CHECK-LABEL: define i32 @test10
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, ptr @y unordered, align 4
; CHECK-NEXT:    [[CLOBBER:%.*]] = load atomic i32, ptr @x monotonic, align 4
; CHECK-NEXT:    [[Y:%.*]] = load atomic i32, ptr @y monotonic, align 4
; CHECK-NEXT:    [[Z:%.*]] = add i32 [[X]], [[Y]]
; CHECK-NEXT:    ret i32 [[Z]]
;
entry:
  %x = load atomic i32, ptr @y unordered, align 4
  %clobber = load atomic i32, ptr @x monotonic, align 4
  %y = load atomic i32, ptr @y monotonic, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

define i32 @PR22708(i1 %flag) {
; CHECK-LABEL: define i32 @PR22708
; CHECK-SAME: (i1 [[FLAG:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[FLAG]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 43, ptr @y, align 4
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    [[TMP0:%.*]] = load atomic i32, ptr @x acquire, align 4
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr @y, align 4
; CHECK-NEXT:    ret i32 [[LOAD]]
;
entry:
  br i1 %flag, label %if.then, label %if.end

if.then:
  store i32 43, ptr @y, align 4
  br label %if.end

if.end:
  load atomic i32, ptr @x acquire, align 4
  %load = load i32, ptr @y, align 4
  ret i32 %load
}

; Can't remove a load over an ordering barrier
define i32 @test12(i1 %B, ptr %P1, ptr %P2) {
; CHECK-LABEL: define i32 @test12
; CHECK-SAME: (i1 [[B:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    [[LOAD0:%.*]] = load i32, ptr [[P1]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, ptr [[P2]] seq_cst, align 4
; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[P1]], align 4
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[B]], i32 [[LOAD0]], i32 [[LOAD1]]
; CHECK-NEXT:    ret i32 [[SEL]]
;
  %load0 = load i32, ptr %P1
  %1 = load atomic i32, ptr %P2 seq_cst, align 4
  %load1 = load i32, ptr %P1
  %sel = select i1 %B, i32 %load0, i32 %load1
  ret i32 %sel
}

; atomic to non-atomic forwarding is legal
define i32 @test13(ptr %P1) {
; CHECK-LABEL: define i32 @test13
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] seq_cst, align 4
; CHECK-NEXT:    ret i32 0
;
  %a = load atomic i32, ptr %P1 seq_cst, align 4
  %b = load i32, ptr %P1
  %res = sub i32 %a, %b
  ret i32 %res
}

define i32 @test13b(ptr %P1) {
; CHECK-LABEL: define i32 @test13b
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    store atomic i32 0, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    ret i32 0
;
  store atomic i32 0, ptr %P1 unordered, align 4
  %b = load i32, ptr %P1
  ret i32 %b
}

; atomic to unordered atomic forwarding is legal
define i32 @test14(ptr %P1) {
; CHECK-LABEL: define i32 @test14
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] seq_cst, align 4
; CHECK-NEXT:    ret i32 0
;
  %a = load atomic i32, ptr %P1 seq_cst, align 4
  %b = load atomic i32, ptr %P1 unordered, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}
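
; The pattern above generalizes: a loaded value may be forwarded to a later
; load whose ordering is unordered or weaker, however strong the earlier
; access was. As an illustration (a sketch of what GVN does in @test14, not
; additional test input):
;   %a = load atomic i32, ptr %P1 seq_cst, align 4
;   ; %b is replaced by %a, so "sub i32 %a, %b" folds to 0
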
; implementation restriction: can't forward to anything stronger
; than unordered
define i32 @test15(ptr %P1, ptr %P2) {
; CHECK-LABEL: define i32 @test15
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] seq_cst, align 4
; CHECK-NEXT:    [[B:%.*]] = load atomic i32, ptr [[P1]] seq_cst, align 4
; CHECK-NEXT:    [[RES:%.*]] = sub i32 [[A]], [[B]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %a = load atomic i32, ptr %P1 seq_cst, align 4
  %b = load atomic i32, ptr %P1 seq_cst, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

; forwarding non-atomic to atomic is wrong! (However, it would be legal
; to use the later value in place of the former in this particular
; example. We just don't do that right now.)
define i32 @test16(ptr %P1, ptr %P2) {
; CHECK-LABEL: define i32 @test16
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[P1]], align 4
; CHECK-NEXT:    [[B:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    [[RES:%.*]] = sub i32 [[A]], [[B]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %a = load i32, ptr %P1, align 4
  %b = load atomic i32, ptr %P1 unordered, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

define i32 @test16b(ptr %P1) {
; CHECK-LABEL: define i32 @test16b
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    [[B:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    ret i32 [[B]]
;
  store i32 0, ptr %P1
  %b = load atomic i32, ptr %P1 unordered, align 4
  ret i32 %b
}

; Can't DSE across a seq_cst store (it acts as a fence)
define void @fence_seq_cst_store(ptr %P1, ptr %P2) {
; CHECK-LABEL: define void @fence_seq_cst_store
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    store atomic i32 0, ptr [[P2]] seq_cst, align 4
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    ret void
;
  store i32 0, ptr %P1, align 4
  store atomic i32 0, ptr %P2 seq_cst, align 4
  store i32 0, ptr %P1, align 4
  ret void
}

; Can't DSE across a full fence
define void @fence_seq_cst(ptr %P1, ptr %P2) {
; CHECK-LABEL: define void @fence_seq_cst
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    fence seq_cst
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    ret void
;
  store i32 0, ptr %P1, align 4
  fence seq_cst
  store i32 0, ptr %P1, align 4
  ret void
}

; Can't DSE across a seq_cst syncscope("singlethread") fence
define void @fence_seq_cst_st(ptr %P1, ptr %P2) {
; CHECK-LABEL: define void @fence_seq_cst_st
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    fence syncscope("singlethread") seq_cst
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    ret void
;
  store i32 0, ptr %P1, align 4
  fence syncscope("singlethread") seq_cst
  store i32 0, ptr %P1, align 4
  ret void
}
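
; The next two cases use inline asm rather than an explicit fence as the
; barrier: asm marked "sideeffect" and asm with a "~{memory}" clobber must
; both be treated conservatively, as if they could read or write any memory,
; so the store ahead of them is not dead.
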
; Can't DSE across side-effecting inline asm (treated as a barrier)
define void @fence_asm_sideeffect(ptr %P1, ptr %P2) {
; CHECK-LABEL: define void @fence_asm_sideeffect
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    call void asm sideeffect "", ""()
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    ret void
;
  store i32 0, ptr %P1, align 4
  call void asm sideeffect "", ""()
  store i32 0, ptr %P1, align 4
  ret void
}

; Can't DSE across inline asm that clobbers memory
define void @fence_asm_memory(ptr %P1, ptr %P2) {
; CHECK-LABEL: define void @fence_asm_memory
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    call void asm "", "~{memory}"()
; CHECK-NEXT:    store i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    ret void
;
  store i32 0, ptr %P1, align 4
  call void asm "", "~{memory}"()
  store i32 0, ptr %P1, align 4
  ret void
}

; Can't remove a volatile load
define i32 @volatile_load(ptr %P1, ptr %P2) {
; CHECK-LABEL: define i32 @volatile_load
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[P1]], align 4
; CHECK-NEXT:    [[B:%.*]] = load volatile i32, ptr [[P1]], align 4
; CHECK-NEXT:    [[RES:%.*]] = sub i32 [[A]], [[B]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %a = load i32, ptr %P1, align 4
  %b = load volatile i32, ptr %P1, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

; Can't remove redundant volatile loads
define i32 @redundant_volatile_load(ptr %P1, ptr %P2) {
; CHECK-LABEL: define i32 @redundant_volatile_load
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load volatile i32, ptr [[P1]], align 4
; CHECK-NEXT:    [[B:%.*]] = load volatile i32, ptr [[P1]], align 4
; CHECK-NEXT:    [[RES:%.*]] = sub i32 [[A]], [[B]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %a = load volatile i32, ptr %P1, align 4
  %b = load volatile i32, ptr %P1, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

; Can't DSE a volatile store
define void @volatile_store(ptr %P1, ptr %P2) {
; CHECK-LABEL: define void @volatile_store
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    store volatile i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    store i32 3, ptr [[P1]], align 4
; CHECK-NEXT:    ret void
;
  store volatile i32 0, ptr %P1, align 4
  store i32 3, ptr %P1, align 4
  ret void
}

; Can't DSE a redundant volatile store
define void @redundant_volatile_store(ptr %P1, ptr %P2) {
; CHECK-LABEL: define void @redundant_volatile_store
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    store volatile i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    store volatile i32 0, ptr [[P1]], align 4
; CHECK-NEXT:    ret void
;
  store volatile i32 0, ptr %P1, align 4
  store volatile i32 0, ptr %P1, align 4
  ret void
}

; Can value-forward from volatile loads
define i32 @test20(ptr %P1, ptr %P2) {
; CHECK-LABEL: define i32 @test20
; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load volatile i32, ptr [[P1]], align 4
; CHECK-NEXT:    ret i32 0
;
  %a = load volatile i32, ptr %P1, align 4
  %b = load i32, ptr %P1, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

; We're currently conservative about widening
define i64 @widen1(ptr %P1) {
; CHECK-LABEL: define i64 @widen1
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    [[B:%.*]] = load atomic i64, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    [[A64:%.*]] = sext i32 [[A]] to i64
; CHECK-NEXT:    [[RES:%.*]] = sub i64 [[A64]], [[B]]
; CHECK-NEXT:    ret i64 [[RES]]
;
  %a = load atomic i32, ptr %P1 unordered, align 4
  %b = load atomic i64, ptr %P1 unordered, align 4
  %a64 = sext i32 %a to i64
  %res = sub i64 %a64, %b
  ret i64 %res
}
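
; Widening would have to synthesize an i64 result from the i32 load plus
; extra memory that was never read in a single indivisible operation, which
; is not sound for atomics, so GVN leaves @widen1 alone. Narrowing, shown
; next, is fine on this little-endian target: the low 32 bits of the atomic
; i64 load are a valid result for an atomic i32 load of the same address,
; so GVN can emit a trunc of the wider value instead of a second load.
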
; narrowing does work
define i64 @narrow(ptr %P1) {
; CHECK-LABEL: define i64 @narrow
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A64:%.*]] = load atomic i64, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[A64]] to i32
; CHECK-NEXT:    [[B64:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[RES:%.*]] = sub i64 [[A64]], [[B64]]
; CHECK-NEXT:    ret i64 [[RES]]
;
  %a64 = load atomic i64, ptr %P1 unordered, align 4
  %b = load atomic i32, ptr %P1 unordered, align 4
  %b64 = sext i32 %b to i64
  %res = sub i64 %a64, %b64
  ret i64 %res
}

; Missed optimization: we don't yet optimize ordered loads
define i64 @narrow2(ptr %P1) {
; CHECK-LABEL: define i64 @narrow2
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A64:%.*]] = load atomic i64, ptr [[P1]] acquire, align 4
; CHECK-NEXT:    [[B:%.*]] = load atomic i32, ptr [[P1]] acquire, align 4
; CHECK-NEXT:    [[B64:%.*]] = sext i32 [[B]] to i64
; CHECK-NEXT:    [[RES:%.*]] = sub i64 [[A64]], [[B64]]
; CHECK-NEXT:    ret i64 [[RES]]
;
  %a64 = load atomic i64, ptr %P1 acquire, align 4
  %b = load atomic i32, ptr %P1 acquire, align 4
  %b64 = sext i32 %b to i64
  %res = sub i64 %a64, %b64
  ret i64 %res
}

; Note: The cross-block FRE testing is deliberately light. All of the tricky
; bits of legality are shared code with the block-local FRE above. These
; are here only to show that we haven't obviously broken anything.

; unordered atomic to unordered atomic
define i32 @non_local_fre(ptr %P1) {
; CHECK-LABEL: define i32 @non_local_fre
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[EARLY:%.*]], label [[NEXT:%.*]]
; CHECK:       early:
; CHECK-NEXT:    ret i32 0
; CHECK:       next:
; CHECK-NEXT:    ret i32 0
;
  %a = load atomic i32, ptr %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  ret i32 %a
next:
  %b = load atomic i32, ptr %P1 unordered, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

; unordered atomic to non-atomic
define i32 @non_local_fre2(ptr %P1) {
; CHECK-LABEL: define i32 @non_local_fre2
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[EARLY:%.*]], label [[NEXT:%.*]]
; CHECK:       early:
; CHECK-NEXT:    ret i32 0
; CHECK:       next:
; CHECK-NEXT:    ret i32 0
;
  %a = load atomic i32, ptr %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  ret i32 %a
next:
  %b = load i32, ptr %P1
  %res = sub i32 %a, %b
  ret i32 %res
}
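
; As in the block-local cases, acquire loads block forwarding even across
; blocks: each acquire may synchronize with a different release store, so
; the load in %next below has to stay.
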
; Can't forward ordered atomics.
define i32 @non_local_fre3(ptr %P1) {
; CHECK-LABEL: define i32 @non_local_fre3
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] acquire, align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[EARLY:%.*]], label [[NEXT:%.*]]
; CHECK:       early:
; CHECK-NEXT:    ret i32 0
; CHECK:       next:
; CHECK-NEXT:    [[B:%.*]] = load atomic i32, ptr [[P1]] acquire, align 4
; CHECK-NEXT:    [[RES:%.*]] = sub i32 [[A]], [[B]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %a = load atomic i32, ptr %P1 acquire, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  ret i32 %a
next:
  %b = load atomic i32, ptr %P1 acquire, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

declare void @clobber()

; unordered atomic to unordered atomic
define i32 @non_local_pre(ptr %P1) {
; CHECK-LABEL: define i32 @non_local_pre
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[EARLY:%.*]], label [[NEXT:%.*]]
; CHECK:       early:
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    [[B_PRE:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    br label [[NEXT]]
; CHECK:       next:
; CHECK-NEXT:    [[B:%.*]] = phi i32 [ [[B_PRE]], [[EARLY]] ], [ [[A]], [[TMP0:%.*]] ]
; CHECK-NEXT:    ret i32 [[B]]
;
  %a = load atomic i32, ptr %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, ptr %P1 unordered, align 4
  ret i32 %b
}

; unordered atomic to non-atomic
define i32 @non_local_pre2(ptr %P1) {
; CHECK-LABEL: define i32 @non_local_pre2
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[EARLY:%.*]], label [[NEXT:%.*]]
; CHECK:       early:
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    [[B_PRE:%.*]] = load i32, ptr [[P1]], align 4
; CHECK-NEXT:    br label [[NEXT]]
; CHECK:       next:
; CHECK-NEXT:    [[B:%.*]] = phi i32 [ [[B_PRE]], [[EARLY]] ], [ [[A]], [[TMP0:%.*]] ]
; CHECK-NEXT:    ret i32 [[B]]
;
  %a = load atomic i32, ptr %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load i32, ptr %P1
  ret i32 %b
}
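
; PRE of the load in @non_local_pre3 below would merge a newly inserted copy
; on the %early path with %a via a phi, but %a is a non-atomic load: using
; its value for an unordered atomic load is exactly the non-atomic-to-atomic
; forwarding rejected in @test16, so the transform is blocked.
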
; non-atomic to unordered atomic - can't forward!
define i32 @non_local_pre3(ptr %P1) {
; CHECK-LABEL: define i32 @non_local_pre3
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[P1]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[EARLY:%.*]], label [[NEXT:%.*]]
; CHECK:       early:
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    br label [[NEXT]]
; CHECK:       next:
; CHECK-NEXT:    [[B:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    ret i32 [[B]]
;
  %a = load i32, ptr %P1
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, ptr %P1 unordered, align 4
  ret i32 %b
}

; ordered atomic to ordered atomic - can't forward
define i32 @non_local_pre4(ptr %P1) {
; CHECK-LABEL: define i32 @non_local_pre4
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] seq_cst, align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[EARLY:%.*]], label [[NEXT:%.*]]
; CHECK:       early:
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    br label [[NEXT]]
; CHECK:       next:
; CHECK-NEXT:    [[B:%.*]] = load atomic i32, ptr [[P1]] seq_cst, align 4
; CHECK-NEXT:    ret i32 [[B]]
;
  %a = load atomic i32, ptr %P1 seq_cst, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, ptr %P1 seq_cst, align 4
  ret i32 %b
}

; can't remove volatile on any path
define i32 @non_local_pre5(ptr %P1) {
; CHECK-LABEL: define i32 @non_local_pre5
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] seq_cst, align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[EARLY:%.*]], label [[NEXT:%.*]]
; CHECK:       early:
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    br label [[NEXT]]
; CHECK:       next:
; CHECK-NEXT:    [[B:%.*]] = load volatile i32, ptr [[P1]], align 4
; CHECK-NEXT:    ret i32 [[B]]
;
  %a = load atomic i32, ptr %P1 seq_cst, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load volatile i32, ptr %P1
  ret i32 %b
}

; ordered atomic to unordered atomic
define i32 @non_local_pre6(ptr %P1) {
; CHECK-LABEL: define i32 @non_local_pre6
; CHECK-SAME: (ptr [[P1:%.*]]) {
; CHECK-NEXT:    [[A:%.*]] = load atomic i32, ptr [[P1]] seq_cst, align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[EARLY:%.*]], label [[NEXT:%.*]]
; CHECK:       early:
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    [[B_PRE:%.*]] = load atomic i32, ptr [[P1]] unordered, align 4
; CHECK-NEXT:    br label [[NEXT]]
; CHECK:       next:
; CHECK-NEXT:    [[B:%.*]] = phi i32 [ [[B_PRE]], [[EARLY]] ], [ [[A]], [[TMP0:%.*]] ]
; CHECK-NEXT:    ret i32 [[B]]
;
  %a = load atomic i32, ptr %P1 seq_cst, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, ptr %P1 unordered, align 4
  ret i32 %b
}