; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -passes=gvn < %s | FileCheck %s

; External constant global; @test4 loads from it on both sides of its fences.
@a = external constant i32

; We can value forward across the fence since we can (semantically)
; reorder the following load before the fence.
; The expected output below keeps the store and the fence, drops the load,
; and returns the stored constant 5 directly.
define i32 @test(ptr %addr.i) {
; CHECK-LABEL: define i32 @test
; CHECK-SAME: (ptr [[ADDR_I:%.*]]) {
; CHECK-NEXT:    store i32 5, ptr [[ADDR_I]], align 4
; CHECK-NEXT:    fence release
; CHECK-NEXT:    ret i32 5
;
  store i32 5, ptr %addr.i, align 4
  fence release
  %a = load i32, ptr %addr.i, align 4
  ret i32 %a
}

; Same as above, but forwarding from an earlier load instead of a store.
; The expected output contains neither load, and the subtraction of the two
; (now identical) values folds to 0.
define i32 @test2(ptr %addr.i) {
; CHECK-LABEL: define i32 @test2
; CHECK-SAME: (ptr [[ADDR_I:%.*]]) {
; CHECK-NEXT:    fence release
; CHECK-NEXT:    ret i32 0
;
  %a = load i32, ptr %addr.i, align 4
  fence release
  %a2 = load i32, ptr %addr.i, align 4
  %res = sub i32 %a, %a2
  ret i32 %res
}

; We cannot value forward across an acquire barrier since we might
; be synchronizing with another thread storing to the same variable
; followed by a release fence.  This is not so much enforcing an
; ordering property (though it is that too), but a liveness
; property.  We expect to eventually see the value stored by
; another thread when spinning on that location.
define i32 @test3(ptr noalias %addr.i, ptr noalias %otheraddr) {
; CHECK-LABEL: define i32 @test3
; CHECK-SAME: (ptr noalias [[ADDR_I:%.*]], ptr noalias [[OTHERADDR:%.*]]) {
; CHECK-NEXT:    fence acquire
; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[ADDR_I]], align 4
; CHECK-NEXT:    fence acquire
; CHECK-NEXT:    [[A2:%.*]] = load i32, ptr [[ADDR_I]], align 4
; CHECK-NEXT:    [[RES:%.*]] = sub i32 [[A]], [[A2]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  ; The following code is intended to model the unrolling of
  ; two iterations in a spin loop of the form:
  ;   do { fence acquire; tmp = *%addr.i; } while (!tmp);
  ; It's hopefully clear that allowing PRE to turn this into:
  ;   if (!*%addr.i) while(true) {}
  ; would be unfortunate.
  ; The expected output above therefore keeps both loads and the sub.
  ; Note that %otheraddr is not used in the body.
  fence acquire
  %a = load i32, ptr %addr.i, align 4
  fence acquire
  %a2 = load i32, ptr %addr.i, align 4
  %res = sub i32 %a, %a2
  ret i32 %res
}

; We can forward the value of the load
; across both the fences, because the load is from
; a constant memory location.
; The expected output below keeps the fences and the store to %addr, drops
; both loads of @a, and folds the subtraction to 0.
define i32 @test4(ptr %addr) {
; CHECK-LABEL: define i32 @test4
; CHECK-SAME: (ptr [[ADDR:%.*]]) {
; CHECK-NEXT:    fence release
; CHECK-NEXT:    store i32 42, ptr [[ADDR]], align 8
; CHECK-NEXT:    fence seq_cst
; CHECK-NEXT:    ret i32 0
;
  %var = load i32, ptr @a
  fence release
  store i32 42, ptr %addr, align 8
  fence seq_cst
  %var2 = load i32, ptr @a
  %var3 = sub i32 %var, %var2
  ret i32 %var3
}

; Another example of why forwarding across an acquire fence is problematic
; can be seen in a normal locking operation.  Say we had:
; *p = 5; unlock(l); lock(l); use(p);
; forwarding the store to p would be invalid.
; A reasonable implementation of unlock and lock might be:
; unlock() { atomicrmw sub %l, 1 unordered; fence release }
; lock() {
;   do {
;     %res = cmpxchg %p, 0, 1, monotonic monotonic
;   } while(!%res.success)
;   fence acquire;
; }
; Given we chose to forward across the release fence, we clearly can't forward
; across the acquire fence as well.