xref: /llvm-project/llvm/test/Transforms/GVN/fence.ll (revision 1876592ce3e7acfe3a143312815a7ebedcf60b4b)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2; RUN: opt -S -passes=gvn < %s | FileCheck %s
3
4@a = external constant i32
5; We can value forward across the fence since we can (semantically)
6; reorder the following load before the fence.
7define i32 @test(ptr %addr.i) {
8; CHECK-LABEL: define i32 @test
9; CHECK-SAME: (ptr [[ADDR_I:%.*]]) {
10; CHECK-NEXT:    store i32 5, ptr [[ADDR_I]], align 4
11; CHECK-NEXT:    fence release
12; CHECK-NEXT:    ret i32 5
13;
14  store i32 5, ptr %addr.i, align 4
15  fence release
16  %a = load i32, ptr %addr.i, align 4 ; reloads the location stored above; the expected output has no load and returns the constant 5
17  ret i32 %a
18}
19
20; Same as above, but forwarding a loaded value (rather than a stored one) across the release fence.
21define i32 @test2(ptr %addr.i) {
22; CHECK-LABEL: define i32 @test2
23; CHECK-SAME: (ptr [[ADDR_I:%.*]]) {
24; CHECK-NEXT:    fence release
25; CHECK-NEXT:    ret i32 0
26;
27  %a = load i32, ptr %addr.i, align 4 ; first load of %addr.i, before the fence
28  fence release
29  %a2 = load i32, ptr %addr.i, align 4 ; second load of the same address, after the release fence
30  %res = sub i32 %a, %a2 ; the expected output returns 0, i.e. both loads are treated as the same value
31  ret i32 %res
32}
33
34; We cannot forward a value across an acquire barrier since we might
35; be synchronizing with another thread storing to the same variable
36; followed by a release fence.  This is not so much enforcing an
37; ordering property (though it is that too), but a liveness
38; property.  We expect to eventually see the value of store by
39; another thread when spinning on that location.
40define i32 @test3(ptr noalias %addr.i, ptr noalias %otheraddr) {
41; CHECK-LABEL: define i32 @test3
42; CHECK-SAME: (ptr noalias [[ADDR_I:%.*]], ptr noalias [[OTHERADDR:%.*]]) {
43; CHECK-NEXT:    fence acquire
44; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[ADDR_I]], align 4
45; CHECK-NEXT:    fence acquire
46; CHECK-NEXT:    [[A2:%.*]] = load i32, ptr [[ADDR_I]], align 4
47; CHECK-NEXT:    [[RES:%.*]] = sub i32 [[A]], [[A2]]
48; CHECK-NEXT:    ret i32 [[RES]]
49;
50  ; The following code is intended to model the unrolling of
51  ; two iterations of a spin loop of the form:
52  ;   do { fence acquire; tmp = *%addr.i; } while (!tmp);
53  ; It's hopefully clear that allowing PRE to turn this into:
54  ;   if (!*%addr.i) while(true) {} would be unfortunate
55  fence acquire
56  %a = load i32, ptr %addr.i, align 4 ; first modeled iteration
57  fence acquire
58  %a2 = load i32, ptr %addr.i, align 4 ; second modeled iteration; the expected output keeps this load
59  %res = sub i32 %a, %a2 ; the expected output keeps the sub, so the two loads are not assumed equal
60  ret i32 %res
61}
62
63; We can forward the value of the first load to the second load
64; across both fences, because the load is from
65; a constant memory location.
66define i32 @test4(ptr %addr) {
67; CHECK-LABEL: define i32 @test4
68; CHECK-SAME: (ptr [[ADDR:%.*]]) {
69; CHECK-NEXT:    fence release
70; CHECK-NEXT:    store i32 42, ptr [[ADDR]], align 8
71; CHECK-NEXT:    fence seq_cst
72; CHECK-NEXT:    ret i32 0
73;
74  %var = load i32, ptr @a ; @a is declared at the top of this file as an external constant
75  fence release
76  store i32 42, ptr %addr, align 8
77  fence seq_cst
78  %var2 = load i32, ptr @a ; second load of @a, after both fences and the intervening store
79  %var3 = sub i32 %var, %var2 ; the expected output returns 0, i.e. both loads of @a are treated as the same value
80  ret i32 %var3
81}
82
83; Another example of why forwarding across an acquire fence is problematic
84; can be seen in a normal locking operation.  Say we had:
85; *p = 5; unlock(l); lock(l); use(p);
86; forwarding the store to p would be invalid.  A reasonable implementation
87; of unlock and lock might be:
88; unlock() { atomicrmw sub %l, 1 unordered; fence release }
89; lock() {
90;   do {
91;     %res = cmpxchg %l, 0, 1, monotonic monotonic
92;   } while(!%res.success)
93;   fence acquire;
94; }
95; Given we chose to forward across the release fence, we clearly can't forward
96; across the acquire fence as well.
97