xref: /llvm-project/llvm/test/Transforms/EarlyCSE/fence.ll (revision 1876592ce3e7acfe3a143312815a7ebedcf60b4b)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2; RUN: opt -S -passes=early-cse -earlycse-debug-hash < %s | FileCheck %s
3; NOTE: This file is testing the current implementation.  Some of
4; the transforms used as negative tests below would be legal, but
5; only if reached through a chain of logic which EarlyCSE is incapable
6; of performing.  To say it differently, this file tests a conservative
7; version of the memory model.  If we want to extend EarlyCSE to be more
8; aggressive in the future, we may need to relax some of the negative tests.
9
10; We can value forward across the fence since we can (semantically)
11; reorder the following load before the fence.
; This is the store-to-load case: the value stored to %addr.i before the
; release fence is reused for the load after it.  Expected result: the load
; is gone and the constant 5 is returned directly, while the store and the
; fence are both kept.
12define i32 @test(ptr %addr.i) {
13; CHECK-LABEL: define i32 @test
14; CHECK-SAME: (ptr [[ADDR_I:%.*]]) {
15; CHECK-NEXT:    store i32 5, ptr [[ADDR_I]], align 4
16; CHECK-NEXT:    fence release
17; CHECK-NEXT:    ret i32 5
18;
19  store i32 5, ptr %addr.i, align 4
20  fence release
21  %a = load i32, ptr %addr.i, align 4
22  ret i32 %a
23}
24
25; Same as above, but for the load-to-load case: the value loaded from
; %addr.i before the release fence is reused for the second load of the
; same address after the fence.  Expected result: only the first load
; remains and its value is returned.
; NOTE(review): %res is computed but never used (the function returns %a,
; whereas @test3 returns %res), and %otheraddr is unused.  Presumably these
; are leftovers; confirm before changing anything, since the expected
; output below depends on the current form.
26define i32 @test2(ptr noalias %addr.i, ptr noalias %otheraddr) {
27; CHECK-LABEL: define i32 @test2
28; CHECK-SAME: (ptr noalias [[ADDR_I:%.*]], ptr noalias [[OTHERADDR:%.*]]) {
29; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[ADDR_I]], align 4
30; CHECK-NEXT:    fence release
31; CHECK-NEXT:    ret i32 [[A]]
32;
33  %a = load i32, ptr %addr.i, align 4
34  fence release
35  %a2 = load i32, ptr %addr.i, align 4
36  %res = sub i32 %a, %a2
37  ret i32 %a
38}
39
40; We cannot value forward across an acquire barrier since we might
41; be synchronizing with another thread storing to the same variable
42; followed by a release fence.  If this thread observed the release
43; had happened, we must present a consistent view of memory at the
44; fence.  Note that it would be legal to reorder '%a' after the fence
45; and then remove '%a2'.  The current implementation doesn't know how
46; to do this, but if it learned, this test will need to be revised.
; Expected result: the function is left unchanged, i.e. both loads, the
; acquire fence and the subtraction all remain.
47define i32 @test3(ptr noalias %addr.i, ptr noalias %otheraddr) {
48; CHECK-LABEL: define i32 @test3
49; CHECK-SAME: (ptr noalias [[ADDR_I:%.*]], ptr noalias [[OTHERADDR:%.*]]) {
50; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[ADDR_I]], align 4
51; CHECK-NEXT:    fence acquire
52; CHECK-NEXT:    [[A2:%.*]] = load i32, ptr [[ADDR_I]], align 4
53; CHECK-NEXT:    [[RES:%.*]] = sub i32 [[A]], [[A2]]
54; CHECK-NEXT:    ret i32 [[RES]]
55;
56  %a = load i32, ptr %addr.i, align 4
57  fence acquire
58  %a2 = load i32, ptr %addr.i, align 4
59  %res = sub i32 %a, %a2
60  ret i32 %res
61}
62
63; We cannot dead store eliminate across the fence.  We could in
64; principle reorder the second store above the fence and then DSE either
65; store, but this is beyond the simple last-store DSE which EarlyCSE
66; implements.
; Expected result: the function is left unchanged; both stores of 5 to
; %addr.i and the release fence between them remain.
67define void @test4(ptr %addr.i) {
68; CHECK-LABEL: define void @test4
69; CHECK-SAME: (ptr [[ADDR_I:%.*]]) {
70; CHECK-NEXT:    store i32 5, ptr [[ADDR_I]], align 4
71; CHECK-NEXT:    fence release
72; CHECK-NEXT:    store i32 5, ptr [[ADDR_I]], align 4
73; CHECK-NEXT:    ret void
74;
75  store i32 5, ptr %addr.i, align 4
76  fence release
77  store i32 5, ptr %addr.i, align 4
78  ret void
79}
80
81; We *could* DSE (dead store eliminate) across this fence, but don't.  No
82; other thread can observe the order of the acquire fence and the store.
; Expected result: the function is left unchanged; both stores of 5 to
; %addr.i and the acquire fence between them remain.
83define void @test5(ptr %addr.i) {
84; CHECK-LABEL: define void @test5
85; CHECK-SAME: (ptr [[ADDR_I:%.*]]) {
86; CHECK-NEXT:    store i32 5, ptr [[ADDR_I]], align 4
87; CHECK-NEXT:    fence acquire
88; CHECK-NEXT:    store i32 5, ptr [[ADDR_I]], align 4
89; CHECK-NEXT:    ret void
90;
91  store i32 5, ptr %addr.i, align 4
92  fence acquire
93  store i32 5, ptr %addr.i, align 4
94  ret void
95}
96