; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s

; Check that volatile users of addrspacecast are not replaced: the pointer
; operand of a volatile load, store, atomicrmw, cmpxchg, or memset keeps its
; flat (addrspace(0)) type, while non-volatile users in the same function
; still have their specific address space inferred.

; CHECK-LABEL: @volatile_load_flat_from_global(
; CHECK: load volatile i32, ptr
; CHECK: store i32 %val, ptr addrspace(1)
define amdgpu_kernel void @volatile_load_flat_from_global(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 {
  %tmp0 = addrspacecast ptr addrspace(1) %input to ptr
  %tmp1 = addrspacecast ptr addrspace(1) %output to ptr
  %val = load volatile i32, ptr %tmp0, align 4
  store i32 %val, ptr %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_load_flat_from_constant(
; CHECK: load volatile i32, ptr
; CHECK: store i32 %val, ptr addrspace(1)
define amdgpu_kernel void @volatile_load_flat_from_constant(ptr addrspace(4) nocapture %input, ptr addrspace(1) nocapture %output) #0 {
  %tmp0 = addrspacecast ptr addrspace(4) %input to ptr
  %tmp1 = addrspacecast ptr addrspace(1) %output to ptr
  %val = load volatile i32, ptr %tmp0, align 4
  store i32 %val, ptr %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_load_flat_from_group(
; CHECK: load volatile i32, ptr
; CHECK: store i32 %val, ptr addrspace(3)
define amdgpu_kernel void @volatile_load_flat_from_group(ptr addrspace(3) nocapture %input, ptr addrspace(3) nocapture %output) #0 {
  %tmp0 = addrspacecast ptr addrspace(3) %input to ptr
  %tmp1 = addrspacecast ptr addrspace(3) %output to ptr
  %val = load volatile i32, ptr %tmp0, align 4
  store i32 %val, ptr %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_load_flat_from_private(
; CHECK: load volatile i32, ptr
; CHECK: store i32 %val, ptr addrspace(5)
define amdgpu_kernel void @volatile_load_flat_from_private(ptr addrspace(5) nocapture %input, ptr addrspace(5) nocapture %output) #0 {
  %tmp0 = addrspacecast ptr addrspace(5) %input to ptr
  %tmp1 = addrspacecast ptr addrspace(5) %output to ptr
  %val = load volatile i32, ptr %tmp0, align 4
  store i32 %val, ptr %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_store_flat_to_global(
; CHECK: load i32, ptr addrspace(1)
; CHECK: store volatile i32 %val, ptr
define amdgpu_kernel void @volatile_store_flat_to_global(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 {
  %tmp0 = addrspacecast ptr addrspace(1) %input to ptr
  %tmp1 = addrspacecast ptr addrspace(1) %output to ptr
  %val = load i32, ptr %tmp0, align 4
  store volatile i32 %val, ptr %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_store_flat_to_group(
; CHECK: load i32, ptr addrspace(3)
; CHECK: store volatile i32 %val, ptr
define amdgpu_kernel void @volatile_store_flat_to_group(ptr addrspace(3) nocapture %input, ptr addrspace(3) nocapture %output) #0 {
  %tmp0 = addrspacecast ptr addrspace(3) %input to ptr
  %tmp1 = addrspacecast ptr addrspace(3) %output to ptr
  %val = load i32, ptr %tmp0, align 4
  store volatile i32 %val, ptr %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_store_flat_to_private(
; CHECK: load i32, ptr addrspace(5)
; CHECK: store volatile i32 %val, ptr
define amdgpu_kernel void @volatile_store_flat_to_private(ptr addrspace(5) nocapture %input, ptr addrspace(5) nocapture %output) #0 {
  %tmp0 = addrspacecast ptr addrspace(5) %input to ptr
  %tmp1 = addrspacecast ptr addrspace(5) %output to ptr
  %val = load i32, ptr %tmp0, align 4
  store volatile i32 %val, ptr %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_atomicrmw_add_group_to_flat(
; CHECK: addrspacecast ptr addrspace(3) %group.ptr to ptr
; CHECK: atomicrmw volatile add ptr
define i32 @volatile_atomicrmw_add_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 {
  %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
  %ret = atomicrmw volatile add ptr %cast, i32 %y seq_cst
  ret i32 %ret
}

; CHECK-LABEL: @volatile_atomicrmw_add_global_to_flat(
; CHECK: addrspacecast ptr addrspace(1) %global.ptr to ptr
; CHECK: %ret = atomicrmw volatile add ptr
define i32 @volatile_atomicrmw_add_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 {
  %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
  %ret = atomicrmw volatile add ptr %cast, i32 %y seq_cst
  ret i32 %ret
}

; CHECK-LABEL: @volatile_cmpxchg_global_to_flat(
; CHECK: addrspacecast ptr addrspace(1) %global.ptr to ptr
; CHECK: cmpxchg volatile ptr
define { i32, i1 } @volatile_cmpxchg_global_to_flat(ptr addrspace(1) %global.ptr, i32 %cmp, i32 %val) #0 {
  %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
  %ret = cmpxchg volatile ptr %cast, i32 %cmp, i32 %val seq_cst monotonic
  ret { i32, i1 } %ret
}

; CHECK-LABEL: @volatile_cmpxchg_group_to_flat(
; CHECK: addrspacecast ptr addrspace(3) %group.ptr to ptr
; CHECK: cmpxchg volatile ptr
define { i32, i1 } @volatile_cmpxchg_group_to_flat(ptr addrspace(3) %group.ptr, i32 %cmp, i32 %val) #0 {
  %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
  %ret = cmpxchg volatile ptr %cast, i32 %cmp, i32 %val seq_cst monotonic
  ret { i32, i1 } %ret
}

; CHECK-LABEL: @volatile_memset_group_to_flat(
; CHECK: %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
; CHECK: call void @llvm.memset.p0.i64(ptr align 4 %cast, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 {
  %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
  call void @llvm.memset.p0.i64(ptr align 4 %cast, i8 4, i64 32, i1 true)
  ret void
}

; CHECK-LABEL: @volatile_memset_global_to_flat(
; CHECK: %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
; CHECK: call void @llvm.memset.p0.i64(ptr align 4 %cast, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 {
  %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
  call void @llvm.memset.p0.i64(ptr align 4 %cast, i8 4, i64 32, i1 true)
  ret void
}

declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1) #1

attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }